Skip to content

Commit

Permalink
Add ability to encode raw binary strings
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonpaulos committed Jul 16, 2024
1 parent c72fa85 commit 96aae2c
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 11 deletions.
13 changes: 12 additions & 1 deletion src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { utf8Count, utf8Encode } from "./utils/utf8";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
import { ensureUint8Array, compareUint8Arrays, RawBinaryString } from "./utils/typedArrays";
import type { ExtData } from "./ExtData";
import type { ContextOf } from "./context";

Expand Down Expand Up @@ -326,6 +326,8 @@ export class Encoder<ContextType = undefined> {
this.encodeArray(object, depth);
} else if (ArrayBuffer.isView(object)) {
this.encodeBinary(object);
} else if (object instanceof RawBinaryString) {
this.encodeBinaryAsString(object);
} else if (typeof object === "bigint") {
// this is here instead of in doEncode so that we can try encoding with an extension first,
// otherwise we would break existing extensions for bigints
Expand Down Expand Up @@ -361,6 +363,13 @@ export class Encoder<ContextType = undefined> {
this.writeU8a(bytes);
}

/**
 * Emits the bytes wrapped by a RawBinaryString behind a string header instead of a binary header.
 *
 * The header is sized from the byte length of the wrapped view, and the payload is then written
 * as-is; this method performs no UTF-8 encoding or validation of its own.
 *
 * @param binaryString - wrapper whose `rawBinaryValue` supplies the bytes to write
 */
private encodeBinaryAsString(binaryString: RawBinaryString) {
const { rawBinaryValue: view } = binaryString;
// The header goes out first, so the length it announces must be the view's own byte length.
this.writeStringHeader(view.byteLength);
this.writeU8a(ensureUint8Array(view));
}

private encodeArray(object: Array<unknown>, depth: number) {
const size = object.length;
if (size < 16) {
Expand Down Expand Up @@ -464,6 +473,8 @@ export class Encoder<ContextType = undefined> {
this.encodeBigInt(key);
} else if (ArrayBuffer.isView(key)) {
this.encodeBinary(key);
} else if (key instanceof RawBinaryString) {
this.encodeBinaryAsString(key);
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import { Encoder } from "./Encoder";
export { Encoder };
import type { EncoderOptions } from "./Encoder";
export type { EncoderOptions };
import { RawBinaryString } from "./utils/typedArrays";
export { RawBinaryString };

// Utilities for Extension Types:

Expand Down
19 changes: 19 additions & 0 deletions src/utils/typedArrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,22 @@ export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
}
return a.length - b.length;
}

/**
 * Marks a binary payload that the encoder should write out as a string rather than as binary.
 *
 * Think of it as a string whose UTF-8 encoding step has already happened: the wrapped bytes are
 * the string's content. The main use is emitting a string-typed value whose bytes are not valid
 * UTF-8. Encoding such data as binary is the better choice whenever possible; this wrapper exists
 * for interoperability with systems that insist on the string type.
 */
export class RawBinaryString {
/** The wrapped bytes, exactly as handed to the constructor. */
public readonly rawBinaryValue: ArrayBufferView;

/**
 * Wrap an ArrayBufferView (a typed array or DataView) as a RawBinaryString.
 *
 * @param rawBinaryValue - the bytes to be treated as string content
 * @throws TypeError if `rawBinaryValue` is not an ArrayBufferView
 */
public constructor(rawBinaryValue: ArrayBufferView) {
// Runtime check for untyped (plain JavaScript) callers that bypass the static signature.
if (ArrayBuffer.isView(rawBinaryValue)) {
this.rawBinaryValue = rawBinaryValue;
} else {
throw new TypeError("RawBinaryString: rawBinaryValue must be an ArrayBufferView");
}
}
}
49 changes: 39 additions & 10 deletions test/decode-raw-strings.test.ts → test/raw-strings.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,44 @@
import assert from "assert";
import { encode, decode } from "../src";
import { encode, decode, RawBinaryString } from "../src";
import type { DecoderOptions } from "../src";

// Shared fixture: 32 bytes that are not well-formed UTF-8 (the second byte, 180, is a
// continuation byte with no lead byte before it).
const invalidUtf8String = new Uint8Array([
  61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136,
  233, 221, 15, 174, 247, 19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121,
]);

// Encoder-side coverage for RawBinaryString, both as a top-level value and as a Map key.
describe("encode with RawBinaryString", () => {
  it("encodes a RawBinaryString value as a string", () => {
    // 0x66 0x6f 0x6f are the UTF-8 bytes of "foo", so the output must match the plain string.
    const fooBytes = Uint8Array.from([0x66, 0x6f, 0x6f]);
    const wrapped = new RawBinaryString(fooBytes);
    assert.deepStrictEqual(encode(wrapped), encode("foo"));
  });

  it("encodes an invalid UTF-8 RawBinaryString value as a string", () => {
    const encoded = encode(new RawBinaryString(invalidUtf8String));
    // Expected output: 217 (0xd9, the MessagePack `str 8` marker), the length 32, then the
    // fixture bytes unchanged.
    const wanted = Uint8Array.from([217, 32, ...invalidUtf8String]);
    assert.deepStrictEqual(encoded, wanted);
  });

  it("encodes a RawBinaryString map key as a string", () => {
    // 0x6b 0x65 0x79 are the UTF-8 bytes of "key".
    const keyBytes = Uint8Array.from([0x6b, 0x65, 0x79]);
    const input = new Map([[new RawBinaryString(keyBytes), "foo"]]);
    assert.deepStrictEqual(encode(input), encode({ key: "foo" }));
  });

  it("encodes an invalid UTF-8 RawBinaryString map key as a string", () => {
    const input = new Map([[new RawBinaryString(invalidUtf8String), "abc"]]);
    const encoded = encode(input);
    // Expected output: 129 (0x81, a one-entry map), the key as 217, 32 plus the fixture bytes,
    // then the value "abc" as 163 (0xa3), 97, 98, 99.
    const wanted = Uint8Array.from([129, 217, 32, ...invalidUtf8String, 163, 97, 98, 99]);
    assert.deepStrictEqual(encoded, wanted);
  });
});

describe("decode with rawBinaryStringValues specified", () => {
const options = { rawBinaryStringValues: true } satisfies DecoderOptions;

Expand All @@ -12,12 +49,8 @@ describe("decode with rawBinaryStringValues specified", () => {
});

it("decodes invalid UTF-8 string values as binary", () => {
const invalidUtf8String = Uint8Array.from([
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176,
184, 221, 66, 188, 171, 36, 135, 121,
]);
const encoded = Uint8Array.from([
196, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
176, 184, 221, 66, 188, 171, 36, 135, 121,
]);

Expand Down Expand Up @@ -64,10 +97,6 @@ describe("decode with rawBinaryStringKeys specified", () => {
});

it("decodes invalid UTF-8 string keys as binary", () => {
const invalidUtf8String = Uint8Array.from([
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176,
184, 221, 66, 188, 171, 36, 135, 121,
]);
const encodedMap = Uint8Array.from([
129, 217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247,
19, 50, 176, 184, 221, 66, 188, 171, 36, 135, 121, 163, 97, 98, 99,
Expand Down

0 comments on commit 96aae2c

Please sign in to comment.