Skip to content

Commit

Permalink
Add ability to encode raw binary strings (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonpaulos authored Jul 31, 2024
1 parent c72fa85 commit 6600a96
Show file tree
Hide file tree
Showing 8 changed files with 339 additions and 149 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ NodeJS `Buffer` is also acceptable because it is a subclass of `Uint8Array`.
| intMode | IntMode | `IntMode.AS_ENCODED` if `useBigInt64` is `true` or `IntMode.UNSAFE_NUMBER` otherwise |
| rawBinaryStringKeys | boolean | false |
| rawBinaryStringValues | boolean | false |
| useRawBinaryStringClass | boolean | false |
| useMap | boolean | false |
| supportObjectNumberKeys | boolean | false |
| maxStrLength | number | `4_294_967_295` (UINT32_MAX) |
Expand All @@ -174,7 +175,7 @@ You can use `max${Type}Length` to limit the length of each type decoded.

`intMode` determines whether decoded integers should be returned as numbers or bigints in different circumstances. The possible values are [described below](#intmode).

To skip UTF-8 decoding of strings, one or both of `rawBinaryStringKeys` and `rawBinaryStringValues` can be set to `true`. If enabled, strings are decoded into `Uint8Array`. `rawBinaryStringKeys` affects only map keys, while `rawBinaryStringValues` affect all other string values.
To skip UTF-8 decoding of strings, one or both of `rawBinaryStringKeys` and `rawBinaryStringValues` can be set to `true`. If enabled, strings are decoded into `Uint8Array`, or a `RawBinaryString` which wraps a `Uint8Array` if `useRawBinaryStringClass` is true. `rawBinaryStringKeys` affects only map keys, while `rawBinaryStringValues` affects all other string values. You may want to enable `useRawBinaryStringClass` if you want to distinguish between regular strings and binary strings, or if you wish to re-encode the object, since `RawBinaryString` instances will be encoded as regular strings.

If `useMap` is enabled, maps are decoded into the `Map` container instead of plain objects. `Map` objects support a wider range of key types. Plain objects only support string keys (though you can enable `supportObjectNumberKeys` to coerce number keys to strings), while `Map` objects support strings, numbers, bigints, and Uint8Arrays.

Expand Down Expand Up @@ -549,7 +550,7 @@ The mapping of integers varies on the setting of `intMode`.

- \*1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`
- \*2 MessagePack ints are decoded as either numbers or bigints depending on the [IntMode](#intmode) used during decoding.
- \*3 If you'd like to skip UTF-8 decoding of strings, enable one of `rawBinaryStringKeys` or `rawBinaryStringValues`. In that case, strings are decoded into `Uint8Array`.
- \*3 If you'd like to skip UTF-8 decoding of strings, enable one of `rawBinaryStringKeys` or `rawBinaryStringValues`. In that case, strings are decoded into a `Uint8Array` or a `RawBinaryString`, depending on the value of `useRawBinaryStringClass`.
- \*4 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
- \*5 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
- \*6 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
Expand Down
31 changes: 27 additions & 4 deletions src/Decoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { prettyByte } from "./utils/prettyByte";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { IntMode, getInt64, getUint64, convertSafeIntegerToMode, UINT32_MAX } from "./utils/int";
import { utf8Decode } from "./utils/utf8";
import { createDataView, ensureUint8Array } from "./utils/typedArrays";
import { createDataView, ensureUint8Array, RawBinaryString } from "./utils/typedArrays";
import { CachedKeyDecoder, KeyDecoder } from "./CachedKeyDecoder";
import { DecodeError } from "./DecodeError";
import type { ContextOf } from "./context";
Expand Down Expand Up @@ -53,6 +53,17 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
*/
rawBinaryStringKeys: boolean;

/**
* If true, the decoder will use the RawBinaryString class to store raw binary strings created
* during decoding from the rawBinaryStringValues and rawBinaryStringKeys options. If false, it
* will use Uint8Arrays.
*
* Defaults to false.
*
* Has no effect if rawBinaryStringValues and rawBinaryStringKeys are both false.
*/
useRawBinaryStringClass: boolean;

/**
* If true, the decoder will use the Map object to store map values. If false, it will use plain
* objects. Defaults to false.
Expand Down Expand Up @@ -126,7 +137,13 @@ type MapKeyType = string | number | bigint | Uint8Array;

function isValidMapKeyType(key: unknown, useMap: boolean, supportObjectNumberKeys: boolean): key is MapKeyType {
if (useMap) {
return typeof key === "string" || typeof key === "number" || typeof key === "bigint" || key instanceof Uint8Array;
return (
typeof key === "string" ||
typeof key === "number" ||
typeof key === "bigint" ||
key instanceof Uint8Array ||
key instanceof RawBinaryString
);
}
// Plain objects support a more limited set of key types
return typeof key === "string" || (supportObjectNumberKeys && typeof key === "number");
Expand Down Expand Up @@ -261,6 +278,7 @@ export class Decoder<ContextType = undefined> {
private readonly intMode: IntMode;
private readonly rawBinaryStringValues: boolean;
private readonly rawBinaryStringKeys: boolean;
private readonly useRawBinaryStringClass: boolean;
private readonly useMap: boolean;
private readonly supportObjectNumberKeys: boolean;
private readonly maxStrLength: number;
Expand All @@ -285,6 +303,7 @@ export class Decoder<ContextType = undefined> {
this.intMode = options?.intMode ?? (options?.useBigInt64 ? IntMode.AS_ENCODED : IntMode.UNSAFE_NUMBER);
this.rawBinaryStringValues = options?.rawBinaryStringValues ?? false;
this.rawBinaryStringKeys = options?.rawBinaryStringKeys ?? false;
this.useRawBinaryStringClass = options?.useRawBinaryStringClass ?? false;
this.useMap = options?.useMap ?? false;
this.supportObjectNumberKeys = options?.supportObjectNumberKeys ?? false;
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
Expand Down Expand Up @@ -716,9 +735,13 @@ export class Decoder<ContextType = undefined> {
this.stack.pushArrayState(size);
}

private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
private decodeString(byteLength: number, headerOffset: number): string | Uint8Array | RawBinaryString {
if (this.stateIsMapKey() ? this.rawBinaryStringKeys : this.rawBinaryStringValues) {
return this.decodeBinary(byteLength, headerOffset);
const decoded = this.decodeBinary(byteLength, headerOffset);
if (this.useRawBinaryStringClass) {
return new RawBinaryString(decoded);
}
return decoded;
}
return this.decodeUtf8String(byteLength, headerOffset);
}
Expand Down
23 changes: 20 additions & 3 deletions src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { utf8Count, utf8Encode } from "./utils/utf8";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
import { ensureUint8Array, compareUint8Arrays, RawBinaryString } from "./utils/typedArrays";
import type { ExtData } from "./ExtData";
import type { ContextOf } from "./context";

Expand Down Expand Up @@ -326,6 +326,8 @@ export class Encoder<ContextType = undefined> {
this.encodeArray(object, depth);
} else if (ArrayBuffer.isView(object)) {
this.encodeBinary(object);
} else if (object instanceof RawBinaryString) {
this.encodeBinaryAsString(object);
} else if (typeof object === "bigint") {
// this is here instead of in doEncode so that we can try encoding with an extension first,
// otherwise we would break existing extensions for bigints
Expand Down Expand Up @@ -361,6 +363,13 @@ export class Encoder<ContextType = undefined> {
this.writeU8a(bytes);
}

/**
 * Writes the bytes wrapped by a RawBinaryString using a string header instead of a binary one.
 *
 * The header is emitted first with the view's byte length, then the view is normalized to a
 * Uint8Array and its bytes are appended as-is.
 */
private encodeBinaryAsString(binaryString: RawBinaryString) {
  const view = binaryString.rawBinaryValue;
  this.writeStringHeader(view.byteLength);
  this.writeU8a(ensureUint8Array(view));
}

private encodeArray(object: Array<unknown>, depth: number) {
const size = object.length;
if (size < 16) {
Expand Down Expand Up @@ -397,6 +406,7 @@ export class Encoder<ContextType = undefined> {
private sortMapKeys(keys: Array<unknown>): Array<unknown> {
const numericKeys: Array<number | bigint> = [];
const stringKeys: Array<string> = [];
const rawStringKeys: Array<RawBinaryString> = [];
const binaryKeys: Array<Uint8Array> = [];
for (const key of keys) {
if (typeof key === "number") {
Expand All @@ -410,15 +420,20 @@ export class Encoder<ContextType = undefined> {
stringKeys.push(key);
} else if (ArrayBuffer.isView(key)) {
binaryKeys.push(ensureUint8Array(key));
} else if (key instanceof RawBinaryString) {
rawStringKeys.push(key);
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
}
numericKeys.sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); // Avoid using === to compare numbers and bigints
stringKeys.sort();
rawStringKeys.sort((a, b) =>
compareUint8Arrays(ensureUint8Array(a.rawBinaryValue), ensureUint8Array(b.rawBinaryValue)),
);
binaryKeys.sort(compareUint8Arrays);
// At the moment this arbitrarily orders the keys as numeric, string, binary
return ([] as Array<unknown>).concat(numericKeys, stringKeys, binaryKeys);
// At the moment this arbitrarily orders the keys as numeric, string, raw string, binary
return ([] as Array<unknown>).concat(numericKeys, stringKeys, rawStringKeys, binaryKeys);
}

private encodeMapObject(object: Record<string, unknown>, depth: number) {
Expand Down Expand Up @@ -464,6 +479,8 @@ export class Encoder<ContextType = undefined> {
this.encodeBigInt(key);
} else if (ArrayBuffer.isView(key)) {
this.encodeBinary(key);
} else if (key instanceof RawBinaryString) {
this.encodeBinaryAsString(key);
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import { Encoder } from "./Encoder";
export { Encoder };
import type { EncoderOptions } from "./Encoder";
export type { EncoderOptions };
import { RawBinaryString } from "./utils/typedArrays";
export { RawBinaryString };

// Utilities for Extension Types:

Expand Down
19 changes: 19 additions & 0 deletions src/utils/typedArrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,22 @@ export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
}
return a.length - b.length;
}

/**
 * A wrapper marking binary data that should be encoded as if it were a string.
 *
 * Think of it as a string whose bytes have already been produced, so no UTF-8 encoding step is
 * applied to it. This helps when a value has to be written as a string even though it contains
 * invalid UTF-8 sequences. Encoding such values as binary is the better choice whenever possible;
 * this class exists for compatibility with non-ideal systems that require the string type.
 */
export class RawBinaryString {
  /** The wrapped view, stored exactly as it was passed to the constructor. */
  public readonly rawBinaryValue: ArrayBufferView;

  /**
   * Create a new RawBinaryString from an ArrayBufferView.
   *
   * @param rawBinaryValue - The view to wrap.
   * @throws TypeError if `rawBinaryValue` is not an ArrayBufferView.
   */
  public constructor(rawBinaryValue: ArrayBufferView) {
    // Runtime guard for untyped (plain JavaScript) callers that bypass the static type.
    const isView = ArrayBuffer.isView(rawBinaryValue);
    if (!isView) {
      throw new TypeError("RawBinaryString: rawBinaryValue must be an ArrayBufferView");
    }
    this.rawBinaryValue = rawBinaryValue;
  }
}
132 changes: 0 additions & 132 deletions test/decode-raw-strings.test.ts

This file was deleted.

Loading

0 comments on commit 6600a96

Please sign in to comment.