Skip to content

Commit 5b43b33

Browse files
authored Feb 22, 2024
Merge pull request #63 from mattjohnsonpint/special-encoding
Identify and fix encoding bugs
2 parents c6c636a + a705685 commit 5b43b33

File tree

6 files changed

+564
-225
lines changed

6 files changed

+564
-225
lines changed
 

‎assembly/__tests__/as-json.spec.ts

+298-2
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,18 @@ describe("Ser/de Numbers", () => {
7373
canSerde<f64>(10e2, "1000.0");
7474

7575
canSerde<f64>(123456e-5, "1.23456");
76-
7776
canSerde<f64>(0.0, "0.0");
78-
canSerde<f64>(7.23, "7.23");
77+
canSerde<f64>(-7.23, "-7.23");
78+
79+
canSerde<f64>(1e-6, "0.000001");
80+
canSerde<f64>(1e-7, "1e-7");
81+
canDeser<f64>("1E-7", 1e-7);
82+
83+
canSerde<f64>(1e20, "100000000000000000000.0");
84+
canSerde<f64>(1e21, "1e+21");
85+
canDeser<f64>("1E+21", 1e21);
86+
canDeser<f64>("1e21", 1e21);
87+
canDeser<f64>("1E21", 1e21);
7988
});
8089

8190
it("should ser/de booleans", () => {
@@ -97,6 +106,11 @@ describe("Ser/de Array", () => {
97106

98107
it("should ser/de float arrays", () => {
99108
canSerde<f64[]>([7.23, 10e2, 10e2, 123456e-5, 123456e-5, 0.0, 7.23]);
109+
110+
canSerde<f64[]>([1e21,1e22,1e-7,1e-8,1e-9], "[1e+21,1e+22,1e-7,1e-8,1e-9]");
111+
canDeser<f64[]>("[1E+21,1E+22,1E-7,1E-8,1E-9]", [1e21,1e22,1e-7,1e-8,1e-9]);
112+
canDeser<f64[]>("[1e21,1e22,1e-7,1e-8,1e-9]", [1e21,1e22,1e-7,1e-8,1e-9]);
113+
canDeser<f64[]>("[1E21,1E22,1E-7,1E-8,1E-9]", [1e21,1e22,1e-7,1e-8,1e-9]);
100114
});
101115

102116
it("should ser/de boolean arrays", () => {
@@ -167,6 +181,38 @@ describe("Ser/de Objects", () => {
167181
isVerified: true,
168182
}, '{"firstName":"Emmet","lastName":"West","lastActive":[8,27,2022],"age":23,"pos":{"x":3.4,"y":1.2,"z":8.3},"isVerified":true}');
169183
});
184+
185+
it("should ser/de object with floats", () => {
186+
canSerde<ObjectWithFloat>({ f: 7.23 }, '{"f":7.23}');
187+
canSerde<ObjectWithFloat>({ f: 0.000001 }, '{"f":0.000001}');
188+
189+
canSerde<ObjectWithFloat>({ f: 1e-7 }, '{"f":1e-7}');
190+
canDeser<ObjectWithFloat>('{"f":1E-7}', { f: 1e-7 });
191+
192+
canSerde<ObjectWithFloat>({ f: 1e20 }, '{"f":100000000000000000000.0}');
193+
canSerde<ObjectWithFloat>({ f: 1e21 }, '{"f":1e+21}');
194+
canDeser<ObjectWithFloat>('{"f":1E+21}', { f: 1e21 });
195+
canDeser<ObjectWithFloat>('{"f":1e21}', { f: 1e21 });
196+
});
197+
198+
it("should ser/de object with float arrays", () => {
199+
canSerde<ObjectWithFloatArray>(
200+
{ fa: [1e21,1e22,1e-7,1e-8,1e-9] },
201+
'{"fa":[1e+21,1e+22,1e-7,1e-8,1e-9]}');
202+
203+
canDeser<ObjectWithFloatArray>(
204+
'{"fa":[1E+21,1E+22,1E-7,1E-8,1E-9]}',
205+
{ fa: [1e21,1e22,1e-7,1e-8,1e-9] });
206+
207+
canDeser<ObjectWithFloatArray>(
208+
'{"fa":[1e21,1e22,1e-7,1e-8,1e-9]}',
209+
{ fa: [1e21,1e22,1e-7,1e-8,1e-9] });
210+
211+
canDeser<ObjectWithFloatArray>(
212+
'{"fa":[1E21,1E22,1E-7,1E-8,1E-9]}',
213+
{ fa: [1e21,1e22,1e-7,1e-8,1e-9] });
214+
215+
});
170216
});
171217

172218
describe("Ser externals", () => {
@@ -343,3 +389,253 @@ describe("Ser/de Maps", () => {
343389
});
344390

345391
});
392+
393+
describe("Ser/de escape sequences in strings", () => {
394+
it("should encode short escape sequences", () => {
395+
canSer("\\", '"\\\\"');
396+
canSer('"', '"\\""');
397+
canSer("\n", '"\\n"');
398+
canSer("\r", '"\\r"');
399+
canSer("\t", '"\\t"');
400+
canSer("\b", '"\\b"');
401+
canSer("\f", '"\\f"');
402+
});
403+
404+
it("should decode short escape sequences", () => {
405+
canDeser('"\\\\"', "\\");
406+
canDeser('"\\""', '"');
407+
canDeser('"\\n"', "\n");
408+
canDeser('"\\r"', "\r");
409+
canDeser('"\\t"', "\t");
410+
canDeser('"\\b"', "\b");
411+
canDeser('"\\f"', "\f");
412+
});
413+
414+
it("should decode escaped forward slash but not encode", () => {
415+
canSer("/", '"/"');
416+
canDeser('"/"', "/");
417+
canDeser('"\\/"', "/"); // allowed
418+
});
419+
420+
// Control characters U+0000 - U+001F (inclusive) that have no short escape
// sequence must be serialized as six-character \uXXXX escapes.
it("should encode long escape sequences", () => {
  // Control characters with a dedicated two-character escape; skipped here.
  const singles = ["\n", "\r", "\t", "\b", "\f"];
  // Inclusive upper bound so that U+001F itself is exercised, matching the
  // range stated above and the `<= 0x1f` bound used by the decode tests.
  for (let i = 0; i <= 0x1F; i++) {
    const c = String.fromCharCode(i);
    if (singles.includes(c)) continue;
    const actual = JSON.stringify(c);
    // Expected form: a quoted, lower-case, zero-padded four-digit \u escape.
    const expected = `"\\u${i.toString(16).padStart(4, "0")}"`;
    expect(actual).toBe(expected, `Failed to encode '\\x${i.toString(16).padStart(2, "0")}'`);
  }
});
431+
432+
// \u0000 - \u001f
433+
it("should decode long escape sequences (lower cased)", () => {
434+
for (let i = 0; i <= 0x1f; i++) {
435+
const s = `"\\u${i.toString(16).padStart(4, "0").toLowerCase()}"`;
436+
const actual = JSON.parse<string>(s);
437+
const expected = String.fromCharCode(i);
438+
expect(actual).toBe(expected, `Failed to decode ${s}`);
439+
}
440+
});
441+
442+
// \u0000 - \u001F
443+
it("should decode long escape sequences (upper cased)", () => {
444+
for (let i = 0; i <= 0x1f; i++) {
445+
const s = `"\\u${i.toString(16).padStart(4, "0").toUpperCase()}"`;
446+
const actual = JSON.parse<string>(s);
447+
const expected = String.fromCharCode(i);
448+
expect(actual).toBe(expected, `Failed to decode ${s}`);
449+
}
450+
});
451+
452+
// See https://datatracker.ietf.org/doc/html/rfc8259#section-7
453+
it("should decode UTF-16 surrogate pairs", () => {
454+
const s = '"\\uD834\\uDD1E"';
455+
const actual = JSON.parse<string>(s);
456+
const expected = "𝄞";
457+
expect(actual).toBe(expected);
458+
});
459+
460+
// Just because we can decode UTF-16 surrogate pairs, doesn't mean we should encode them.
461+
it("should not encode UTF-16 surrogate pairs", () => {
462+
const s = "𝄞";
463+
const actual = JSON.stringify(s);
464+
const expected = '"𝄞"';
465+
expect(actual).toBe(expected);
466+
});
467+
468+
it("should encode multiple escape sequences", () => {
469+
canSer('"""', '"\\"\\"\\""');
470+
canSer('\\\\\\', '"\\\\\\\\\\\\"');
471+
});
472+
473+
it("cannot parse invalid escape sequences", () => {
474+
expect(() => {
475+
JSON.parse<string>('"\\z"');
476+
}).toThrow();
477+
});
478+
479+
});
480+
481+
describe("Ser/de special strings in object values", () => {
482+
it("should serialize quotes in string in object", () => {
483+
const o: ObjWithString = { s: '"""' };
484+
const s = '{"s":"\\"\\"\\""}';
485+
canSer(o, s);
486+
});
487+
it("should deserialize quotes in string in object", () => {
488+
const o: ObjWithString = { s: '"""' };
489+
const s = '{"s":"\\"\\"\\""}';
490+
canDeser(s, o);
491+
});
492+
it("should serialize backslashes in string in object", () => {
493+
const o: ObjWithString = { s: "\\\\\\" };
494+
const s = '{"s":"\\\\\\\\\\\\"}';
495+
canSer(o, s);
496+
});
497+
it("should deserialize backslashes in string in object", () => {
498+
const o: ObjWithString = { s: "\\\\\\" };
499+
const s = '{"s":"\\\\\\\\\\\\"}';
500+
canDeser(s, o);
501+
});
502+
503+
it("should deserialize slashes in string in object", () => {
504+
const o: ObjWithString = { s: "//" };
505+
const s = '{"s":"/\\/"}';
506+
canDeser(s, o);
507+
});
508+
it("should deserialize slashes in string in array", () => {
  // Both elements must decode to a plain forward slash.
  const a = ["/", "/"];
  // The second element is the JSON escape `\/`. The backslash is doubled so it
  // survives the source-level string literal and actually reaches the parser;
  // a single `\/` in the literal collapses to `/` and the escape goes untested.
  const s = '["/","\\/"]';
  canDeser(s, a);
});
513+
514+
it("should ser/de short escape sequences in strings in objects", () => {
515+
const o: ObjWithString = { s: "\n\r\t\b\f" };
516+
const s = '{"s":"\\n\\r\\t\\b\\f"}';
517+
canSerde(o, s);
518+
});
519+
520+
it("should ser/de short escape sequences in string arrays", () => {
521+
const a = ["\n", "\r", "\t", "\b", "\f"];
522+
const s = '["\\n","\\r","\\t","\\b","\\f"]';
523+
canSerde(a, s);
524+
});
525+
526+
it("should ser/de short escape sequences in string arrays in objects", () => {
527+
const o: ObjectWithStringArray = { sa: ["\n", "\r", "\t", "\b", "\f"] };
528+
const s = '{"sa":["\\n","\\r","\\t","\\b","\\f"]}';
529+
canSerde(o, s);
530+
});
531+
532+
it("should ser/de long escape sequences in strings in objects", () => {
  // Control characters with a dedicated two-character escape; skipped here.
  const singles = ["\n", "\r", "\t", "\b", "\f"];
  let x = ""; // raw control characters
  let y = ""; // the same characters written as \uXXXX escapes
  // Inclusive upper bound: U+001F is a control character too and must be covered.
  for (let i = 0; i <= 0x1F; i++) {
    const c = String.fromCharCode(i);
    if (singles.includes(c)) continue;
    x += c;
    y += `\\u${i.toString(16).padStart(4, "0")}`;
  }
  const o: ObjWithString = { s: x };
  const s = `{"s":"${y}"}`;
  canSerde(o, s);
});
546+
547+
it("should ser/de long escape sequences in strings in arrays", () => {
  // Control characters with a dedicated two-character escape; skipped here.
  const singles = ["\n", "\r", "\t", "\b", "\f"];
  let x: string[] = []; // raw control characters, one per element
  let y: string[] = []; // matching \uXXXX escapes, one per element
  // Inclusive upper bound: U+001F is a control character too and must be covered.
  for (let i = 0; i <= 0x1F; i++) {
    const c = String.fromCharCode(i);
    if (singles.includes(c)) continue;
    x.push(c);
    y.push(`\\u${i.toString(16).padStart(4, "0")}`);
  }
  const a = x;
  // Join the escapes into a JSON array of quoted strings.
  const s = `["${y.join('","')}"]`;
  canSerde(a, s);
});
561+
562+
it("should ser/de long escape sequences in string arrays in objects", () => {
  // Control characters with a dedicated two-character escape; skipped here.
  const singles = ["\n", "\r", "\t", "\b", "\f"];
  let x: string[] = []; // raw control characters, one per element
  let y: string[] = []; // matching \uXXXX escapes, one per element
  // Inclusive upper bound: U+001F is a control character too and must be covered.
  for (let i = 0; i <= 0x1F; i++) {
    const c = String.fromCharCode(i);
    if (singles.includes(c)) continue;
    x.push(c);
    y.push(`\\u${i.toString(16).padStart(4, "0")}`);
  }
  const o: ObjectWithStringArray = { sa: x };
  // Join the escapes into a JSON array of quoted strings under the "sa" key.
  const s = `{"sa":["${y.join('","')}"]}`;
  canSerde(o, s);
});
576+
577+
});
578+
579+
describe("Ser/de special strings in object keys", () => {
580+
581+
it("should ser/de escape sequences in key of object with int value", () => {
582+
const o: ObjWithStrangeKey<i32> = { data: 123 };
583+
const s = '{"a\\\\\\t\\"\\u0002b`c":123}';
584+
canSerde(o, s);
585+
});
586+
587+
it("should ser/de escape sequences in key of object with float value", () => {
588+
const o: ObjWithStrangeKey<f64> = { data: 123.4 };
589+
const s = '{"a\\\\\\t\\"\\u0002b`c":123.4}';
590+
canSerde(o, s);
591+
});
592+
593+
it("should ser/de escape sequences in key of object with string value", () => {
594+
const o: ObjWithStrangeKey<string> = { data: "abc" };
595+
const s = '{"a\\\\\\t\\"\\u0002b`c":"abc"}';
596+
canSerde(o, s);
597+
});
598+
599+
// Something buggy in as-pect needs a dummy value reflected here
600+
// or the subsequent test fails. It's not used in any test.
601+
Reflect.toReflectedValue(0);
602+
603+
it("should ser/de escape sequences in map key", () => {
604+
const m = new Map<string, string>();
605+
m.set('a\\\t"\x02b', 'abc');
606+
const s = '{"a\\\\\\t\\"\\u0002b":"abc"}';
607+
canSerde(m, s);
608+
});
609+
it("should ser/de escape sequences in map value", () => {
610+
const m = new Map<string, string>();
611+
m.set('abc', 'a\\\t"\x02b');
612+
const s = '{"abc":"a\\\\\\t\\"\\u0002b"}';
613+
canSerde(m, s);
614+
});
615+
});
616+
617+
@json
618+
class ObjWithString {
619+
s!: string;
620+
}
621+
622+
@json
623+
class ObjectWithStringArray {
624+
sa!: string[];
625+
}
626+
627+
@json
628+
class ObjectWithFloat {
629+
f!: f64;
630+
}
631+
632+
@json
633+
class ObjectWithFloatArray {
634+
fa!: f64[];
635+
}
636+
637+
@json
638+
class ObjWithStrangeKey<T> {
639+
@alias('a\\\t"\x02b`c')
640+
data!: T;
641+
}

‎assembly/src/chars.ts

+12-1
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@
3535
@inline export const sCode = 115;
3636
// @ts-ignore: Decorator is valid here
3737
@inline export const nCode = 110;
38+
// @ts-ignore: Decorator is valid here
39+
@inline export const bCode = 98;
3840
// Strings
3941
// @ts-ignore: Decorator is valid here
4042
@inline export const trueWord = "true";
@@ -58,6 +60,15 @@
5860
@inline export const rightBracketWord = "]";
5961
// @ts-ignore: Decorator is valid here
6062
@inline export const quoteWord = "\"";
63+
6164
// Escape Codes
6265
// @ts-ignore: Decorator is valid here
63-
@inline export const newLineCode = 10;
66+
@inline export const backspaceCode = 8; // \b
67+
// @ts-ignore: Decorator is valid here
68+
@inline export const tabCode = 9; // \t
69+
// @ts-ignore: Decorator is valid here
70+
@inline export const newLineCode = 10; // \n
71+
// @ts-ignore: Decorator is valid here
72+
@inline export const formFeedCode = 12; // \f
73+
// @ts-ignore: Decorator is valid here
74+
@inline export const carriageReturnCode = 13; // \r

0 commit comments

Comments
 (0)
Please sign in to comment.