Skip to content

Commit 8b6cbdd

Browse files
committed
Interpret escapes in symbols
1 parent 5509804 commit 8b6cbdd

File tree

2 files changed

+65
-22
lines changed

2 files changed

+65
-22
lines changed

src/parser/index.js

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
import { parser } from "./rules.js";
22

3+
/**
 * Single-character escapes that map to a different character than the one
 * written after the backslash. Any character not listed here is an identity
 * escape (`\z` -> `z`, `\"` -> `"`, `\\` -> `\`).
 *
 * Line terminators are intentionally not listed: an escaped "\n" or "\r"
 * yields the terminator itself. Mapping them to "" would turn them into line
 * continuations instead.
 * NOTE(review): confirm identity (not line-continuation) is the intended
 * behaviour for escaped line terminators.
 */
const ESCAPES = Object.freeze({
  "0": "\0",
  "n": "\n",
  "r": "\r",
  "v": "\v",
  "t": "\t",
  "b": "\b",
  "f": "\f"
});

/**
 * Replaces backslash escape sequences in `str` with the characters they denote.
 *
 * Supported forms:
 *   - `\xHH`      two hex digits
 *   - `\uHHHH`    four hex digits
 *   - `\u{H...}`  one or more hex digits (code point)
 *   - `\0 \n \r \v \t \b \f`  C-style escapes (see ESCAPES)
 *   - `\<any other character>` the character itself
 *
 * A backslash with nothing after it (end of input) is left untouched.
 *
 * @param {string} str - Raw text containing escape sequences.
 * @returns {string} The text with every escape sequence interpreted.
 * @throws {RangeError} If a `\u{...}` escape names a value above 0x10FFFF
 *   (raised by String.fromCodePoint).
 */
function interpretEscapes(str) {
  // Everything is handled in ONE left-to-right pass. Doing hex/unicode escapes
  // and single-character escapes in separate passes mis-tokenizes the input:
  // the second backslash of `\\x41` would be read as the start of `\x41`, and
  // a backslash produced by `\x5C` would be re-interpreted as a new escape.
  return str.replaceAll(
    /\\(?:x([0-9a-fA-F]{2})|u\{([0-9a-fA-F]+)\}|u([0-9a-fA-F]{4})|(.))/gs,
    function(_, hex, braced, quad, char) {
      const digits = hex ?? braced ?? quad;

      if (digits !== undefined) {
        return String.fromCodePoint(Number.parseInt(digits, 16));
      }

      // `\x` / `\u` without valid digits end up here and become plain "x"/"u".
      return ESCAPES[char] ?? char;
    }
  );
}
22+
323
export default function(src) {
424
let tree = parser.parse(src);
525
let cursor = tree.cursor();
@@ -27,7 +47,7 @@ export default function(src) {
2747
production.push(symbol);
2848
}
2949
} else if (cursor.name === "QuotedSymbol") {
30-
let symbol = src.slice(cursor.from + 1, cursor.to - 1);
50+
let symbol = interpretEscapes(src.slice(cursor.from + 1, cursor.to - 1));
3151

3252
if (typeof production === "undefined") {
3353
head = symbol;

test/parser.test.js

+44-21
Original file line numberDiff line numberDiff line change
@@ -37,33 +37,56 @@ describe("parser", function() {
3737
});
3838

3939
it("quoted symbols can contain escapes", function() {
40-
assert.deepStrictEqual(parser(`"\\"" -> .`), { productions: [[`\\"`]] });
41-
assert.deepStrictEqual(parser(`"\\'" -> .`), { productions: [[`\\'`]] });
42-
assert.deepStrictEqual(parser(`'\\'' -> .`), { productions: [[`\\'`]] });
43-
assert.deepStrictEqual(parser(`'\\"' -> .`), { productions: [[`\\"`]] });
44-
assert.deepStrictEqual(parser(`"\\\\" -> .`), { productions: [[`\\\\`]] });
45-
46-
assert.deepStrictEqual(parser(`"\\b" -> .`), { productions: [[`\\b`]] });
47-
assert.deepStrictEqual(parser(`"\\f" -> .`), { productions: [[`\\f`]] });
48-
assert.deepStrictEqual(parser(`"\\n" -> .`), { productions: [[`\\n`]] });
49-
assert.deepStrictEqual(parser(`"\\r" -> .`), { productions: [[`\\r`]] });
50-
assert.deepStrictEqual(parser(`"\\t" -> .`), { productions: [[`\\t`]] });
51-
assert.deepStrictEqual(parser(`"\\v" -> .`), { productions: [[`\\v`]] });
52-
assert.deepStrictEqual(parser(`"\\0" -> .`), { productions: [[`\\0`]] });
53-
54-
assert.deepStrictEqual(parser(`"\\\n" -> .`), { productions: [[`\\\n`]] });
40+
// QUOTATION MARK and APOSTROPHE
41+
assert.deepStrictEqual(parser(`"\\"" -> .`), { productions: [[`"`]] });
42+
assert.deepStrictEqual(parser(`"\\'" -> .`), { productions: [[`'`]] });
43+
assert.deepStrictEqual(parser(`'\\'' -> .`), { productions: [[`'`]] });
44+
assert.deepStrictEqual(parser(`'\\"' -> .`), { productions: [[`"`]] });
45+
assert.deepStrictEqual(parser(`"\\\\" -> .`), { productions: [[`\\`]] });
46+
47+
// C-style escapes
48+
assert.deepStrictEqual(parser(`"\\b" -> .`), { productions: [[`\b`]] });
49+
assert.deepStrictEqual(parser(`"\\f" -> .`), { productions: [[`\f`]] });
50+
assert.deepStrictEqual(parser(`"\\n" -> .`), { productions: [[`\n`]] });
51+
assert.deepStrictEqual(parser(`"\\r" -> .`), { productions: [[`\r`]] });
52+
assert.deepStrictEqual(parser(`"\\t" -> .`), { productions: [[`\t`]] });
53+
assert.deepStrictEqual(parser(`"\\v" -> .`), { productions: [[`\v`]] });
54+
assert.deepStrictEqual(parser(`"\\0" -> .`), { productions: [[`\0`]] });
55+
56+
// Identity escapes
57+
assert.deepStrictEqual(parser(`"\\z" -> .`), { productions: [[`z`]] });
58+
59+
// Escaped line terminator
60+
assert.deepStrictEqual(parser(`"\\\n" -> .`), { productions: [[`\n`]] });
61+
assert.deepStrictEqual(parser(`"\\\r" -> .`), { productions: [[`\r`]] });
5562

5663
// COPYRIGHT SIGN
57-
assert.deepStrictEqual(parser(`"\\xA9" -> .`), { productions: [[`\\xA9`]] });
58-
assert.deepStrictEqual(parser(`"\\xa9" -> .`), { productions: [[`\\xa9`]] });
59-
assert.deepStrictEqual(parser(`"\\u00A9" -> .`), { productions: [[`\\u00A9`]] });
60-
assert.deepStrictEqual(parser(`"\\u00a9" -> .`), { productions: [[`\\u00a9`]] });
64+
assert.deepStrictEqual(parser(`"\\xA9" -> .`), { productions: [[`\xA9`]] });
65+
assert.deepStrictEqual(parser(`"\\xa9" -> .`), { productions: [[`\xa9`]] });
66+
assert.deepStrictEqual(parser(`"\\u00A9" -> .`), { productions: [[`\u00A9`]] });
67+
assert.deepStrictEqual(parser(`"\\u00a9" -> .`), { productions: [[`\u00a9`]] });
68+
// Braced code point form of COPYRIGHT SIGN (upper- and lower-case hex digits).
assert.deepStrictEqual(parser(`"\\u{00A9}" -> .`), { productions: [[`\u{00A9}`]] });
assert.deepStrictEqual(parser(`"\\u{00a9}" -> .`), { productions: [[`\u{00a9}`]] });
6170

6271
// BLACK HEART SUIT
63-
assert.deepStrictEqual(parser(`"\\u2665" -> .`), { productions: [["\\u2665"]] });
72+
assert.deepStrictEqual(parser(`"\\u2665" -> .`), { productions: [[`\u2665`]] });
73+
assert.deepStrictEqual(parser(`"\\u{2665}" -> .`), { productions: [[`\u{2665}`]] });
6474

6575
// TETRAGRAM FOR CENTRE
66-
assert.deepStrictEqual(parser(`"\\u{1D306}" -> .`), { productions: [["\\u{1D306}"]] });
76+
assert.deepStrictEqual(parser(`"\\u{1D306}" -> .`), { productions: [[`\u{1D306}`]] });
77+
78+
// AMPERSAND (\x and two hex digits)
79+
assert.deepStrictEqual(parser(`"\\x2665" -> .`), { productions: [[`\x2665`]] });
80+
81+
// MODIFIER LETTER CAPITAL D (\u without braces)
82+
assert.deepStrictEqual(parser(`"\\u1D306" -> .`), { productions: [[`\u1D306`]] });
83+
});
84+
85+
it("quoted symbols can contain multiple escapes", function() {
86+
assert.deepStrictEqual(
87+
parser(`"\\" \\0 \\xA9 \\u00A9 \\u{2665}" -> .`),
88+
{ productions: [[`" \0 \xA9 \u00A9 \u{2665}`]] }
89+
);
6790
});
6891

6992
it("nonterminals don't need to be capitalized", function() {

0 commit comments

Comments
 (0)