From a91b44fd8f17137b6b50101bae366ce6da9a998e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A8le?= Date: Sun, 22 Dec 2024 20:16:55 +0100 Subject: [PATCH] Fix Regex Character Class Escape Tests For each character class escape (\d, \D, \s, \S, \w, \W), check positive cases (the escape matches all characters it's supposed to match) and negative cases (the escape doesn't match any of the characters it should not match). Each of these checks is also done in Unicode mode and with the v flag. This uses regenerate.js from the unicode-property-escapes-tests repo to generate strings that contain exactly the characters that are supposed to be matched or not matched for each escape. Comparison is done with regex test instead of regex replace to optimize the tests. This is part of my work at the SYSTEMF lab at EPFL. --- ...lass-digit-class-escape-negative-cases.js} | 39 ++-- ...lass-digit-class-escape-positive-cases.js} | 36 ++-- ...er-class-non-digit-class-escape-flags-u.js | 75 -------- ...-non-digit-class-escape-negative-cases.js} | 36 ++-- ...it-class-escape-plus-quantifier-flags-u.js | 75 -------- ...-non-digit-class-escape-plus-quantifier.js | 75 -------- ...s-non-digit-class-escape-positive-cases.js | 83 ++++++++ .../character-class-non-digit-class-escape.js | 75 -------- ...ass-non-whitespace-class-escape-flags-u.js | 84 -------- ...-whitespace-class-escape-negative-cases.js | 90 +++++++++ ...ce-class-escape-plus-quantifier-flags-u.js | 84 -------- ...whitespace-class-escape-plus-quantifier.js | 84 -------- ...-whitespace-class-escape-positive-cases.js | 92 +++++++++ ...acter-class-non-whitespace-class-escape.js | 84 -------- ...ter-class-non-word-class-escape-flags-u.js | 77 -------- ...s-non-word-class-escape-negative-cases.js} | 40 ++-- ...rd-class-escape-plus-quantifier-flags-u.js | 77 -------- ...s-non-word-class-escape-plus-quantifier.js | 77 -------- ...ss-non-word-class-escape-positive-cases.js | 87 +++++++++ .../character-class-non-word-class-escape.js | 77 -------- ...r-class-whitespace-class-escape-flags-u.js | 82 -------- ...-whitespace-class-escape-negative-cases.js | 92 +++++++++ ...ce-class-escape-plus-quantifier-flags-u.js | 82 -------- ...whitespace-class-escape-plus-quantifier.js | 82 -------- ...-whitespace-class-escape-positive-cases.js | 90 +++++++++ ...character-class-whitespace-class-escape.js | 82 -------- ...aracter-class-word-class-escape-flags-u.js | 74 ------- ...-class-word-class-escape-negative-cases.js | 87 +++++++++ ...rd-class-escape-plus-quantifier-flags-u.js | 74 ------- ...class-word-class-escape-plus-quantifier.js | 74 ------- ...-class-word-class-escape-positive-cases.js | 84 ++++++++ .../character-class-word-class-escape.js | 74 ------- tools/regexp-generator/header.mjs | 1 + tools/regexp-generator/index.mjs | 181 ++++++++---------- tools/regexp-generator/regenerate.mjs | 63 ++++++ 35 files changed, 939 insertions(+), 1730 deletions(-) rename test/built-ins/RegExp/CharacterClassEscapes/{character-class-digit-class-escape.js => character-class-digit-class-escape-negative-cases.js} (69%) rename test/built-ins/RegExp/CharacterClassEscapes/{character-class-digit-class-escape-plus-quantifier-flags-u.js => character-class-digit-class-escape-positive-cases.js} (71%) delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js rename test/built-ins/RegExp/CharacterClassEscapes/{character-class-digit-class-escape-flags-u.js => character-class-non-digit-class-escape-negative-cases.js} (72%) delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-positive-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-flags-u.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-negative-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier-flags-u.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-positive-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-flags-u.js rename test/built-ins/RegExp/CharacterClassEscapes/{character-class-digit-class-escape-plus-quantifier.js => character-class-non-word-class-escape-negative-cases.js} (70%) delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier-flags-u.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-positive-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-flags-u.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-negative-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier-flags-u.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-positive-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-flags-u.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-negative-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier-flags-u.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier.js create mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-positive-cases.js delete mode 100644 test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape.js create mode 100644 tools/regexp-generator/regenerate.mjs diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-negative-cases.js similarity index 69% rename from test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js rename to test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-negative-cases.js index 1d95fd4740f..9ed4d361ef7 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-negative-cases.js @@ -1,10 +1,11 @@ // Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. // This code is governed by the BSD license found in the LICENSE file. /*--- esid: prod-CharacterClassEscape description: > - Compare range for digit class escape \d with flags g + Check negative cases of digit class escape \d. info: | This is a generated test. Please check out https://github.com/tc39/test262/tree/main/tools/regexp-generator/ @@ -45,22 +46,32 @@ includes: [regExpUtils.js] flags: [generated] ---*/ -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000030, 0x000039], - ], -}); +const str = buildString( +{ + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x00DBFF], + [0x00E000, 0x10FFFF] + ] +} +); -const re = /\d/g; +const standard = /\d/; +const unicode = /\d/u; +const vflag = /\d/v; +const regexes = [standard,unicode,vflag]; const errors = []; -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); +for (const regex of regexes) { + if (regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } } } } @@ -68,5 +79,5 @@ if (!re.test(str)) { assert.sameValue( errors.length, 0, - 'Expected matching code points, but received: ' + errors.join(',') + 'Expected no match, but matched: ' + errors.join(',') ); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-positive-cases.js similarity index 71% rename from test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js rename to test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-positive-cases.js index 63902cf76e5..7f04a0bae58 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier-flags-u.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-positive-cases.js @@ -1,10 +1,11 @@ // Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. // This code is governed by the BSD license found in the LICENSE file. /*--- esid: prod-CharacterClassEscape description: > - Compare range for digit class escape \d+ with flags ug + Check positive cases of digit class escape \d. info: | This is a generated test. Please check out https://github.com/tc39/test262/tree/main/tools/regexp-generator/ @@ -45,22 +46,29 @@ includes: [regExpUtils.js] flags: [generated] ---*/ -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000030, 0x000039], - ], -}); +const str = buildString( +{ + loneCodePoints: [], + ranges: [ + [0x000030, 0x000039] + ] +} +); -const re = /\d+/ug; +const standard = /^\d+$/; +const unicode = /^\d+$/u; +const vflag = /^\d+$/v; +const regexes = [standard,unicode,vflag]; const errors = []; -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); +for (const regex of regexes) { + if (!regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (!regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } } } } @@ -68,5 +76,5 @@ if (!re.test(str)) { assert.sameValue( errors.length, 0, - 'Expected matching code points, but received: ' + errors.join(',') + 'Expected full match, but did not match: ' + errors.join(',') ); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js deleted file mode 100644 index 7b3b98b4fca..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-flags-u.js +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-digit class escape \D with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x00DBFF], - [0x00E000, 0x10FFFF], - ], -}); - -const re = /\D/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-negative-cases.js similarity index 72% rename from test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js rename to test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-negative-cases.js index c3259250e6c..215e2177c85 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-flags-u.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-negative-cases.js @@ -1,10 +1,11 @@ // Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. // This code is governed by the BSD license found in the LICENSE file. /*--- esid: prod-CharacterClassEscape description: > - Compare range for digit class escape \d with flags ug + Check negative cases of non-digit class escape \D. info: | This is a generated test. Please check out https://github.com/tc39/test262/tree/main/tools/regexp-generator/ @@ -45,22 +46,29 @@ includes: [regExpUtils.js] flags: [generated] ---*/ -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000030, 0x000039], - ], -}); +const str = buildString( +{ + loneCodePoints: [], + ranges: [ + [0x000030, 0x000039] + ] +} +); -const re = /\d/ug; +const standard = /\D/; +const unicode = /\D/u; +const vflag = /\D/v; +const regexes = [standard,unicode,vflag]; const errors = []; -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); +for (const regex of regexes) { + if (regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } } } } @@ -68,5 +76,5 @@ if (!re.test(str)) { assert.sameValue( errors.length, 0, - 'Expected matching code points, but received: ' + errors.join(',') + 'Expected no match, but matched: ' + errors.join(',') ); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js deleted file mode 100644 index 56791800cdf..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u.js +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-digit class escape \D+ with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x00DBFF], - [0x00E000, 0x10FFFF], - ], -}); - -const re = /\D+/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js deleted file mode 100644 index 249d5598e38..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier.js +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-digit class escape \D+ with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x00DBFF], - [0x00E000, 0x00FFFF], - ], -}); - -const re = /\D+/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-positive-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-positive-cases.js new file mode 100644 index 00000000000..f5527dfeb33 --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-positive-cases.js @@ -0,0 +1,83 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check positive cases of non-digit class escape \D. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x00DBFF], + [0x00E000, 0x10FFFF] + ] +} +); + +const standard = /^\D+$/; +const unicode = /^\D+$/u; +const vflag = /^\D+$/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (!regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (!regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected full match, but did not match: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js deleted file mode 100644 index 94b0bb98a99..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape.js +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-digit class escape \D with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x00DBFF], - [0x00E000, 0x00FFFF], - ], -}); - -const re = /\D/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-flags-u.js deleted file mode 100644 index 35cd6a97481..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-flags-u.js +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-whitespace class escape \S with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x000008], - [0x00000E, 0x00001F], - [0x000021, 0x00009F], - [0x0000A1, 0x00167F], - [0x001681, 0x001FFF], - [0x00200B, 0x002027], - [0x00202A, 0x00202E], - [0x002030, 0x00205E], - [0x002060, 0x002FFF], - [0x003001, 0x00DBFF], - [0x00E000, 0x00FEFE], - [0x00FF00, 0x10FFFF], - ], -}); - -const re = /\S/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-negative-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-negative-cases.js new file mode 100644 index 00000000000..5c886e11966 --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-negative-cases.js @@ -0,0 +1,90 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check negative cases of non-whitespace class escape \S. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [ + 0x000020, + 0x0000A0, + 0x001680, + 0x00202F, + 0x00205F, + 0x003000, + 0x00FEFF + ], + ranges: [ + [0x000009, 0x00000D], + [0x002000, 0x00200A], + [0x002028, 0x002029] + ] +} +); + +const standard = /\S/; +const unicode = /\S/u; +const vflag = /\S/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected no match, but matched: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier-flags-u.js deleted file mode 100644 index a296c4f34c8..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier-flags-u.js +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-whitespace class escape \S+ with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x000008], - [0x00000E, 0x00001F], - [0x000021, 0x00009F], - [0x0000A1, 0x00167F], - [0x001681, 0x001FFF], - [0x00200B, 0x002027], - [0x00202A, 0x00202E], - [0x002030, 0x00205E], - [0x002060, 0x002FFF], - [0x003001, 0x00DBFF], - [0x00E000, 0x00FEFE], - [0x00FF00, 0x10FFFF], - ], -}); - -const re = /\S+/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier.js deleted file mode 100644 index 0865bce22a7..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier.js +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-whitespace class escape \S+ with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x000008], - [0x00000E, 0x00001F], - [0x000021, 0x00009F], - [0x0000A1, 0x00167F], - [0x001681, 0x001FFF], - [0x00200B, 0x002027], - [0x00202A, 0x00202E], - [0x002030, 0x00205E], - [0x002060, 0x002FFF], - [0x003001, 0x00DBFF], - [0x00E000, 0x00FEFE], - [0x00FF00, 0x00FFFF], - ], -}); - -const re = /\S+/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-positive-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-positive-cases.js new file mode 100644 index 00000000000..8821e3a567a --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-positive-cases.js @@ -0,0 +1,92 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check positive cases of non-whitespace class escape \S. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x000008], + [0x00000E, 0x00001F], + [0x000021, 0x00009F], + [0x0000A1, 0x00167F], + [0x001681, 0x001FFF], + [0x00200B, 0x002027], + [0x00202A, 0x00202E], + [0x002030, 0x00205E], + [0x002060, 0x002FFF], + [0x003001, 0x00DBFF], + [0x00E000, 0x00FEFE], + [0x00FF00, 0x10FFFF] + ] +} +); + +const standard = /^\S+$/; +const unicode = /^\S+$/u; +const vflag = /^\S+$/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (!regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (!regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected full match, but did not match: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape.js deleted file mode 100644 index 09a6a556d47..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape.js +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-whitespace class escape \S with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x000008], - [0x00000E, 0x00001F], - [0x000021, 0x00009F], - [0x0000A1, 0x00167F], - [0x001681, 0x001FFF], - [0x00200B, 0x002027], - [0x00202A, 0x00202E], - [0x002030, 0x00205E], - [0x002060, 0x002FFF], - [0x003001, 0x00DBFF], - [0x00E000, 0x00FEFE], - [0x00FF00, 0x00FFFF], - ], -}); - -const re = /\S/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-flags-u.js deleted file mode 100644 index 60560ef61b0..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-flags-u.js +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-word class escape \W with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x000060], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x000040], - [0x00005B, 0x00005E], - [0x00007B, 0x00DBFF], - [0x00E000, 0x10FFFF], - ], -}); - -const re = /\W/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-negative-cases.js similarity index 70% rename from test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js rename to test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-negative-cases.js index fe930f650d5..ad64fe91654 100644 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-digit-class-escape-plus-quantifier.js +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-negative-cases.js @@ -1,10 +1,11 @@ // Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. // This code is governed by the BSD license found in the LICENSE file. /*--- esid: prod-CharacterClassEscape description: > - Compare range for digit class escape \d+ with flags g + Check negative cases of non-word class escape \W. info: | This is a generated test. Please check out https://github.com/tc39/test262/tree/main/tools/regexp-generator/ @@ -45,22 +46,33 @@ includes: [regExpUtils.js] flags: [generated] ---*/ -const str = buildString({ - loneCodePoints: [], - ranges: [ - [0x000030, 0x000039], - ], -}); +const str = buildString( +{ + loneCodePoints: [ + 0x00005F + ], + ranges: [ + [0x000030, 0x000039], + [0x000041, 0x00005A], + [0x000061, 0x00007A] + ] +} +); -const re = /\d+/g; +const standard = /\W/; +const unicode = /\W/u; +const vflag = /\W/v; +const regexes = [standard,unicode,vflag]; const errors = []; -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); +for (const regex of regexes) { + if (regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } } } } @@ -68,5 +80,5 @@ if (!re.test(str)) { assert.sameValue( errors.length, 0, - 'Expected matching code points, but received: ' + errors.join(',') + 'Expected no match, but matched: ' + errors.join(',') ); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier-flags-u.js deleted file mode 100644 index 2cca79f8a26..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier-flags-u.js +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-word class escape \W+ with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x000060], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x000040], - [0x00005B, 0x00005E], - [0x00007B, 0x00DBFF], - [0x00E000, 0x10FFFF], - ], -}); - -const re = /\W+/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier.js deleted file mode 100644 index 598d0e8c059..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier.js +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-word class escape \W+ with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x000060], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x000040], - [0x00005B, 0x00005E], - [0x00007B, 0x00DBFF], - [0x00E000, 0x00FFFF], - ], -}); - -const re = /\W+/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-positive-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-positive-cases.js new file mode 100644 index 00000000000..650fd50b1c5 --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-positive-cases.js @@ -0,0 +1,87 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check positive cases of non-word class escape \W. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [ + 0x000060 + ], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x000040], + [0x00005B, 0x00005E], + [0x00007B, 0x00DBFF], + [0x00E000, 0x10FFFF] + ] +} +); + +const standard = /^\W+$/; +const unicode = /^\W+$/u; +const vflag = /^\W+$/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (!regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (!regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected full match, but did not match: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape.js deleted file mode 100644 index 1b6d6b238f7..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape.js +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for non-word class escape \W with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x000060], - ranges: [ - [0x00DC00, 0x00DFFF], - [0x000000, 0x00002F], - [0x00003A, 0x000040], - [0x00005B, 0x00005E], - [0x00007B, 0x00DBFF], - [0x00E000, 0x00FFFF], - ], -}); - -const re = /\W/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-flags-u.js deleted file mode 100644 index 531a50c5f93..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-flags-u.js +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for whitespace class escape \s with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [ - 0x000020, - 0x0000A0, - 0x001680, - 0x00202F, - 0x00205F, - 0x003000, - 0x00FEFF, - ], - ranges: [ - [0x000009, 0x00000D], - [0x002000, 0x00200A], - [0x002028, 0x002029], - ], -}); - -const re = /\s/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-negative-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-negative-cases.js new file mode 100644 index 00000000000..adc6c63e03d --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-negative-cases.js @@ -0,0 +1,92 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check negative cases of whitespace class escape \s. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x000008], + [0x00000E, 0x00001F], + [0x000021, 0x00009F], + [0x0000A1, 0x00167F], + [0x001681, 0x001FFF], + [0x00200B, 0x002027], + [0x00202A, 0x00202E], + [0x002030, 0x00205E], + [0x002060, 0x002FFF], + [0x003001, 0x00DBFF], + [0x00E000, 0x00FEFE], + [0x00FF00, 0x10FFFF] + ] +} +); + +const standard = /\s/; +const unicode = /\s/u; +const vflag = /\s/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected no match, but matched: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier-flags-u.js deleted file mode 100644 index a46c7b85029..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier-flags-u.js +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for whitespace class escape \s+ with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [ - 0x000020, - 0x0000A0, - 0x001680, - 0x00202F, - 0x00205F, - 0x003000, - 0x00FEFF, - ], - ranges: [ - [0x000009, 0x00000D], - [0x002000, 0x00200A], - [0x002028, 0x002029], - ], -}); - -const re = /\s+/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier.js deleted file mode 100644 index e9cdeed5051..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-plus-quantifier.js +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for whitespace class escape \s+ with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [ - 0x000020, - 0x0000A0, - 0x001680, - 0x00202F, - 0x00205F, - 0x003000, - 0x00FEFF, - ], - ranges: [ - [0x000009, 0x00000D], - [0x002000, 0x00200A], - [0x002028, 0x002029], - ], -}); - -const re = /\s+/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-positive-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-positive-cases.js new file mode 100644 index 00000000000..d1aba303bca --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape-positive-cases.js @@ -0,0 +1,90 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check positive cases of whitespace class escape \s. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [ + 0x000020, + 0x0000A0, + 0x001680, + 0x00202F, + 0x00205F, + 0x003000, + 0x00FEFF + ], + ranges: [ + [0x000009, 0x00000D], + [0x002000, 0x00200A], + [0x002028, 0x002029] + ] +} +); + +const standard = /^\s+$/; +const unicode = /^\s+$/u; +const vflag = /^\s+$/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (!regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (!regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected full match, but did not match: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape.js deleted file mode 100644 index b9e1dcd5f43..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-whitespace-class-escape.js +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for whitespace class escape \s with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [ - 0x000020, - 0x0000A0, - 0x001680, - 0x00202F, - 0x00205F, - 0x003000, - 0x00FEFF, - ], - ranges: [ - [0x000009, 0x00000D], - [0x002000, 0x00200A], - [0x002028, 0x002029], - ], -}); - -const re = /\s/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-flags-u.js deleted file mode 100644 index 1cedbcc3040..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-flags-u.js +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for word class escape \w with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x00005F], - ranges: [ - [0x000030, 0x000039], - [0x000041, 0x00005A], - [0x000061, 0x00007A], - ], -}); - -const re = /\w/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-negative-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-negative-cases.js new file mode 100644 index 00000000000..46a280173dc --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-negative-cases.js @@ -0,0 +1,87 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check negative cases of word class escape \w. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [ + 0x000060 + ], + ranges: [ + [0x00DC00, 0x00DFFF], + [0x000000, 0x00002F], + [0x00003A, 0x000040], + [0x00005B, 0x00005E], + [0x00007B, 0x00DBFF], + [0x00E000, 0x10FFFF] + ] +} +); + +const standard = /\w/; +const unicode = /\w/u; +const vflag = /\w/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected no match, but matched: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier-flags-u.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier-flags-u.js deleted file mode 100644 index 2959480dc3f..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier-flags-u.js +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for word class escape \w+ with flags ug -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x00005F], - ranges: [ - [0x000030, 0x000039], - [0x000041, 0x00005A], - [0x000061, 0x00007A], - ], -}); - -const re = /\w+/ug; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier.js deleted file mode 100644 index 5a756fd9cd7..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-plus-quantifier.js +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for word class escape \w+ with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x00005F], - ranges: [ - [0x000030, 0x000039], - [0x000041, 0x00005A], - [0x000061, 0x00007A], - ], -}); - -const re = /\w+/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-positive-cases.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-positive-cases.js new file mode 100644 index 00000000000..b136af9c11d --- /dev/null +++ b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape-positive-cases.js @@ -0,0 +1,84 @@ +// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. +// This code is governed by the BSD license found in the LICENSE file. + +/*--- +esid: prod-CharacterClassEscape +description: > + Check positive cases of word class escape \w. +info: | + This is a generated test. Please check out + https://github.com/tc39/test262/tree/main/tools/regexp-generator/ + for any changes. + + CharacterClassEscape[UnicodeMode] :: + d + D + s + S + w + W + [+UnicodeMode] p{ UnicodePropertyValueExpression } + [+UnicodeMode] P{ UnicodePropertyValueExpression } + + 22.2.2.9 Runtime Semantics: CompileToCharSet + + CharacterClassEscape :: d + 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, + 5, 6, 7, 8, and 9. + CharacterClassEscape :: D + 1. Let S be the CharSet returned by CharacterClassEscape :: d. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: s + 1. Return the CharSet containing all characters corresponding to a code + point on the right-hand side of the WhiteSpace or LineTerminator + productions. + CharacterClassEscape :: S + 1. Let S be the CharSet returned by CharacterClassEscape :: s. + 2. Return CharacterComplement(rer, S). + CharacterClassEscape :: w + 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). + CharacterClassEscape :: W + 1. Let S be the CharSet returned by CharacterClassEscape :: w. + 2. Return CharacterComplement(rer, S). +features: [String.fromCodePoint] +includes: [regExpUtils.js] +flags: [generated] +---*/ + +const str = buildString( +{ + loneCodePoints: [ + 0x00005F + ], + ranges: [ + [0x000030, 0x000039], + [0x000041, 0x00005A], + [0x000061, 0x00007A] + ] +} +); + +const standard = /^\w+$/; +const unicode = /^\w+$/u; +const vflag = /^\w+$/v; +const regexes = [standard,unicode,vflag]; + +const errors = []; + +for (const regex of regexes) { + if (!regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (!regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } + } + } +} + +assert.sameValue( + errors.length, + 0, + 'Expected full match, but did not match: ' + errors.join(',') +); diff --git a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape.js b/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape.js deleted file mode 100644 index da4db67850b..00000000000 --- a/test/built-ins/RegExp/CharacterClassEscapes/character-class-word-class-escape.js +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2018 Leo Balter. All rights reserved. -// This code is governed by the BSD license found in the LICENSE file. - -/*--- -esid: prod-CharacterClassEscape -description: > - Compare range for word class escape \w with flags g -info: | - This is a generated test. Please check out - https://github.com/tc39/test262/tree/main/tools/regexp-generator/ - for any changes. - - CharacterClassEscape[UnicodeMode] :: - d - D - s - S - w - W - [+UnicodeMode] p{ UnicodePropertyValueExpression } - [+UnicodeMode] P{ UnicodePropertyValueExpression } - - 22.2.2.9 Runtime Semantics: CompileToCharSet - - CharacterClassEscape :: d - 1. Return the ten-element CharSet containing the characters 0, 1, 2, 3, 4, - 5, 6, 7, 8, and 9. - CharacterClassEscape :: D - 1. Let S be the CharSet returned by CharacterClassEscape :: d. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: s - 1. Return the CharSet containing all characters corresponding to a code - point on the right-hand side of the WhiteSpace or LineTerminator - productions. - CharacterClassEscape :: S - 1. Let S be the CharSet returned by CharacterClassEscape :: s. - 2. Return CharacterComplement(rer, S). - CharacterClassEscape :: w - 1. Return MaybeSimpleCaseFolding(rer, WordCharacters(rer)). - CharacterClassEscape :: W - 1. Let S be the CharSet returned by CharacterClassEscape :: w. - 2. Return CharacterComplement(rer, S). -features: [String.fromCodePoint] -includes: [regExpUtils.js] -flags: [generated] ----*/ - -const str = buildString({ - loneCodePoints: [0x00005F], - ranges: [ - [0x000030, 0x000039], - [0x000041, 0x00005A], - [0x000061, 0x00007A], - ], -}); - -const re = /\w/g; - -const errors = []; - -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); - } - } -} - -assert.sameValue( - errors.length, - 0, - 'Expected matching code points, but received: ' + errors.join(',') -); diff --git a/tools/regexp-generator/header.mjs b/tools/regexp-generator/header.mjs index 33f342492ed..5e407302450 100644 --- a/tools/regexp-generator/header.mjs +++ b/tools/regexp-generator/header.mjs @@ -1,5 +1,6 @@ export default description => { let header = `// Copyright (C) 2018 Leo Balter. All rights reserved. +// Copyright (C) 2024 Aurèle Barrière. All rights reserved. // This code is governed by the BSD license found in the LICENSE file. /*--- diff --git a/tools/regexp-generator/index.mjs b/tools/regexp-generator/index.mjs index 877c087b4df..8c7bfc241bc 100644 --- a/tools/regexp-generator/index.mjs +++ b/tools/regexp-generator/index.mjs @@ -1,99 +1,100 @@ import filenamify from 'filenamify'; import fs from 'node:fs'; -import regenerate from 'regenerate'; +import regenerate from './regenerate.mjs'; import rewritePattern from 'regexpu-core'; import ESCAPE_SETS from 'regexpu-core/data/character-class-escape-sets.js'; import slugify from 'slugify'; import header from './header.mjs'; +// The different character class escapes. const patterns = { - 'whitespace class escape': '\\s', - 'non-whitespace class escape': '\\S', - 'word class escape': '\\w', - 'non-word class escape': '\\W', - 'digit class escape': '\\d', - 'non-digit class escape': '\\D', + 's': 'whitespace class escape', + 'S': 'non-whitespace class escape', + 'w': 'word class escape', + 'W': 'non-word class escape', + 'd': 'digit class escape', + 'D': 'non-digit class escape', }; -// Pretty-printing code adapted from unicode-property-escapes-tests. -// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js - -function toHex(codePoint) { - return '0x' + ('00000' + codePoint.toString(16).toUpperCase()).slice(-6); -}; - -function toTestData(reg) { - const data = reg.data; - // Iterate over the data per `(start, end)` pair. - let index = 0; - const length = data.length; - const loneCodePoints = []; - const ranges = []; - while (index < length) { - let start = data[index]; - let end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. - if (start == end) { - loneCodePoints.push(start); - } else { - ranges.push([start, end]); - } - index += 2; - } - return [ loneCodePoints, ranges ]; +const negation = { + 's': 'S', + 'S': 's', + 'w': 'W', + 'W': 'w', + 'd': 'D', + 'D': 'd', } -function prettyPrint([ loneCodePoints, ranges ]) { - const indent = ' '; - loneCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint)); - ranges = ranges.map( - (range) => `[${ toHex(range[0]) }, ${ toHex(range[1]) }]` - ); - const loneCodePointsOutput = loneCodePoints.length ? - loneCodePoints.length === 1 ? `[${loneCodePoints[0]}]` : - `[\n${indent}${indent}${ loneCodePoints.join(`,\n${indent}${indent}`) },\n${indent}]` : - `[]`; - const rangesOutput = ranges.length ? - `[\n${indent}${indent}${ ranges.join(`,\n${indent}${indent}`) },\n${indent}]` : - `[]`; - return `{\n${indent}loneCodePoints: ${ loneCodePointsOutput },\n${indent}ranges: ${ rangesOutput },\n}`; +// In each test file, test all these flag configurations. +const flags_configs = { + 'standard': '', + 'unicode': 'u', + 'vflag': 'v', } -const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF); +// For each character class escape, test positive and negative cases. +const test_cases = [ + { positivity: true, + suffix: '-positive-cases' }, + { positivity: false, + suffix: '-negative-cases' }, +] -function buildString(escapeChar, flags) { - const isUnicode = flags.includes('u'); - let escapeData = ESCAPE_SETS[isUnicode ? 'UNICODE' : 'REGULAR'].get(escapeChar); +function buildRegex(pattern, positivity) { + return positivity ? `^\\${pattern}+$` : `\\${pattern}`; +} - const lowSurrogates = escapeData.clone().intersection(LOW_SURROGATES); - if (lowSurrogates.data.length === 0) { - return prettyPrint(toTestData(escapeData)); +function buildRegexes(pattern, positivity) { + let regex = buildRegex(pattern, positivity); + let reg_str = ''; + for (const [regexname, flags] of Object.entries(flags_configs)) { +reg_str += `const ${regexname} = /${regex}/${flags};\n`; } - const rest = escapeData.clone().remove(LOW_SURROGATES); - const [ lowLoneCodePoints, lowRanges ] = toTestData(lowSurrogates); - const [ loneCodePoints, ranges ] = toTestData(rest); - loneCodePoints.unshift(...lowLoneCodePoints); - ranges.unshift(...lowRanges); - return prettyPrint([ loneCodePoints, ranges ]); + let all_regexes = Object.keys(flags_configs).toString(); + reg_str += `const regexes = [${all_regexes}];`; + return reg_str; } -function buildContent(desc, pattern, flags) { - let string = buildString(pattern[1], flags); +function buildString(pattern, positivity) { + let escape = positivity ? pattern : negation[pattern]; + let escape_data = ESCAPE_SETS.UNICODE.get(escape); + return escape_data.toTestCode(); +} - let content = header(`Compare range for ${desc} ${pattern} with flags ${flags}`); +function buildDescr(pattern, positivity) { + let name = patterns[pattern]; + let descr = positivity ? 'Check positive cases of' : 'Check negative cases of'; + return `${descr} ${name} \\${pattern}.`; +} + +function buildContent(pattern, positivity) { + + let regexes = buildRegexes(pattern, positivity); + let string = buildString(pattern, positivity); + let descr = buildDescr(pattern, positivity); + let test_negate = positivity ? '!' : ''; + let err_msg = positivity ? 'Expected full match, but did not match: ' : + 'Expected no match, but matched: '; + + let content = header(`${descr}`); content += ` -const str = buildString(${string}); +const str = buildString( +${string} +); -const re = /${pattern}/${flags}; +${regexes} const errors = []; -if (!re.test(str)) { - // Error, let's find out where - for (const char of str) { - if (!re.test(char)) { - errors.push('0x' + char.codePointAt(0).toString(16)); +for (const regex of regexes) { + if (${test_negate}regex.test(str)) { + // Error, let's find out where + for (const char of str) { + if (${test_negate}regex.test(char)) { + errors.push('0x' + char.codePointAt(0).toString(16)); + } } } } @@ -101,11 +102,10 @@ if (!re.test(str)) { assert.sameValue( errors.length, 0, - 'Expected matching code points, but received: ' + errors.join(',') + '${err_msg}' + errors.join(',') ); `; - - return content; + return content; } function writeFile(desc, content, suffix = '') { @@ -114,40 +114,9 @@ function writeFile(desc, content, suffix = '') { fs.writeFileSync(filename, content); } -// No additions -for (const [desc, escape] of Object.entries(patterns)) { - [ - { - quantifier: '', - flags: '', - }, - { - quantifier: '+', - flags: '', - suffix: '-plus-quantifier', - }, - { - quantifier: '', - flags: 'u', - suffix: '-flags-u', - }, - { - quantifier: '+', - flags: 'u', - suffix: '-plus-quantifier-flags-u', - }, - ].forEach(({quantifier, flags, suffix}) => { - flags += 'g'; - - const pattern = `${escape}${quantifier}`; - const range = rewritePattern(pattern, flags, { - unicodeFlag: flags.includes('u') ? 'transform' : false, - }); - - console.log(`${pattern} => ${range}, flags: ${flags}`); - - const content = buildContent(desc, pattern, flags); - +for (const [pattern, desc] of Object.entries(patterns)) { + test_cases.forEach(({positivity, suffix}) => { + const content = buildContent(pattern, positivity); writeFile(desc, content, suffix); }); } diff --git a/tools/regexp-generator/regenerate.mjs b/tools/regexp-generator/regenerate.mjs new file mode 100644 index 00000000000..212ab09ae0a --- /dev/null +++ b/tools/regexp-generator/regenerate.mjs @@ -0,0 +1,63 @@ +// From unicode-property-escapes-tests +// https://github.com/mathiasbynens/unicode-property-escapes-tests/blob/60f2dbec2b2a840ee67aa04dbd3449bb90fd2999/regenerate.js +import regenerate from 'regenerate'; + +const toHex = (codePoint) => { + return '0x' + ('00000' + codePoint.toString(16).toUpperCase()).slice(-6); +}; + +regenerate.prototype.isEmpty = function() { + return this.data.length === 0; +}; + +regenerate.prototype.toTestData = function() { + const data = this.data; + // Iterate over the data per `(start, end)` pair. + let index = 0; + const length = data.length; + const loneCodePoints = []; + const ranges = []; + while (index < length) { + let start = data[index]; + let end = data[index + 1] - 1; // Note: the `- 1` makes `end` inclusive. + if (start == end) { + loneCodePoints.push(start); + } else { + ranges.push([start, end]); + } + index += 2; + } + return [ loneCodePoints, ranges ]; +}; + +const LOW_SURROGATES = regenerate().addRange(0xDC00, 0xDFFF); + +regenerate.prototype.toTestCode = function() { + const lowSurrogates = this.clone().intersection(LOW_SURROGATES); + if (lowSurrogates.isEmpty()) { + return prettyPrint([...this.toTestData()]); + } + const rest = this.clone().remove(LOW_SURROGATES); + const [ lowLoneCodePoints, lowRanges ] = lowSurrogates.toTestData(); + const [ loneCodePoints, ranges ] = rest.toTestData(); + loneCodePoints.unshift(...lowLoneCodePoints); + ranges.unshift(...lowRanges); + return prettyPrint([ loneCodePoints, ranges ]); +}; + +const prettyPrint = ([ loneCodePoints, ranges ]) => { + const indent = ' '; // Test 262 uses two-space indents. + loneCodePoints = loneCodePoints.map((codePoint) => toHex(codePoint)); + ranges = ranges.map( + (range) => `[${ toHex(range[0]) }, ${ toHex(range[1]) }]` + ); + const loneCodePointsOutput = loneCodePoints.length ? + `[\n${indent}${indent}${ loneCodePoints.join(`,\n${indent}${indent}`) }\n${indent}]` : + `[]`; + const rangesOutput = ranges.length ? + `[\n${indent}${indent}${ ranges.join(`,\n${indent}${indent}`) }\n${indent}]` : + `[]`; + return `{\n${indent}loneCodePoints: ${ loneCodePointsOutput },\n${indent}ranges: ${ rangesOutput }\n}`; +}; + +export default regenerate;