Skip to content

Commit b9bd724

Browse files
committed
TRegex: multiple fixes for look-behind merging across main expression boundary.
1 parent d5e3b52 commit b9bd724

File tree

12 files changed: +248 -115 lines changed

12 files changed: +248 -115 lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/InputStringGeneratorTests.java

+7 -6
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -42,7 +42,6 @@
4242

4343
import java.util.Random;
4444

45-
import com.oracle.truffle.api.strings.TruffleString;
4645
import org.graalvm.polyglot.Value;
4746
import org.junit.Assert;
4847
import org.junit.Test;
@@ -86,7 +85,7 @@ public void testBenchmarkRegexes() {
8685
testInputStringGenerator("(?<=(a))\\1");
8786
}
8887

89-
private TruffleString generateInputString(String pattern, String flags, String options, Encodings.Encoding encoding, long rngSeed) {
88+
private InputStringGenerator.InputString generateInputString(String pattern, String flags, String options, Encodings.Encoding encoding, long rngSeed) {
9089
String sourceString = createSourceString(pattern, flags, options, encoding);
9190
Source source = Source.newBuilder("regex", sourceString, "regexSource").build();
9291
RegexSource regexSource = RegexLanguage.createRegexSource(source);
@@ -105,10 +104,12 @@ void testInputStringGenerator(String pattern, String flags, String options, Enco
105104

106105
private void testInputStringGenerator(String pattern, String flags, String options, Encodings.Encoding encoding, long rngSeed, Value compiledRegex) {
107106
for (int i = 0; i < 20; i++) {
108-
TruffleString input = generateInputString(pattern, flags, options, encoding, rngSeed);
107+
InputStringGenerator.InputString input = generateInputString(pattern, flags, options, encoding, rngSeed + i);
109108
Assert.assertNotNull(input);
110-
Value result = execRegex(compiledRegex, encoding, input, 0);
111-
Assert.assertTrue(result.getMember("isMatch").asBoolean());
109+
Value result = execRegex(compiledRegex, encoding, input.input(), input.fromIndex());
110+
if (!result.getMember("isMatch").asBoolean()) {
111+
Assert.assertTrue(execRegex(compiledRegex, encoding, input.input(), input.matchStart()).getMember("isMatch").asBoolean());
112+
}
112113
}
113114
}
114115
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JsTests.java

+14 -1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -319,6 +319,12 @@ public void emptyTransitionMergedWithLookAhead() {
319319
test("a?(?=b(?<=ab)()|)", "", "a", 0, true, 0, 1, -1, -1);
320320
}
321321

322+
@Test
323+
public void gr60222() {
324+
test("(?<=a)b|", "m", "aaabaaa", 3, true, 3, 4);
325+
test("(?=^(?:[^])+){3}|(?:(^)+(?!\\b(([^]))+))*", "m", "\u00ea\u9bbb\n\n\n\u00ea\u9bbb\n\n\n\u00ea\u9bbb\n\n\n\u00ea\u9bbb\n\n\n", 10, true, 10, 10, -1, -1, -1, -1, -1, -1);
326+
}
327+
322328
@Test
323329
public void generatedTests() {
324330
/* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */
@@ -544,6 +550,13 @@ public void generatedTests() {
544550
test("a(?:|[0-9]+?a|[0-9a]){11,13}?[ab]", "", "a372a466a109585878b", 0, true, 0, 5);
545551
test("(?<=ab(?:c|$){8,8})", "", "abccccc", 0, true, 7, 7);
546552
test("(?:^a|$){1,72}a", "", "aaaaaaaa", 0, true, 0, 2);
553+
test("(?<=a)b|", "", "aaabaaa", 3, true, 3, 4);
554+
test("^a|(?:^)*", "m", "aa\n\n\naa\n\n\naa\n\n\naa\n\n\n", 10, true, 10, 11);
555+
test("(?<=[ab][a])", "", "ababab", 2, true, 3, 3);
556+
test("[ab]*(?<=a)$", "", "bbabaa", 1, true, 1, 6);
557+
test("[\u7514-\ua3e3\ub107]*(?<=\\S)$", "", "\u76a3\u782b\u782b\ub107\u782b\u9950\u76a3\ub107\u9950\u76a3\u9a36", 3, true, 3, 11);
558+
test("$(?<=a)", "y", "aaaaa", 5, true, 5, 5);
559+
test("^abc[^]", "m", "abcdabc", 1, false);
547560

548561
/* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */
549562
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -1620,6 +1620,7 @@ public void generatedTests() {
16201620
test("(?:a()|b??){22,26}c", "", "aabbbaabaaaaaabaaaac", 0, true, 19, 20, 19, 19, -1, -1);
16211621
test("b()(a\\1|){4,4}\\2c", "", "baaaac", 0, false);
16221622
test("a((?=b()|)[a-d])+", "", "abbbcbd", 0, true, 0, 7, 6, 7, 6, 6, -1, -1);
1623+
test("[[.\ue09f.][=\ud800\udc00=]]", "", "\ud800\udc00", 0, true, 0, 4);
16231624
test("(a{1100,1100})\\1", "i", "a".repeat(2400), 0, true, 0, 2200, 0, 1100);
16241625

16251626
/* GENERATED CODE END - KEEP THIS MARKER FOR AUTOMATIC UPDATES */

0 commit comments

Comments
 (0)