Skip to content

Commit

Permalink
[GR-17176] TruffleStrings: bugfixes in indexof operations.
Browse files Browse the repository at this point in the history
PullRequest: graal/11120
  • Loading branch information
djoooooe committed Feb 25, 2022
2 parents 94f5156 + a096ebb commit 6e6fe68
Show file tree
Hide file tree
Showing 16 changed files with 231 additions and 136 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@

import java.nio.ByteOrder;

import com.oracle.truffle.api.strings.InternalByteArray;
import org.graalvm.shadowed.org.jcodings.Encoding;
import org.junit.Assert;
import org.junit.Test;

import com.oracle.truffle.api.strings.AbstractTruffleString;
import com.oracle.truffle.api.strings.InternalByteArray;
import com.oracle.truffle.api.strings.MutableTruffleString;
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.api.strings.TruffleStringBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@

import static com.oracle.truffle.api.strings.test.TStringTestUtil.byteArray;

import com.oracle.truffle.api.strings.MutableTruffleString;
import org.junit.Assert;
import org.junit.Test;

import com.oracle.truffle.api.strings.MutableTruffleString;
import com.oracle.truffle.api.strings.TruffleString;

public class TStringCornerCaseTests extends TStringTestBase {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,17 @@ public interface TestStrings {
}

public interface TestIndexOfString {
void run(AbstractTruffleString b, int expectedIndex);

int run(AbstractTruffleString b, int fromIndex, int toIndex);

/**
 * Runs the index-of operation under test and verifies its result.
 *
 * @param b the string passed to {@link #run(AbstractTruffleString, int, int)}
 * @param fromIndex first index argument forwarded to the operation
 * @param toIndex second index argument forwarded to the operation
 * @param expectedResult the exact index expected when non-negative; any negative value means
 *            "no match", in which case every negative result is accepted
 */
default void run(AbstractTruffleString b, int fromIndex, int toIndex, int expectedResult) {
    int actual = run(b, fromIndex, toIndex);
    if (expectedResult >= 0) {
        Assert.assertEquals(expectedResult, actual);
        return;
    }
    // A miss is only required to be negative, not a specific sentinel value.
    Assert.assertTrue("expected: negative value, actual: " + actual, actual < 0);
}
}

public interface TestS {
Expand Down Expand Up @@ -460,30 +470,61 @@ protected static void testIndexOfString(AbstractTruffleString a, byte[] array, b
// ignore broken strings
return;
}
int lastCodepoint = codepoints[codepoints.length - 1];
TruffleString first = TruffleString.fromCodePointUncached(codepoints[0], encoding);
int lastCPI = codepoints.length - 1;
int firstCodepoint = codepoints[0];
int lastCodepoint = codepoints[lastCPI];
TruffleString first = TruffleString.fromCodePointUncached(firstCodepoint, encoding);
TruffleString firstSubstring = a.substringByteIndexUncached(0, codepoints.length == 1 ? array.length : byteIndices[1], encoding, true);
TruffleString last = TruffleString.fromCodePointUncached(lastCodepoint, encoding);
TruffleString lastSubstring = a.substringByteIndexUncached(byteIndices[codepoints.length - 1], array.length - byteIndices[codepoints.length - 1], encoding, true);
int expectedFirst = lastIndex ? lastIndexOfCodePoint(codepoints, byteIndices, byteIndex, codepoints[0]) : 0;
int expectedLast = lastIndex ? byteIndex ? byteIndices[codepoints.length - 1] : codepoints.length - 1 : indexOfCodePoint(codepoints, byteIndices, byteIndex, lastCodepoint);
test.run(first, expectedFirst);
test.run(firstSubstring, expectedFirst);
test.run(last, expectedLast);
test.run(lastSubstring, expectedLast);
TruffleString lastSubstring = a.substringByteIndexUncached(byteIndices[lastCPI], array.length - byteIndices[lastCPI], encoding, true);
int expectedFirst = lastIndex ? lastIndexOfCodePoint(codepoints, byteIndices, byteIndex, codepoints.length, 0, firstCodepoint) : 0;
int expectedLast = lastIndex ? byteIndex ? byteIndices[lastCPI] : lastCPI : indexOfCodePoint(codepoints, byteIndices, byteIndex, 0, codepoints.length, lastCodepoint);
int fromIndex;
int toIndex;
if (lastIndex) {
fromIndex = byteIndex ? array.length : codepoints.length;
toIndex = 0;
} else {
fromIndex = 0;
toIndex = byteIndex ? array.length : codepoints.length;
}
test.run(first, fromIndex, toIndex, expectedFirst);
test.run(firstSubstring, fromIndex, toIndex, expectedFirst);
test.run(last, fromIndex, toIndex, expectedLast);
test.run(lastSubstring, fromIndex, toIndex, expectedLast);
test.run(first, 0, 0, -1);

int i1 = byteIndex ? byteIndices[1] : 1;
int iLast1 = byteIndex ? byteIndices[codepoints.length - 1] : codepoints.length - 1;

if (lastIndex) {
expectedFirst = lastIndexOfCodePoint(codepoints, byteIndices, byteIndex, codepoints.length, 1, firstCodepoint);
expectedLast = lastIndexOfCodePoint(codepoints, byteIndices, byteIndex, codepoints.length - 1, 0, lastCodepoint);
test.run(first, fromIndex, i1, expectedFirst);
test.run(firstSubstring, fromIndex, i1, expectedFirst);
test.run(last, iLast1, toIndex, expectedLast);
test.run(lastSubstring, iLast1, toIndex, expectedLast);
} else {
expectedFirst = indexOfCodePoint(codepoints, byteIndices, byteIndex, 1, codepoints.length, firstCodepoint);
expectedLast = indexOfCodePoint(codepoints, byteIndices, byteIndex, 0, codepoints.length - 1, lastCodepoint);
test.run(first, i1, toIndex, expectedFirst);
test.run(firstSubstring, i1, toIndex, expectedFirst);
test.run(last, fromIndex, iLast1, expectedLast);
test.run(lastSubstring, fromIndex, iLast1, expectedLast);
}
}

private static int indexOfCodePoint(int[] codepoints, int[] byteIndices, boolean byteIndex, int cp) {
for (int i = 0; i < codepoints.length; i++) {
/**
 * Reference implementation: finds the first occurrence of {@code cp} in
 * {@code codepoints[fromIndex, toIndex)}.
 *
 * @param codepoints the code points to scan
 * @param byteIndices per-code-point values returned instead of the position when
 *            {@code byteIndex} is set
 * @param byteIndex if {@code true}, return {@code byteIndices[i]} for the match position
 *            {@code i}; otherwise return {@code i} itself
 * @param fromIndex first position to inspect (inclusive)
 * @param toIndex end of the scanned range (exclusive)
 * @param cp the code point to look for
 * @return the index of the first match, or {@code -1} if the range contains no match
 */
private static int indexOfCodePoint(int[] codepoints, int[] byteIndices, boolean byteIndex, int fromIndex, int toIndex, int cp) {
    int pos = fromIndex;
    // Advance until a match is found or the range is exhausted.
    while (pos < toIndex && codepoints[pos] != cp) {
        pos++;
    }
    if (pos >= toIndex) {
        return -1;
    }
    return byteIndex ? byteIndices[pos] : pos;
}

private static int lastIndexOfCodePoint(int[] codepoints, int[] byteIndices, boolean byteIndex, int cp) {
for (int i = codepoints.length - 1; i >= 0; i--) {
private static int lastIndexOfCodePoint(int[] codepoints, int[] byteIndices, boolean byteIndex, int fromIndex, int toIndex, int cp) {
for (int i = fromIndex - 1; i >= toIndex; i--) {
if (codepoints[i] == cp) {
return byteIndex ? byteIndices[i] : i;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@

package com.oracle.truffle.api.strings.test;

import static com.oracle.truffle.api.strings.TruffleString.Encoding.UTF_8;

import java.nio.charset.StandardCharsets;

import org.junit.Assert;
Expand Down Expand Up @@ -95,29 +97,75 @@ private static byte[] utf8Encode(int codepoint) {
@Test
public void testValid() {
for (byte[] arr : VALID) {
Assert.assertTrue(TStringTestUtil.hex(arr), TruffleString.fromByteArrayUncached(arr, 0, arr.length, TruffleString.Encoding.UTF_8, false).isValidUncached(TruffleString.Encoding.UTF_8));
Assert.assertTrue(TStringTestUtil.hex(arr), TruffleString.fromByteArrayUncached(arr, 0, arr.length, UTF_8, false).isValidUncached(UTF_8));
}
}

@Test
public void testInvalid() {
for (byte[] arr : INVALID) {
Assert.assertFalse(TStringTestUtil.hex(arr), TruffleString.fromByteArrayUncached(arr, 0, arr.length, TruffleString.Encoding.UTF_8, false).isValidUncached(TruffleString.Encoding.UTF_8));
Assert.assertFalse(TStringTestUtil.hex(arr), TruffleString.fromByteArrayUncached(arr, 0, arr.length, UTF_8, false).isValidUncached(UTF_8));
}
}

@Test
public void testCodePointLength1() {
byte[] arr = TStringTestUtil.byteArray(0xf4, 0x90, 0x80, 0x80, 0x7f, 0x7f);
TruffleString a = TruffleString.fromByteArrayUncached(arr, 0, arr.length, TruffleString.Encoding.UTF_8, false);
TruffleString a = TruffleString.fromByteArrayUncached(arr, 0, arr.length, UTF_8, false);
a.toString();
Assert.assertEquals(6, a.codePointLengthUncached(TruffleString.Encoding.UTF_8));
Assert.assertEquals(6, a.codePointLengthUncached(UTF_8));
}

@Test
public void testCodePointLength2() {
byte[] arr = TStringTestUtil.byteArray(0, 0, 0xc0, 0xbf);
TruffleString a = TruffleString.fromByteArrayUncached(arr, 0, arr.length, TruffleString.Encoding.UTF_8, false);
Assert.assertEquals(4, a.codePointLengthUncached(TruffleString.Encoding.UTF_8));
TruffleString a = TruffleString.fromByteArrayUncached(arr, 0, arr.length, UTF_8, false);
Assert.assertEquals(4, a.codePointLengthUncached(UTF_8));
}

/**
 * Searches "aaa" for "a" with both index arguments set to 1 and expects no match to be
 * reported.
 */
@Test
public void testIndexOf() {
    TruffleString haystack = TruffleString.fromJavaStringUncached("aaa", UTF_8);
    TruffleString needle = TruffleString.fromJavaStringUncached("a", UTF_8);
    int found = haystack.byteIndexOfStringUncached(needle, 1, 1, UTF_8);
    Assert.assertEquals(-1, found);
}

/**
 * The haystack is code point 0x102 repeated ten times; the needle is 0x102 followed by
 * 0x10_0304. Neither the byte-indexed nor the code-point-indexed search may find the needle.
 */
@Test
public void testIndexOf2() {
    TruffleString repeated = TruffleString.fromCodePointUncached(0x102, UTF_8);
    TruffleString trailing = TruffleString.fromCodePointUncached(0x10_0304, UTF_8);
    TruffleString haystack = repeated.repeatUncached(10, UTF_8);
    TruffleString needle = repeated.concatUncached(trailing, UTF_8, false);

    // Byte-indexed search over the whole haystack.
    int byteResult = haystack.byteIndexOfStringUncached(needle, 0, haystack.byteLength(UTF_8), UTF_8);
    Assert.assertEquals(-1, byteResult);

    // Code-point-indexed search over the whole haystack.
    int codePointResult = haystack.indexOfStringUncached(needle, 0, haystack.codePointLengthUncached(UTF_8), UTF_8);
    Assert.assertEquals(-1, codePointResult);
}

/**
 * A backwards search for "baa" in "aaa" (index arguments 3 and 0) must not report a match.
 */
@Test
public void testIndexOf3() {
    TruffleString haystack = TruffleString.fromJavaStringUncached("aaa", UTF_8);
    TruffleString needle = TruffleString.fromJavaStringUncached("baa", UTF_8);
    int found = haystack.lastIndexOfStringUncached(needle, 3, 0, UTF_8);
    Assert.assertEquals(-1, found);
}

/**
 * "def" occurs only at the very start of "defghiabc"; a backwards search with index arguments
 * 9 and 1 is expected to return -1.
 */
@Test
public void testIndexOf4() {
    TruffleString haystack = TruffleString.fromJavaStringUncached("defghiabc", UTF_8);
    TruffleString needle = TruffleString.fromJavaStringUncached("def", UTF_8);
    int found = haystack.lastIndexOfStringUncached(needle, 9, 1, UTF_8);
    Assert.assertEquals(-1, found);
}

/**
 * Backwards searches for "a\u00A3" in "a\u00A3b\u00A3" whose second index argument is 1 are
 * expected to return -1, both with code point indices (4, 1) and with byte indices (6, 1).
 */
@Test
public void testIndexOf5() {
    TruffleString haystack = TruffleString.fromJavaStringUncached("a\u00A3b\u00A3", UTF_8);
    TruffleString needle = TruffleString.fromJavaStringUncached("a\u00A3", UTF_8);

    int codePointResult = haystack.lastIndexOfStringUncached(needle, 4, 1, UTF_8);
    Assert.assertEquals(-1, codePointResult);

    int byteResult = haystack.lastByteIndexOfStringUncached(needle, 6, 1, UTF_8);
    Assert.assertEquals(-1, byteResult);
}

/**
 * A backwards search over the full code point range of a string containing a non-ASCII
 * character must still find "<" at index 0.
 */
@Test
public void testIndexOf6() {
    TruffleString haystack = TruffleString.fromJavaStringUncached("<......\u043c...", UTF_8);
    TruffleString needle = TruffleString.fromJavaStringUncached("<", UTF_8);
    int searchStart = haystack.codePointLengthUncached(UTF_8);
    int found = haystack.lastIndexOfStringUncached(needle, searchStart, 0, UTF_8);
    Assert.assertEquals(0, found);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ public static Iterable<Object[]> data() {
@Test
public void testAll() throws Exception {
forAllStrings(true, (a, array, codeRange, isValid, encoding, codepoints, byteIndices) -> {
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, true, false, (b, expectedIndex) -> {
Assert.assertEquals(expectedIndex, node.execute(a, b, 0, array.length, encoding));
});
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, true, false, (b, fromIndex, toIndex) -> node.execute(a, b, fromIndex, toIndex, encoding));
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@

import java.util.Arrays;

import com.oracle.truffle.api.strings.InternalByteArray;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.oracle.truffle.api.strings.InternalByteArray;
import com.oracle.truffle.api.strings.TruffleString;
import com.oracle.truffle.api.strings.test.TStringTestBase;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@

import java.util.Arrays;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
Expand All @@ -67,9 +66,7 @@ public static Iterable<TruffleString.IndexOfStringNode> data() {
@Test
public void testAll() throws Exception {
forAllStrings(true, (a, array, codeRange, isValid, encoding, codepoints, byteIndices) -> {
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, false, false, (b, expectedIndex) -> {
Assert.assertEquals(expectedIndex, node.execute(a, b, 0, codepoints.length, encoding));
});
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, false, false, (b, fromIndex, toIndex) -> node.execute(a, b, fromIndex, toIndex, encoding));
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ public static Iterable<TruffleString.LastByteIndexOfStringNode> data() {
@Test
public void testAll() throws Exception {
forAllStrings(true, (a, array, codeRange, isValid, encoding, codepoints, byteIndices) -> {
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, true, true, (b, expectedIndex) -> {
Assert.assertEquals(expectedIndex, node.execute(a, b, array.length, 0, encoding));
});
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, true, true, (b, fromIndex, toIndex) -> node.execute(a, b, fromIndex, toIndex, encoding));
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@

import java.util.Arrays;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
Expand All @@ -67,9 +66,7 @@ public static Iterable<TruffleString.LastIndexOfStringNode> data() {
@Test
public void testAll() throws Exception {
forAllStrings(true, (a, array, codeRange, isValid, encoding, codepoints, byteIndices) -> {
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, false, true, (b, expectedIndex) -> {
Assert.assertEquals(expectedIndex, node.execute(a, b, codepoints.length, 0, encoding));
});
testIndexOfString(a, array, isValid, encoding, codepoints, byteIndices, false, true, (b, fromIndex, toIndex) -> node.execute(a, b, fromIndex, toIndex, encoding));
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ static byte[] utf8Encode(int codepoint) {
return ret;
}

/**
 * Encodes a code point into a freshly allocated array of exactly {@code encodedSize} bytes.
 *
 * @param codepoint the code point to encode
 * @param encodedSize the encoded length; asserted to equal {@code utf8EncodedSize(codepoint)}
 *            and to be greater than one
 * @return a new array of length {@code encodedSize} filled by {@code utf8Encode}
 */
static byte[] utf8EncodeNonAscii(int codepoint, int encodedSize) {
    // Preconditions are only checked when assertions are enabled.
    assert encodedSize == utf8EncodedSize(codepoint);
    assert encodedSize > 1;
    final byte[] encoded = new byte[encodedSize];
    utf8Encode(codepoint, encodedSize, encoded, 0);
    return encoded;
}

static void utf8Encode(int codepoint, byte[] buffer, int index, int length) {
assert length == utf8EncodedSize(codepoint);
if (length == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,16 @@ static boolean isFixedWidth(int codeRangeA, int codeRangeB) {
return isFixedWidth(codeRangeA) && isFixedWidth(codeRangeB);
}

static boolean indexOfCannotMatch(AbstractTruffleString a, int codeRangeA, AbstractTruffleString b, int codeRangeB, byte[] mask) {
return a.length() < b.length() || mask == null &&
/**
 * Quick rejection check for index-of searches measured in code points.
 *
 * @return {@code true} if {@code regionLength} is smaller than the code point length of
 *         {@code b} as computed by {@code getCodePointLengthNodeB}, or if
 *         {@code codeRangesCannotMatch} (called without a mask) reports that the code ranges
 *         rule out a match
 */
static boolean indexOfCannotMatch(int codeRangeA, AbstractTruffleString b, int codeRangeB, int regionLength, TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNodeB) {
    // The region is too short to contain b at all.
    if (regionLength < getCodePointLengthNodeB.execute(b)) {
        return true;
    }
    return codeRangesCannotMatch(codeRangeA, codeRangeB, null);
}

/**
 * Quick rejection check for index-of searches that compare against {@code b.length()}.
 *
 * @return {@code true} if {@code regionLength} is smaller than {@code b.length()}, or if
 *         {@code codeRangesCannotMatch} reports that the code ranges (given {@code mask}) rule
 *         out a match
 */
static boolean indexOfCannotMatch(int codeRangeA, AbstractTruffleString b, int codeRangeB, byte[] mask, int regionLength) {
    // The region is too short to contain b at all.
    if (regionLength < b.length()) {
        return true;
    }
    return codeRangesCannotMatch(codeRangeA, codeRangeB, mask);
}

private static boolean codeRangesCannotMatch(int codeRangeA, int codeRangeB, byte[] mask) {
return mask == null &&
!TSCodeRange.isBrokenMultiByteOrUnknown(codeRangeA) &&
!TSCodeRange.isBrokenMultiByteOrUnknown(codeRangeB) &&
TSCodeRange.isMoreRestrictiveThan(codeRangeA, codeRangeB);
Expand Down
Loading

0 comments on commit 6e6fe68

Please sign in to comment.