Skip to content

Commit

Permalink
TRegex: add support for byte array string constants in LATIN-1/BYTES …
Browse files Browse the repository at this point in the history
…mode
  • Loading branch information
djoooooe committed Aug 20, 2020
1 parent 1bf6a60 commit 3f4af08
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -465,13 +465,13 @@ public boolean isFixedCodePointWidth(CodePointSet set) {
}

@Override
public StringBufferUTF16 createStringBuffer(int capacity) {
return new StringBufferUTF16(capacity);
public StringBufferLATIN1 createStringBuffer(int capacity) {
return new StringBufferLATIN1(capacity);
}

@Override
public LoopOptimizationNode extractLoopOptNode(CodePointSet cps) {
return new LoopOptIndexOfAnyCharNode(cps.inverseToCharArray(this));
return new LoopOptIndexOfAnyByteNode(cps.inverseToByteArray(this));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) 2020, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.regex.tregex.string;

import com.oracle.truffle.regex.tregex.buffer.ByteArrayBuffer;
import com.oracle.truffle.regex.tregex.string.Encodings.Encoding;

/**
 * String buffer for the LATIN-1 encoding. Storage is inherited from {@link ByteArrayBuffer}; every
 * appended value is narrowed to a single {@code byte} before being handed to {@code add}.
 */
public final class StringBufferLATIN1 extends ByteArrayBuffer implements AbstractStringBuffer {

    /**
     * Creates a buffer by delegating to {@link #StringBufferLATIN1(int)} with a capacity of 16.
     */
    public StringBufferLATIN1() {
        this(16);
    }

    /**
     * Creates a buffer, forwarding {@code capacity} to the {@link ByteArrayBuffer} constructor.
     *
     * @param capacity capacity value passed to the superclass
     */
    public StringBufferLATIN1(int capacity) {
        super(capacity);
    }

    /**
     * @return {@link Encodings#LATIN_1}
     */
    @Override
    public Encoding getEncoding() {
        return Encodings.LATIN_1;
    }

    /**
     * Appends {@code codepoint} as one byte.
     *
     * @param codepoint value to append; asserted not to exceed the LATIN-1 maximum
     */
    @Override
    public void append(int codepoint) {
        assert withinLatin1(codepoint);
        final byte narrowed = (byte) codepoint;
        add(narrowed);
    }

    /**
     * Appends the bitwise OR of the two given values as one byte.
     *
     * @param c1 first operand; asserted not to exceed the LATIN-1 maximum
     * @param c2 second operand; asserted not to exceed the LATIN-1 maximum
     */
    @Override
    public void appendOR(int c1, int c2) {
        assert withinLatin1(c1);
        assert withinLatin1(c2);
        final int merged = c1 | c2;
        add((byte) merged);
    }

    /**
     * Appends the bitwise XOR of the two given values as one byte.
     *
     * @param c1 first operand; asserted not to exceed the LATIN-1 maximum
     * @param c2 second operand; asserted not to exceed the LATIN-1 maximum
     */
    @Override
    public void appendXOR(int c1, int c2) {
        assert withinLatin1(c1);
        assert withinLatin1(c2);
        final int toggled = c1 ^ c2;
        add((byte) toggled);
    }

    /**
     * Wraps the result of {@code toArray()} in a new {@link StringLATIN1}.
     *
     * @return a {@link StringLATIN1} built from the buffer's current array form
     */
    @Override
    public StringLATIN1 materialize() {
        final byte[] snapshot = toArray();
        return new StringLATIN1(snapshot);
    }

    /**
     * Upper-bound check used by the assertions above. Only the upper bound is tested; negative
     * values are not rejected here.
     */
    private static boolean withinLatin1(int value) {
        return value <= Encodings.LATIN_1.getMaxValue();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright (c) 2020, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.regex.tregex.string;

import java.util.Arrays;

import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;

/**
 * {@link AbstractString} implementation backed by a {@code byte[]}, one byte per encoded unit. The
 * iterator widens each byte to its unsigned value.
 */
public final class StringLATIN1 implements AbstractString {

    @CompilationFinal(dimensions = 1) private final byte[] str;

    /**
     * Wraps the given array. The array is stored as-is, without copying.
     *
     * @param str backing bytes of this string
     */
    public StringLATIN1(byte[] str) {
        this.str = str;
    }

    /**
     * @return the length of the backing byte array
     */
    @Override
    public int encodedLength() {
        return str.length;
    }

    /**
     * @return the backing {@code byte[]} itself (not a copy)
     */
    @Override
    public Object content() {
        return str;
    }

    @Override
    public String toString() {
        return defaultToString();
    }

    /**
     * Creates a new string from a copy of the bytes in {@code [start, end)}, as produced by
     * {@link Arrays#copyOfRange(byte[], int, int)}.
     *
     * @param start first index to copy (inclusive)
     * @param end last index to copy (exclusive)
     * @return a new {@link StringLATIN1} over the copied range
     */
    @Override
    public StringLATIN1 substring(int start, int end) {
        final byte[] slice = Arrays.copyOfRange(str, start, end);
        return new StringLATIN1(slice);
    }

    /**
     * Compares {@code encodedLength} bytes of this string, starting at {@code offset}, with the
     * bytes of {@code other} starting at {@code ooffset}.
     *
     * @param offset start index in this string
     * @param other string to compare against; cast to {@link StringLATIN1} unconditionally
     * @param ooffset start index in {@code other}
     * @param encodedLength number of bytes to compare
     * @return {@code false} if either region would extend past the end of its array or any byte
     *         pair differs, {@code true} otherwise
     */
    @Override
    public boolean regionMatches(int offset, AbstractString other, int ooffset, int encodedLength) {
        final byte[] theirs = ((StringLATIN1) other).str;
        // Both regions must fit; the second length is only inspected when the first region fits.
        if (!(offset + encodedLength <= str.length && ooffset + encodedLength <= theirs.length)) {
            return false;
        }
        int k = 0;
        while (k < encodedLength) {
            if (str[offset + k] != theirs[ooffset + k]) {
                return false;
            }
            k++;
        }
        return true;
    }

    /**
     * @return a fresh iterator over the backing bytes
     */
    @Override
    public AbstractStringIterator iterator() {
        return new StringLATIN1Iterator(str);
    }

    /**
     * Iterator over a byte array. The cursor {@code i} is not declared in this class; it comes from
     * {@link AbstractStringIterator}.
     */
    private static final class StringLATIN1Iterator extends AbstractStringIterator {

        private final byte[] str;

        private StringLATIN1Iterator(byte[] str) {
            this.str = str;
        }

        @Override
        public boolean hasNext() {
            return str.length > i;
        }

        /**
         * @return the byte at the cursor as a value in {@code 0..255}, advancing the cursor by one
         */
        @Override
        public int nextInt() {
            final byte raw = str[i++];
            // Mask off sign extension so the byte is read as unsigned.
            return raw & 0xFF;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@

import java.util.Arrays;

import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;

public final class StringUTF32 implements AbstractString {

private final int[] str;
@CompilationFinal(dimensions = 1) private final int[] str;

public StringUTF32(int[] str) {
this.str = str;
Expand Down

0 comments on commit 3f4af08

Please sign in to comment.