JDK-8202533: StringUTF16.compress() and StringLatin1.inflate().

This is essentially a re-implementation of the low-level intrinsic generation, aligned with the newly introduced AMD64 assembler support.
oldcwj · Aug 27, 2018 · 0b82932 · 0b82932
1 parent 0741215
commit 0b82932
Show file tree

Hide file tree

Showing 17 changed files with 1,729 additions and 104 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+*~
 *.aux
 *.bak
 *.bgv

diff --git a/compiler/mx.compiler/suite.py b/compiler/mx.compiler/suite.py
@@ -899,6 +899,7 @@
       "dependencies" : [
         "org.graalvm.compiler.core.test",
         "org.graalvm.compiler.replacements",
+        "org.graalvm.compiler.replacements.amd64",
       ],
       "annotationProcessors" : [
         "GRAAL_NODEINFO_PROCESSOR",

diff --git a/...src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java b/...src/org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64Assembler.java
diff --git a/...org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64BaseAssembler.java b/...org.graalvm.compiler.asm.amd64/src/org/graalvm/compiler/asm/amd64/AMD64BaseAssembler.java
@@ -50,6 +50,7 @@
 import static org.graalvm.compiler.asm.amd64.AMD64BaseAssembler.VEXPrefixConfig.WIG;
 import static org.graalvm.compiler.core.common.NumUtil.isByte;
 
+import jdk.vm.ci.code.Register.RegisterCategory;
 import org.graalvm.compiler.asm.Assembler;
 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
 import org.graalvm.compiler.asm.amd64.AVXKind.AVXSize;
@@ -269,8 +270,12 @@ public final boolean supports(CPUFeature feature) {
         return ((AMD64) target.arch).getFeatures().contains(feature);
     }
 
+    protected static boolean inRC(RegisterCategory rc, Register r) {
+        return r.getRegisterCategory().equals(rc);
+    }
+
     protected static int encode(Register r) {
-        assert r.encoding >= 0 && (r.getRegisterCategory().equals(XMM) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
+        assert r.encoding >= 0 && (inRC(XMM, r) ? r.encoding < 32 : r.encoding < 16) : "encoding out of range: " + r.encoding;
         return r.encoding & 0x7;
     }
 
@@ -296,6 +301,10 @@ private static class Prefix {
         private static final int REXWRB = 0x4D;
         private static final int REXWRX = 0x4E;
         private static final int REXWRXB = 0x4F;
+
+        private static final int VEX2 = 0xC5;
+        private static final int VEX3 = 0xC4;
+        private static final int EVEX = 0x62;
     }
 
     protected final void rexw() {
@@ -797,11 +806,16 @@ private int opcodeEscapePrefixToMMMMM(int opcodeEscapePrefix) {
 
         @Override
         public void simdPrefix(Register reg, Register nds, AMD64Address rm, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
+            assert reg.encoding < 16 : "encoding out of range: " + reg.encoding;
+            // XXX nds.encoding < 16 is checked by emitVEX
             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(reg, rm), nds.isValid() ? nds.encoding : 0);
         }
 
         @Override
         public void simdPrefix(Register dst, Register nds, Register src, int sizePrefix, int opcodeEscapePrefix, boolean isRexW) {
+            assert dst.encoding < 16 : "encoding out of range: " + dst.encoding;
+            assert src.encoding < 16 : "encoding out of range: " + src.encoding;
+            // XXX nds.encoding < 16 is checked by emitVEX
             emitVEX(L128, sizePrefixToPP(sizePrefix), opcodeEscapePrefixToMMMMM(opcodeEscapePrefix), isRexW ? W1 : W0, getRXB(dst, src), nds.isValid() ? nds.encoding : 0);
         }
     }
@@ -822,6 +836,46 @@ protected final void simdPrefix(Register dst, Register nds, Register src, Operan
         simdEncoder.simdPrefix(dst, nds, src, size.sizePrefix, opcodeEscapePrefix, isRexW);
     }
 
+ // @formatter:off
+ //
+ // Instruction Format and VEX illustrated below (optional []):
+ //
+ // #of bytes:    2,3      1       1       1       1,2,4       1
+ // [Prefixes]    VEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
+ //                                             [Disp16,32]
+ //
+ // VEX: 0xC4 | P1 | P2
+ //
+ //     7   6   5   4   3   2   1   0
+ // P1  R   X   B   m   m   m   m   m      P[ 7:0]
+ // P2  W   v   v   v   v   L   p   p      P[15:8]
+ //
+ // VEX: 0xC5 | P1
+ //
+ //     7   6   5   4   3   2   1   0
+ // P1  R   v   v   v   v   L   p   p      P[7:0]
+ //
+ // Figure. Bit Field Layout of the VEX Prefix
+ //
+ // Table. VEX Prefix Bit Field Functional Grouping
+ //
+ // Notation        Bit field Group        Position        Comment
+ // ----------  -------------------------  --------  -------------------
+ // VEX.RXB     Next-8 register specifier  P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
+ // VEX.R       REX.R inverse              P[7]      Combine with ModR/M.reg.
+ // VEX.X       REX.X inverse              P[6]      Combine with SIB.index (or VSIB vidx).
+ // VEX.B       REX.B inverse              P[5]      Combine with ModR/M.rm or SIB.base.
+ // VEX.mmmmm   0F, 0F_38, 0F_3A encoding  P[4:0]    b01/0F, b10/0F_38, b11/0F_3A (all others reserved)
+ //
+ // VEX.W       Opcode specific            P[15]
+ // VEX.vvvv    A register specifier       P[14:11]  In inverse form, b1111 if not used.
+ //                                        P[6:3]
+ // VEX.L       Vector length/RC           P[10]     b0/scalar or 128b vec, b1/256b vec.
+ //                                        P[2]
+ // VEX.pp      Compressed legacy prefix   P[9:8]    b00/None, b01/0x66, b10/0xF3, b11/0xF2
+ //                                        P[1:0]
+ // @formatter:on
+
     /**
      * Low-level function to encode and emit the VEX prefix.
      * <p>
@@ -867,7 +921,7 @@ protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv)
             byte2 |= l << 2;
             byte2 |= pp;
 
-            emitByte(0xC5);
+            emitByte(Prefix.VEX2);
             emitByte(byte2);
         } else {
             // 3 byte encoding
@@ -881,7 +935,7 @@ protected final void emitVEX(int l, int pp, int mmmmm, int w, int rxb, int vvvv)
             byte3 |= l << 2;
             byte3 |= pp;
 
-            emitByte(0xC4);
+            emitByte(Prefix.VEX3);
             emitByte(byte2);
             emitByte(byte3);
         }
@@ -985,6 +1039,51 @@ public int getDisp8ScalingFactor(AVXSize size) {
         }
     }
 
+ // @formatter:off
+ //
+ // Instruction Format and EVEX illustrated below (optional []):
+ //
+ // #of bytes:      4       1       1       1       1,2,4       1
+ // [Prefixes]    EVEX   OpCode   ModR/M  [SIB]   [Disp8*N] [Immediate]
+ //                                              [Disp16,32]
+ //
+ // The EVEX prefix is a 4-byte prefix, with the first two bytes derived from unused encoding
+ // form of the 32-bit-mode-only BOUND instruction. The layout of the EVEX prefix is shown in
+ // the figure below. The first byte must be 0x62, followed by three pay-load bytes, denoted
+ // as P1, P2, and P3 individually or collectively as P[23:0] (see below).
+ //
+ // EVEX: 0x62 | P1 | P2 | P3
+ //
+ //     7   6   5   4   3   2   1   0
+ // P1  R   X   B   R'  0   0   m   m      P[ 7: 0]
+ // P2  W   v   v   v   v   1   p   p      P[15: 8]
+ // P3  z   L'  L   b   V'  a   a   a      P[23:16]
+ //
+ // Figure. Bit Field Layout of the EVEX Prefix
+ //
+ // Table. EVEX Prefix Bit Field Functional Grouping
+ //
+ // Notation        Bit field Group        Position        Comment
+ // ---------  --------------------------  --------  -----------------------
+ // EVEX.RXB   Next-8 register specifier   P[7:5]    Combine with ModR/M.reg, ModR/M.rm (base, index/vidx).
+ // EVEX.X     High-16 register specifier  P[6]      Combine with EVEX.B and ModR/M.rm, when SIB/VSIB absent.
+ // EVEX.R'    High-16 register specifier  P[4]      Combine with EVEX.R and ModR/M.reg.
+ // --         Reserved                    P[3:2]    Must be 0.
+ // EVEX.mm    Compressed legacy escape    P[1:0]    Identical to low two bits of VEX.mmmmm.
+ //
+ // EVEX.W     Osize promotion/Opcode ext  P[15]
+ // EVEX.vvvv  NDS register specifier      P[14:11]  Same as VEX.vvvv.
+ // --         Fixed Value                 P[10]     Must be 1.
+ // EVEX.pp    Compressed legacy prefix    P[9:8]    Identical to VEX.pp.
+ //
+ // EVEX.z     Zeroing/Merging             P[23]
+ // EVEX.L'L   Vector length/RC            P[22:21]
+ // EVEX.b     Broadcast/RC/SAE Context    P[20]
+ // EVEX.V'    High-16 NDS/VIDX register   P[19]     Combine with EVEX.vvvv or VSIB when present.
+ // EVEX.aaa   Embedded opmask register    P[18:16]
+ //
+ // @formatter:on
+
     /**
      * Low-level function to encode and emit the EVEX prefix.
      * <p>
@@ -1020,13 +1119,13 @@ private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv,
 
         assert (rxb & 0x07) == rxb : "invalid value for EVEX.RXB";
         assert (reg & 0x1F) == reg : "invalid value for EVEX.R'";
-        assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.vvvvv";
+        assert (vvvvv & 0x1F) == vvvvv : "invalid value for EVEX.V'vvvv";
 
         assert z == Z0 || z == Z1 : "invalid value for EVEX.z";
         assert b == B0 || b == B1 : "invalid value for EVEX.b";
         assert (aaa & 0x07) == aaa : "invalid value for EVEX.aaa";
 
-        emitByte(0x62);
+        emitByte(Prefix.EVEX);
         int p1 = 0;
         p1 |= ((rxb ^ 0x07) & 0x07) << 5;
         p1 |= reg < 16 ? 0x10 : 0;
@@ -1036,7 +1135,7 @@ private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv,
         int p2 = 0;
         p2 |= w << 7;
         p2 |= ((vvvvv ^ 0x0F) & 0x0F) << 3;
-        p2 |= 0x4;
+        p2 |= 0x04;
         p2 |= pp;
         emitByte(p2);
 
@@ -1049,6 +1148,11 @@ private void emitEVEX(int l, int pp, int mm, int w, int rxb, int reg, int vvvvv,
         emitByte(p3);
     }
 
+    /**
+     * Get RXB bits for register-register instructions in EVEX encoding, where ModRM.rm contains
+     * a register index. The R bit extends the ModRM.reg field and the X and B bits extend the
+     * ModRM.rm field.
+     */
     private static int getRXBForEVEX(Register reg, Register rm) {
         int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
         rxb |= (rm == null ? 0 : rm.encoding & 0x018) >> 3;
@@ -1059,7 +1163,7 @@ private static int getRXBForEVEX(Register reg, Register rm) {
      * Helper method for emitting EVEX prefix in the form of RRRR.
      */
     protected final void evexPrefix(Register dst, Register mask, Register nds, Register src, AVXSize size, int pp, int mm, int w, int z, int b) {
-        assert !mask.isValid() || mask.getRegisterCategory().equals(MASK);
+        assert !mask.isValid() || inRC(MASK, mask);
         emitEVEX(getLFlag(size), pp, mm, w, getRXBForEVEX(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
     }
 
@@ -1070,7 +1174,7 @@ protected final void evexPrefix(Register dst, Register mask, Register nds, Regis
      * {@link #emitEVEXOperandHelper(Register, AMD64Address, int, int)}.
      */
     protected final void evexPrefix(Register dst, Register mask, Register nds, AMD64Address src, AVXSize size, int pp, int mm, int w, int z, int b) {
-        assert !mask.isValid() || mask.getRegisterCategory().equals(MASK);
+        assert !mask.isValid() || inRC(MASK, mask);
         emitEVEX(getLFlag(size), pp, mm, w, getRXB(dst, src), dst.encoding, nds.isValid() ? nds.encoding() : 0, z, b, mask.isValid() ? mask.encoding : 0);
     }
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    *~
     *.aux
     *.bak
     *.bgv
@@ Expand Down @@