Merge pull request apache#535 from afs/tdb2-xsd_float

afs · web-flow · commit d5545fea5819 · 2019-02-22T08:50:27.000Z
JENA-1674: Don't sign extend Float.floatToIntBits.
diff --git a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/DoubleNode62.java b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/DoubleNode62.java
@@ -26,37 +26,49 @@
  * Uses java's 64 bit long format (which is IEEE754 binary64) except that 2 bits are taken
  * from the exponent. This keeps the precision but reduces the range.
  * <p>
- * <b>Java
- * (<a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">IEEE
- * 754 binary64</a>)</b>
+ * <b> 
+ * <a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">IEEE 754 binary64</a>
+ * </b>
  * 
  * <pre>
- * bit 63 : sign bit
+ * bit  63    : sign bit
  * bits 52-62 : exponent, 11 bits, the power of 2, bias -1023.
  * bits 0-51  : mantissa (significand) 52 bits (the leading one is not stored).
  * 
  * Exponents are 11 bits, with values -1022 to +1023 held as 1 to 2046 (11 bits, bias -1023)
+ * Exponents 0x000 and 0x7ff have a special meaning: 
  *    0x000 is signed zero.
- *    0x7FF is +/- infinity.
+ *    0x7FF is +/- infinity when the mantissa is zero
+ *    0x7FF is NaN if the mantissa is not zero
+ * The canonical NaN is 0x7FF8000000000000L, i.e. mantissa 0x8000...
+ * Other non-zero mantissa values are different NaN bit patterns.
  * </pre>
  * 
- * for a maximum value of 1.797693e+308 = (2-2^-52)*2^1023 and smallest denormlized of
+ * The different NaN bit patterns are not distinguishable in Java 
+ * by floating point operations, only by {@link Double#doubleToRawLongBits}.
+ * 
+ * The maximum value is 1.797693e+308 = (2-2^-52)*2^1023 and smallest denormalized of
  * (1-2^-52)*2^-1022 = 2.225...e-308.
  * <p>
  * <b>DoubleNode62</b>
  * <p>
  * In a 62 bit double:
- * 
  * <pre>
- * bit 63 : pointer bit.
- * bit 62 : double type bit.
- * bit 61 : sign bit 
+ * <i>NodeId</i> 
+ * bit 63    : pointer or value bit.
+ * bit 62    : double type bit
+ * 
+ * <i>Double62</i>
+ * bit  61    : sign bit 
  * bits 52-60 : exponent, 9 bits, the power of 2, bias -255
  * bits 0-51  : mantissa (significand) 52 bits (the leading one is not stored).
  * 
  * Exponents are 9 bits, with values -254 to 255, held as 1 to 512 (9 bits, bias -255)
+ * Exponents 0x000 and 0x1ff have a special meaning: 
  *    0x000 is signed zero.
- *    0x1FF is +/- infinity.
+ *    0x1FF is +/- infinity if the mantissa is zero
+ *    0x1FF is NaN if the mantissa is not zero
+ * The canonical NaN is 0x1FF8000000000000L, i.e. mantissa 0x8000...
  * </pre>
  * 
  * for a maximum value of (2-2^-52)*2^255 = 1.157921e+77 and smallest denormlized of
@@ -66,7 +78,7 @@
  * <p>
  * "No encoding" is 0xFF00_0000_0000_0000L which would otherwise be the smallest (most negative) denormalized value: 
  *  -3.5336941295567687E72
- * <p>All unencodeable numbers will endup in the node table in full lexical form.  
+ * <p>All unencodeable numbers will end up in the node table in full lexical form.  
  */  
 public class DoubleNode62 {
     /**
@@ -80,7 +92,8 @@ public class DoubleNode62 {
      * The top two bits are zero if packing was possible.
      */
     public static long pack(double v) {
-        long x = Double.doubleToRawLongBits(v);
+        // Not "raw", so all NaNs end up as the same bit pattern when packed.
+        long x = Double.doubleToLongBits(v);
         long sign = BitsLong.unpack(x, 63, 64);
         long exp11 = BitsLong.unpack(x, 52, 63);
         long exp9 = encode11to9(exp11);
diff --git a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/FloatNode.java b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/FloatNode.java
@@ -19,10 +19,15 @@
 package org.apache.jena.tdb2.store.value;
 
 public class FloatNode {
+    // Floats, being 32 bits, are always encodable.
+    //public static long NO_ENCODING = 0xFF00_0000_0000_0000L;
+    
     // 32 bits of value; collapses NaNs to a single value.
 
     public static long pack(float v) {
-        return Float.floatToIntBits(v);
+        // Not "raw", so all NaNs end up as the same bit pattern when packed.
+        int x = Float.floatToIntBits(v);
+        return Integer.toUnsignedLong(x);
     }
 
     public static float unpack(long v) {
diff --git a/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/TS_Store.java b/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/TS_Store.java
@@ -20,6 +20,7 @@
 
 import org.apache.jena.dboe.base.block.FileMode;
 import org.apache.jena.tdb2.store.value.TestDoubleNode62;
+import org.apache.jena.tdb2.store.value.TestFloatNode;
 import org.apache.jena.tdb2.store.value.TestNodeIdInline;
 import org.apache.jena.tdb2.sys.SystemTDB;
 import org.apache.jena.tdb2.sys.TestOps;
@@ -33,6 +34,7 @@
     TestNodeId.class
     , TestNodeIdInline.class
     , TestDoubleNode62.class
+    , TestFloatNode.class
     , TestTripleTable.class
     , TestGraphTDB.class
     , TestGraphNamedTDB.class
diff --git a/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestDoubleNode62.java b/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestDoubleNode62.java
@@ -25,6 +25,7 @@
 import static org.junit.Assert.*;
 
 public class TestDoubleNode62 {
+    // See also TestNodeIdInline.nodeId_double_*
     @Test public void double_01() { testRoundTripDouble(1d); }
     @Test public void double_02() { testRoundTripDouble(-1d); }
     @Test public void double_03() { testRoundTripDouble(-1111111111e50d); }
@@ -49,14 +50,18 @@ public class TestDoubleNode62 {
     @Test public void double_22() { testRoundTripDouble(Double.NaN); }
     @Test public void double_23() { testNoEncoding(Double.MAX_VALUE); }
     @Test public void double_24() { testNoEncoding(Double.MIN_NORMAL); }
-    @Test public void double_25() { testNoEncoding(Double.MIN_VALUE); }
+    // Despite being outside the normal range of DoubleNode62,
+    // Double.MIN_VALUE does encode and round trip.
+    // Its encoding is the long value 1.
+    @Test public void double_25() { testRoundTripDouble(Double.MIN_VALUE); }
 
     @Test public void double_30() { testRoundTripDouble(DoubleNode62.POSITIVE_INFINITY); }
     @Test public void double_31() { testRoundTripDouble(DoubleNode62.NEGATIVE_INFINITY); }
     @Test public void double_32() { testRoundTripDouble(DoubleNode62.NaN); }
-    @Test public void double_33() { testNoEncoding(DoubleNode62.MAX_VALUE); }
-    @Test public void double_34() { testNoEncoding(DoubleNode62.MIN_NORMAL); }
-    @Test public void double_35() { testNoEncoding(DoubleNode62.MIN_VALUE); }
+    
+    @Test public void double_33() { testRoundTripDouble(DoubleNode62.MAX_VALUE); }
+    @Test public void double_34() { testRoundTripDouble(DoubleNode62.MIN_NORMAL); }
+    @Test public void double_35() { testRoundTripDouble(DoubleNode62.MIN_VALUE); }
 
     @Test public void double_40() { sameValue(DoubleNode62.POSITIVE_INFINITY, Double.POSITIVE_INFINITY); }
     @Test public void double_41() { sameValue(DoubleNode62.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY); }
@@ -70,47 +75,27 @@ public class TestDoubleNode62 {
     @Test public void double_55() { testConst(DoubleNode62.MIN_VALUE_BITS,  0x01L); }
     
     private void sameValue(double d1, double d2) {
-        // Not d1 == d2 - NaN != NaN 
+        // Not d1 == d2 because NaN != NaN 
         assertEquals(Double.valueOf(d1), Double.valueOf(d2));  
     }
     
     private static void testConst(long x, long expected) {
-        //print(expected);
-        //print(x);
         assertEquals(expected, x);
         double d = DoubleNode62.unpack(x);
         long z = DoubleNode62.pack(d);
         assertEquals(expected, z);
     }
     
     private void testNoEncoding(double d) {
-        testRoundTripDouble(d, false); 
+        long x = DoubleNode62.pack(d);
+        assertEquals("Expected no encoding", x, DoubleNode62.NO_ENCODING);
     }
 
     private static void testRoundTripDouble(double d) {
-        testRoundTripDouble(d, true); 
-    }
-    
-    private static void testRoundTripDouble(double d, boolean valid) {
-        //System.out.printf("Double: %.2e\n", d);
-        long x0 = Double.doubleToRawLongBits(d);
-        //print(x0);
         long x = DoubleNode62.pack(d);
-        //print(x);
-        if ( x == DoubleNode62.NO_ENCODING ) {
-            if ( valid )
-                fail("Expect no encoding");
-            
-            //System.out.println("No encoding");
-            //System.out.println();
-            return;
-        }
-        
+        assertNotEquals("Expected encoding", x, DoubleNode62.NO_ENCODING);
         double d2 = DoubleNode62.unpack(x);
-        
-        Double double1 = d ;
-        Double double2 = d2 ;
-        assertEquals(double1, double2);
+        assertEquals(d, d2, 0);
     }
 
     private static void print(long x) {
diff --git a/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestFloatNode.java b/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestFloatNode.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.tdb2.store.value;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+public class TestFloatNode {
+    // Floats can always be encoded.
+    // See also TestNodeIdInline.nodeId_float_*
+    @Test public void float_01() { testRoundTripFloat(1f); }
+    @Test public void float_02() { testRoundTripFloat(-1f); }
+    @Test public void float_03() { testRoundTripFloat(-1111111111e20f); }
+    @Test public void float_04() { testRoundTripFloat(1111111111e20f); }
+
+    @Test public void float_10() { testRoundTripFloat(Float.POSITIVE_INFINITY); }
+    @Test public void float_11() { testRoundTripFloat(Float.NEGATIVE_INFINITY); }
+    @Test public void float_12() { testRoundTripFloat(Float.NaN); }
+    @Test public void float_13() { testRoundTripFloat(Float.MAX_VALUE); }
+    @Test public void float_14() { testRoundTripFloat(Float.MIN_NORMAL); }
+    @Test public void float_15() { testRoundTripFloat(Float.MIN_VALUE); }
+
+    private static void testRoundTripFloat(float f) {
+        long x0 = Float.floatToRawIntBits(f);
+        long x = FloatNode.pack(f);
+        // No high part.
+        assertTrue( (x & 0xFFFFFFFF00000000L) == 0 );
+        float f2 = FloatNode.unpack(x);
+        assertEquals(f, f2, 0);
+    }
+}
diff --git a/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestNodeIdInline.java b/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestNodeIdInline.java
@@ -257,6 +257,24 @@ public class TestNodeIdInline
     @Test public void nodeId_float_5()
     { test("'1.1E9'^^xsd:float") ; }
 
+    @Test public void nodeId_float_6()
+    { test("'-1'^^xsd:float", "'-1.0'^^xsd:float") ; }
+
+    @Test public void nodeId_float_7()
+    { test("'-1.0'^^xsd:float") ; }
+
+    @Test public void nodeId_float_8()
+    { test("'-0.0'^^xsd:float") ; }
+
+    @Test public void nodeId_float_9()
+    { test("'INF'^^xsd:float") ; }
+
+    @Test public void nodeId_float_10()
+    { test("'-INF'^^xsd:float") ; }
+
+    @Test public void nodeId_float_11()
+    { test("'NaN'^^xsd:float") ; }
+
     private void test(String x) { test(x, x) ; }
     
     private void test(String x, String expected) {