Preparing new release

MorphStore · May 21, 2014 · 7d606ef · 7d606ef
1 parent ac0d04c
commit 7d606ef
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 64 deletions.
diff --git a/README.md b/README.md
@@ -1,7 +1,6 @@
 # The FastPFOR C++ library : Fast integer compression
 by Daniel Lemire, Leonid Boytsov, Owen Kaser, Maxime Caron, Louis Dionne, Michel Lemay, Erik Kruus, Andrea Bedini
 
-
 ## What is this?
 
 A research library with integer compression schemes.
@@ -19,6 +18,17 @@ ClueWeb Tools (https://github.com/lintool/clueweb).
 Apache Lucene version 4.6.x uses a compression format derived from our FastPFOR
 scheme (see http://lucene.apache.org/core/4_6_1/core/org/apache/lucene/util/PForDeltaDocIdSet.html).
 
+## Myths
+
+Myth: SIMD compression requires very large blocks of integers (1024 or more).
+
+Fact: This is not true. Our fastest scheme (SIMDBinaryPacking) works over blocks of 128 integers.
+
+Myth: SIMD compression means high speed but less compression.
+
+Fact: This is wrong. Some schemes cannot easily be accelerated
+with SIMD instructions, but many schemes that can be accelerated compress very well.
+
 ## Working with sorted lists of integers
 
 If you are working primarily with sorted lists of integers, then 

diff --git a/headers/simdbinarypacking.h b/headers/simdbinarypacking.h
@@ -18,10 +18,7 @@ namespace FastPFor {
  *
  * Designed by D. Lemire with ideas from Leonid Boytsov. This scheme is NOT patented.
  *
- * Code data in miniblocks of 128 integers.
- * To preserve alignment, we use regroup
- * 8 such miniblocks into a block of 8 * 128 = 1024
- * integers.
+ * Compresses data in blocks of 128 integers.
  *
  * Reference and documentation:
  *
@@ -33,7 +30,7 @@ class SIMDBinaryPacking: public IntegerCODEC {
     static const uint32_t CookiePadder = 123456;
     static const uint32_t MiniBlockSize = 128;
     static const uint32_t HowManyMiniBlocks = 16;
-    static const uint32_t BlockSize = HowManyMiniBlocks * MiniBlockSize;
+    static const uint32_t BlockSize = MiniBlockSize;
 
     /**
      * The way this code is written, it will automatically "pad" the
@@ -47,8 +44,10 @@ class SIMDBinaryPacking: public IntegerCODEC {
         *out++ = static_cast<uint32_t>(length);
         while(needPaddingTo128Bits(out)) *out++ = CookiePadder;
         uint32_t Bs[HowManyMiniBlocks];
-        for (const uint32_t * const final = in + length; in + BlockSize
-                <= final; in += BlockSize) {
+        const uint32_t *const final = in + length;
+        for (; in + HowManyMiniBlocks * MiniBlockSize
+                 <= final; in += HowManyMiniBlocks * MiniBlockSize) {
+
             for (uint32_t i = 0; i < HowManyMiniBlocks; ++i)
                 Bs[i] = maxbits(in + i * MiniBlockSize,
                         in + (i + 1) * MiniBlockSize);
@@ -61,12 +60,34 @@ class SIMDBinaryPacking: public IntegerCODEC {
             *out++ = (Bs[12] << 24) | (Bs[13] << 16) | (Bs[14] << 8)
                             | Bs[15];
             for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) {
-                // D.L. : is the reinterpret_cast safe here?
                 SIMD_fastpackwithoutmask_32(in + i * MiniBlockSize, reinterpret_cast<__m128i *>(out),
                                 Bs[i]);
                 out += MiniBlockSize/32 * Bs[i];
             }
         }
+        if (in < final) {
+            const size_t howmany = (final - in) / MiniBlockSize;
+            memset(&Bs[0], 0, HowManyMiniBlocks * sizeof(uint32_t));
+            for (uint32_t i = 0; i < howmany; ++i)
+                Bs[i] = maxbits(in + i * MiniBlockSize,
+                        in + (i + 1) * MiniBlockSize);
+            *out++ = (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8)
+                     | Bs[3];
+            *out++ = (Bs[4] << 24) | (Bs[5] << 16) | (Bs[6] << 8)
+                     | Bs[7];
+            *out++ = (Bs[8] << 24) | (Bs[9] << 16) | (Bs[10] << 8)
+                     | Bs[11];
+            *out++ = (Bs[12] << 24) | (Bs[13] << 16) | (Bs[14] << 8)
+                     | Bs[15];
+            for (uint32_t i = 0; i < howmany; ++i) {
+                SIMD_fastpackwithoutmask_32(in + i * MiniBlockSize, reinterpret_cast<__m128i *>(out),
+                                Bs[i]);
+                out += MiniBlockSize/32 * Bs[i];
+            }
+            in += howmany * MiniBlockSize;
+            assert(in == final);
+        }
+
         nvalue = out - initout;
     }
 
@@ -80,7 +101,8 @@ class SIMDBinaryPacking: public IntegerCODEC {
         }
         const uint32_t * const initout(out);
         uint32_t Bs[HowManyMiniBlocks];
-        for (; out < initout + actuallength; out += BlockSize) {
+        for (; out < initout + actuallength / (HowManyMiniBlocks * MiniBlockSize) *HowManyMiniBlocks * MiniBlockSize ;
+             out += HowManyMiniBlocks * MiniBlockSize) {
             for(uint32_t i = 0; i < 4 ; ++i,++in) {
                 Bs[0 + 4 * i] = static_cast<uint8_t>(in[0] >> 24);
                 Bs[1 + 4 * i] = static_cast<uint8_t>(in[0] >> 16);
@@ -93,6 +115,21 @@ class SIMDBinaryPacking: public IntegerCODEC {
                 in += MiniBlockSize/32 * Bs[i];
             }
         }
+        if (out < initout + actuallength) {
+            const size_t howmany = (initout + actuallength - out) / MiniBlockSize;
+            for (uint32_t i = 0; i < 4 ; ++i, ++in) {
+                Bs[0 + 4 * i] = static_cast<uint8_t>(in[0] >> 24);
+                Bs[1 + 4 * i] = static_cast<uint8_t>(in[0] >> 16);
+                Bs[2 + 4 * i] = static_cast<uint8_t>(in[0] >> 8);
+                Bs[3 + 4 * i] = static_cast<uint8_t>(in[0]);
+            }
+            for (uint32_t i = 0; i < howmany; ++i) {
+                SIMD_fastunpack_32(reinterpret_cast<const __m128i *>(in), out + i * MiniBlockSize, Bs[i]);
+                         in += MiniBlockSize/32 * Bs[i];
+            }
+            out += howmany * MiniBlockSize;
+            assert(out ==  initout + actuallength);
+        }
         nvalue = out - initout;
         return in;
     }
@@ -104,60 +141,6 @@ class SIMDBinaryPacking: public IntegerCODEC {
 };
 
 
-class SIMDGlobalBinaryPacking: public IntegerCODEC {
-public:
-    static const uint32_t CookiePadder = 123456;
-    static const uint32_t BlockSize = 128;
-
-    /**
-     * The way this code is written, it will automatically "pad" the
-     * header according to the alignment of the out pointer. So if you
-     * move the data around, you should preserve the alignment.
-     */
-    void encodeArray(const uint32_t *in, const size_t length, uint32_t *out,
-            size_t &nvalue) {
-        checkifdivisibleby(length, BlockSize);
-        const uint32_t * const initout(out);
-        *out++ = static_cast<uint32_t>(length);
-        uint32_t Bs = maxbits(in,in + length);
-        *out++ = Bs;
-        while(needPaddingTo128Bits(out)) *out++ = CookiePadder;
-        for (const uint32_t * const final = in + length; in + BlockSize
-                <= final; in += BlockSize, out +=  4 * Bs) {
-                SIMD_fastpackwithoutmask_32(in, reinterpret_cast<__m128i *>(out),
-                                Bs);
-        }
-        nvalue = out - initout;
-    }
-
-    const uint32_t * decodeArray(const uint32_t *in, const size_t /*length*/,
-            uint32_t *out, size_t & nvalue) {
-        const uint32_t actuallength = *in++;
-        const uint32_t Bs = *in++;
-        if(needPaddingTo128Bits(out)) throw std::runtime_error("bad initial output align");
-        while(needPaddingTo128Bits(in)) {
-            if(in[0] != CookiePadder) throw std::logic_error("SIMDBinaryPacking alignment issue.");
-            ++in;
-        }
-        for (uint32_t k = 0; k < actuallength / 128; ++k) {
-               SIMD_fastunpack_32(reinterpret_cast<const __m128i *>(in + 4 * Bs * k), out + 128 * k, Bs);
-        }
-        nvalue = actuallength;
-        return in + 4* Bs * actuallength / 128;
-        /*const uint32_t * const initout(out);
-        for (; out < initout + actuallength; out += BlockSize, in += 4 * Bs) {
-                SIMD_fastunpack_32(reinterpret_cast<const __m128i *>(in), out , Bs);
-        }
-        nvalue = out - initout;
-        return in;*/
-    }
-
-    std::string name() const {
-        return "SIMDGlobalBinaryPacking";
-    }
-
-};
-
 } // namespace FastPFor
 
 #endif /* SIMDBINARYPACKING_H_ */