Skip to content

Commit

Permalink
Preparing new release
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed May 21, 2014
1 parent ac0d04c commit 7d606ef
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 64 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# The FastPFOR C++ library : Fast integer compression
by Daniel Lemire, Leonid Boytsov, Owen Kaser, Maxime Caron, Louis Dionne, Michel Lemay, Erik Kruus, Andrea Bedini


## What is this?

A research library with integer compression schemes.
Expand All @@ -19,6 +18,17 @@ ClueWeb Tools (https://github.com/lintool/clueweb).
Apache Lucene version 4.6.x uses a compression format derived from our FastPFOR
scheme (see http://lucene.apache.org/core/4_6_1/core/org/apache/lucene/util/PForDeltaDocIdSet.html).

## Myths

Myth: SIMD compression requires very large blocks of integers (1024 or more).

Fact: This is not true. Our fastest scheme (SIMDBinaryPacking) works over blocks of 128 integers.

Myth: SIMD compression means high speed but less compression.

Fact: This is wrong. Some schemes cannot easily be accelerated
with SIMD instructions, but many of those that can be accelerated compress very well.

## Working with sorted lists of integers

If you are working primarily with sorted lists of integers, then
Expand Down
109 changes: 46 additions & 63 deletions headers/simdbinarypacking.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ namespace FastPFor {
*
* Designed by D. Lemire with ideas from Leonid Boytsov. This scheme is NOT patented.
*
* Code data in miniblocks of 128 integers.
* To preserve alignment, we use regroup
* 8 such miniblocks into a block of 8 * 128 = 1024
* integers.
* Compresses data in blocks of 128 integers.
*
* Reference and documentation:
*
Expand All @@ -33,7 +30,7 @@ class SIMDBinaryPacking: public IntegerCODEC {
static const uint32_t CookiePadder = 123456;
static const uint32_t MiniBlockSize = 128;
static const uint32_t HowManyMiniBlocks = 16;
static const uint32_t BlockSize = HowManyMiniBlocks * MiniBlockSize;
static const uint32_t BlockSize = MiniBlockSize;

/**
* The way this code is written, it will automatically "pad" the
Expand All @@ -47,8 +44,10 @@ class SIMDBinaryPacking: public IntegerCODEC {
*out++ = static_cast<uint32_t>(length);
while(needPaddingTo128Bits(out)) *out++ = CookiePadder;
uint32_t Bs[HowManyMiniBlocks];
for (const uint32_t * const final = in + length; in + BlockSize
<= final; in += BlockSize) {
const uint32_t *const final = in + length;
for (; in + HowManyMiniBlocks * MiniBlockSize
<= final; in += HowManyMiniBlocks * MiniBlockSize) {

for (uint32_t i = 0; i < HowManyMiniBlocks; ++i)
Bs[i] = maxbits(in + i * MiniBlockSize,
in + (i + 1) * MiniBlockSize);
Expand All @@ -61,12 +60,34 @@ class SIMDBinaryPacking: public IntegerCODEC {
*out++ = (Bs[12] << 24) | (Bs[13] << 16) | (Bs[14] << 8)
| Bs[15];
for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) {
// D.L. : is the reinterpret_cast safe here?
SIMD_fastpackwithoutmask_32(in + i * MiniBlockSize, reinterpret_cast<__m128i *>(out),
Bs[i]);
out += MiniBlockSize/32 * Bs[i];
}
}
if (in < final) {
const size_t howmany = (final - in) / MiniBlockSize;
memset(&Bs[0], 0, HowManyMiniBlocks * sizeof(uint32_t));
for (uint32_t i = 0; i < howmany; ++i)
Bs[i] = maxbits(in + i * MiniBlockSize,
in + (i + 1) * MiniBlockSize);
*out++ = (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8)
| Bs[3];
*out++ = (Bs[4] << 24) | (Bs[5] << 16) | (Bs[6] << 8)
| Bs[7];
*out++ = (Bs[8] << 24) | (Bs[9] << 16) | (Bs[10] << 8)
| Bs[11];
*out++ = (Bs[12] << 24) | (Bs[13] << 16) | (Bs[14] << 8)
| Bs[15];
for (uint32_t i = 0; i < howmany; ++i) {
SIMD_fastpackwithoutmask_32(in + i * MiniBlockSize, reinterpret_cast<__m128i *>(out),
Bs[i]);
out += MiniBlockSize/32 * Bs[i];
}
in += howmany * MiniBlockSize;
assert(in == final);
}

nvalue = out - initout;
}

Expand All @@ -80,7 +101,8 @@ class SIMDBinaryPacking: public IntegerCODEC {
}
const uint32_t * const initout(out);
uint32_t Bs[HowManyMiniBlocks];
for (; out < initout + actuallength; out += BlockSize) {
for (; out < initout + actuallength / (HowManyMiniBlocks * MiniBlockSize) *HowManyMiniBlocks * MiniBlockSize ;
out += HowManyMiniBlocks * MiniBlockSize) {
for(uint32_t i = 0; i < 4 ; ++i,++in) {
Bs[0 + 4 * i] = static_cast<uint8_t>(in[0] >> 24);
Bs[1 + 4 * i] = static_cast<uint8_t>(in[0] >> 16);
Expand All @@ -93,6 +115,21 @@ class SIMDBinaryPacking: public IntegerCODEC {
in += MiniBlockSize/32 * Bs[i];
}
}
if (out < initout + actuallength) {
const size_t howmany = (initout + actuallength - out) / MiniBlockSize;
for (uint32_t i = 0; i < 4 ; ++i, ++in) {
Bs[0 + 4 * i] = static_cast<uint8_t>(in[0] >> 24);
Bs[1 + 4 * i] = static_cast<uint8_t>(in[0] >> 16);
Bs[2 + 4 * i] = static_cast<uint8_t>(in[0] >> 8);
Bs[3 + 4 * i] = static_cast<uint8_t>(in[0]);
}
for (uint32_t i = 0; i < howmany; ++i) {
SIMD_fastunpack_32(reinterpret_cast<const __m128i *>(in), out + i * MiniBlockSize, Bs[i]);
in += MiniBlockSize/32 * Bs[i];
}
out += howmany * MiniBlockSize;
assert(out == initout + actuallength);
}
nvalue = out - initout;
return in;
}
Expand All @@ -104,60 +141,6 @@ class SIMDBinaryPacking: public IntegerCODEC {
};


/**
 * Binary packing codec that uses a single bit width for the whole array.
 *
 * Stream layout written by encodeArray (in 32-bit words):
 *   [length] [Bs] [zero or more CookiePadder words] [packed blocks...]
 * where Bs is the value returned by maxbits() over the entire input and
 * every block of BlockSize integers occupies 4 * Bs words.
 *
 * NOTE(review): the padding words are emitted while
 * needPaddingTo128Bits(out) is true, so their number depends on the address
 * of the output buffer; the decoder must see the same alignment.
 */
class SIMDGlobalBinaryPacking: public IntegerCODEC {
public:
    // Filler word written after the header until the output pointer no
    // longer needs padding; the decoder checks for it while skipping.
    static const uint32_t CookiePadder = 123456;
    // Number of integers packed by one SIMD pack/unpack call.
    static const uint32_t BlockSize = 128;

    /**
     * Packs `length` integers from `in` into `out`.
     *
     * The way this code is written, it will automatically "pad" the
     * header according to the alignment of the out pointer. So if you
     * move the data around, you should preserve the alignment.
     *
     * @param in     input integers
     * @param length number of input integers; passed to
     *               checkifdivisibleby(length, BlockSize)
     *               (presumably rejects lengths that are not a multiple of
     *               BlockSize -- confirm against its definition)
     * @param out    output buffer
     * @param nvalue set to the number of 32-bit words written to `out`
     */
    void encodeArray(const uint32_t *in, const size_t length, uint32_t *out,
            size_t &nvalue) {
        checkifdivisibleby(length, BlockSize);
        const uint32_t * const initout(out);
        *out++ = static_cast<uint32_t>(length);
        const uint32_t Bs = maxbits(in, in + length);
        *out++ = Bs;
        while (needPaddingTo128Bits(out))
            *out++ = CookiePadder;
        // Counted loop: same number of iterations as comparing
        // `in + BlockSize <= in_end`, without ever forming a pointer beyond
        // one-past-the-end of the input.
        const size_t blocks = length / BlockSize;
        for (size_t k = 0; k < blocks; ++k) {
            SIMD_fastpackwithoutmask_32(in, reinterpret_cast<__m128i *>(out),
                    Bs);
            in += BlockSize;
            out += 4 * Bs;
        }
        nvalue = out - initout;
    }

    /**
     * Unpacks a stream produced by encodeArray.
     *
     * @param in     start of the compressed stream (header word first)
     * @param out    output buffer; must not need 128-bit padding
     * @param nvalue set to the length stored in the stream header
     * @return pointer to the first input word after the decoded blocks
     * @throws std::runtime_error if needPaddingTo128Bits(out) is true
     * @throws std::logic_error   if a skipped padding word is not CookiePadder
     */
    const uint32_t * decodeArray(const uint32_t *in, const size_t /*length*/,
            uint32_t *out, size_t & nvalue) {
        const uint32_t actuallength = *in++;
        const uint32_t Bs = *in++;
        if (needPaddingTo128Bits(out))
            throw std::runtime_error("bad initial output align");
        while (needPaddingTo128Bits(in)) {
            if (in[0] != CookiePadder)
                throw std::logic_error("SIMDBinaryPacking alignment issue.");
            ++in;
        }
        // Offsets are computed in size_t from the block count. The previous
        // expression `4 * Bs * actuallength / 128` was evaluated in 32-bit
        // unsigned arithmetic and wrapped around for large arrays
        // (e.g. Bs == 32 and actuallength >= 2^25), yielding a wrong
        // end-of-stream pointer.
        const size_t blocks = actuallength / BlockSize;
        const size_t wordsPerBlock = 4 * static_cast<size_t>(Bs);
        for (size_t k = 0; k < blocks; ++k) {
            SIMD_fastunpack_32(
                    reinterpret_cast<const __m128i *>(in + wordsPerBlock * k),
                    out + static_cast<size_t>(BlockSize) * k, Bs);
        }
        nvalue = actuallength;
        return in + wordsPerBlock * blocks;
    }

    /** @return the codec's identifying name. */
    std::string name() const {
        return "SIMDGlobalBinaryPacking";
    }

};

} // namespace FastPFor

#endif /* SIMDBINARYPACKING_H_ */

0 comments on commit 7d606ef

Please sign in to comment.