Skip to content

Commit

Permalink
Merge pull request #10 from HudsonAlpha/interleave_bitvec
Browse files Browse the repository at this point in the history
Interleave bitvec
  • Loading branch information
holtjma authored Feb 26, 2021
2 parents 518089b + 7ac514d commit 5fc230f
Showing 3 changed files with 45 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fmlrc"
version = "0.1.3"
version = "0.1.4"
authors = ["holtjma <[email protected]>"]
edition = "2018"
license = "MIT OR Apache-2.0"
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -81,12 +81,12 @@ The actual corrections are _nearly_ identical (there are slight differences not
However, FMLRC v2 runs in less than half the time from both real time and CPU time perspectives.
While not explicitly measured, FMLRC v2 does use ~1GB of extra memory due to the 10-mer cache (`-C 10`).

| Metric | FMLRC v1.0.0 | FMLRC2 v0.1.3 (`-C 10`) |
| Metric | FMLRC v1.0.0 | FMLRC2 v0.1.4 (`-C 10`) |
| - | - | - |
| Recall | 0.9825 | 0.9826 |
| Precision | 0.9815 | 0.9816 |
| Real time | 7m29.908s | **3m0.885s** |
| CPU time | 51m34.704s | **20m35.792s** |
| Real time | 7m29.908s | **2m57.711s** |
| CPU time | 51m34.704s | **19m12.348s** |

## Reference
FMLRC v2 does not currently have a pre-print or paper. If you use FMLRC v2, please cite the FMLRC v1 paper:
73 changes: 41 additions & 32 deletions src/indexed_bit_vec.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

use math::round;

/*
@@ -39,12 +38,18 @@ const INDEX_SHIFT: usize = 6;
/// the flag for selecting
const INDEX_LOWER_MASK: u64 = LOW_SET_FLAGS[INDEX_SHIFT];

/// A block containing both the bits in the block and the rank before the block
#[derive(Clone,Default)]
pub struct BitVecBlock {
pub bits: u64,
pub index: u64
}

/// Represents a bit vector with an very fast index on top of it.
/// For a vector of length N, the index takes up O(N) bits.
pub struct IndexedBitVec {
index_size: usize,
bitvec: Vec<u64>,
index: Vec<u64>
data_vec: Vec<BitVecBlock>
}

impl IndexedBitVec {
@@ -59,11 +64,9 @@ impl IndexedBitVec {
#[inline]
pub fn with_capacity(size: usize) -> Self {
let index_size: usize = (round::ceil((size as f64) / 64.0, 0) as usize)+1;
let bitvec = vec![0; index_size];
Self {
Self {
index_size,
bitvec,
index: vec![0; index_size]
data_vec: vec![Default::default(); index_size]
}
}

@@ -78,13 +81,13 @@ impl IndexedBitVec {
/// ```
#[inline]
pub fn set_bit(&mut self, pos: usize) {
self.bitvec[pos >> INDEX_SHIFT] |= 0x1 << (pos & INDEX_LOWER_MASK as usize);
self.data_vec[pos >> INDEX_SHIFT].bits |= 0x1 << (pos & INDEX_LOWER_MASK as usize);
}

/// Currently, this function is strictly for testing
#[inline]
pub fn get_bit(&mut self, pos: usize) -> bool {
((self.bitvec[pos >> INDEX_SHIFT] >> (pos & INDEX_LOWER_MASK as usize)) & 0x1) != 0
((self.data_vec[pos >> INDEX_SHIFT].bits >> (pos & INDEX_LOWER_MASK as usize)) & 0x1) != 0
}

/// Builds an index for the array to perform rank queries.
@@ -100,11 +103,13 @@ impl IndexedBitVec {
/// ```
pub fn build_index(&mut self, initial_rank: u64) {
let mut current_rank = initial_rank;
for (i, bv_val) in self.bitvec.iter().enumerate().take(self.index_size-1) {
self.index[i] = current_rank;
current_rank += bv_val.count_ones() as u64
//iterate through each block, set the rank and then add the bit count
for bv_val in self.data_vec.iter_mut().take(self.index_size-1) {
bv_val.index = current_rank;
current_rank += bv_val.bits.count_ones() as u64;
}
self.index[self.index_size-1] = current_rank;
//set the final one to the total count
self.data_vec[self.index_size-1].index = current_rank;
}

/// Performs a rank-1 query, returned the number of set bits up to but NOT including `pos`
@@ -123,7 +128,9 @@ impl IndexedBitVec {
#[inline]
pub fn rank(&self, pos: usize) -> u64 {
let offset: usize = pos >> INDEX_SHIFT;
self.index[offset] + (self.bitvec[offset] & LOW_SET_FLAGS[pos & INDEX_LOWER_MASK as usize]).count_ones() as u64
//rank until block + rank up until particular bit
self.data_vec[offset].index +
(self.data_vec[offset].bits & LOW_SET_FLAGS[pos & INDEX_LOWER_MASK as usize]).count_ones() as u64
}

/// Performs a rank-1 query without bounds checking, returned the number of set bits up to but NOT including `pos`
@@ -146,25 +153,27 @@ impl IndexedBitVec {
#[inline]
pub unsafe fn rank_unchecked(&self, pos:usize) -> u64 {
let vec_offset: usize = pos >> INDEX_SHIFT;
self.index.get_unchecked(vec_offset) + (self.bitvec.get_unchecked(vec_offset) & LOW_SET_FLAGS.get_unchecked(pos & INDEX_LOWER_MASK as usize)).count_ones() as u64
//rank until block + rank up until particular bit
self.data_vec.get_unchecked(vec_offset).index +
(self.data_vec.get_unchecked(vec_offset).bits & LOW_SET_FLAGS.get_unchecked(pos & INDEX_LOWER_MASK as usize)).count_ones() as u64
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_allocation() {
//just test index allocation at the corners
let ibv = IndexedBitVec::with_capacity(63);
assert_eq!(ibv.index.len(), 2);
assert_eq!(ibv.data_vec.len(), 2);
let ibv = IndexedBitVec::with_capacity(64);
assert_eq!(ibv.index.len(), 2);
assert_eq!(ibv.data_vec.len(), 2);
let ibv = IndexedBitVec::with_capacity(65);
assert_eq!(ibv.index.len(), 3);
assert_eq!(ibv.data_vec.len(), 3);
}

#[test]
fn test_set_bit() {
//array with 128 "1"s, then 128 "0"s, then 1 "1"
@@ -184,17 +193,17 @@ mod tests {
assert_eq!(ibv.get_bit(256), true);

//check that our slicing is correct also
let slice: &[u64] = ibv.bitvec.as_slice();
let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
assert_eq!(slice.len(), 6);
for i in 0..2 {
assert_eq!(slice[i], 0xffffffffffffffff);
assert_eq!(slice[i].bits, 0xffffffffffffffff);
}
for i in 2..4 {
assert_eq!(slice[i], 0x0);
assert_eq!(slice[i].bits, 0x0);
}
assert_eq!(slice[4], 0x1)
assert_eq!(slice[4].bits, 0x1)
}

#[test]
fn test_indexing() {
//array with 128 "1"s, then 128 "0"s, then 1 "1"
@@ -205,23 +214,23 @@ mod tests {
ibv.set_bit(256);

ibv.build_index(0);
let slice: &[u64] = ibv.index.as_slice();
let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
assert_eq!(slice.len(), 6);
let counts = vec![0, 64, 128, 128, 128, 129];
for i in 0..slice.len() {
assert_eq!(counts[i], slice[i]);
assert_eq!(counts[i], slice[i].index);
}

//progressively adding bits
let mut ibv = IndexedBitVec::with_capacity(64);
for i in 0..64 {
ibv.set_bit(i);
ibv.build_index(0);
let slice: &[u64] = ibv.index.as_slice();
assert_eq!(slice[1], (i+1) as u64);
let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
assert_eq!(slice[1].index, (i+1) as u64);
}
}

#[test]
fn test_offset() {
//progressively adding bits
@@ -230,8 +239,8 @@ mod tests {
ibv.set_bit(i);
}
ibv.build_index(100);
let slice: &[u64] = ibv.index.as_slice();
assert_eq!(slice[1], 164);
let slice: &[BitVecBlock] = ibv.data_vec.as_slice();
assert_eq!(slice[1].index, 164);
}

#[test]

0 comments on commit 5fc230f

Please sign in to comment.