-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
678 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
#include <string.h> | ||
#include <assert.h> | ||
#include <stdlib.h> | ||
#include <stdio.h> | ||
#include "rle.h" | ||
|
||
const uint8_t rle_auxtab[8] = { 0x01, 0x11, 0x21, 0x31, 0x03, 0x13, 0x07, 0x17 }; | ||
|
||
// insert symbol $a after $x symbols in $str; marginal counts added to $cnt; returns the size increase | ||
int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]) | ||
{ | ||
uint16_t *nptr = (uint16_t*)block; | ||
int diff; | ||
|
||
block += 2; // skip the first 2 counting bytes | ||
if (*nptr == 0) { | ||
memset(cnt, 0, 48); | ||
diff = rle_enc1(block, a, rl); | ||
} else { | ||
uint8_t *p, *end = block + *nptr, *q; | ||
int64_t pre, z, l = 0, tot, beg_l; | ||
int c = -1, n_bytes = 0, n_bytes2, t = 0; | ||
uint8_t tmp[24]; | ||
beg_l = bc[0] + bc[1] + bc[2] + bc[3] + bc[4] + bc[5]; | ||
tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5]; | ||
if (x < beg_l) { | ||
beg_l = 0, *beg = 0; | ||
memset(bc, 0, 48); | ||
} | ||
if (x == beg_l) { | ||
p = q = block + (*beg); z = beg_l; | ||
memcpy(cnt, bc, 48); | ||
} else if (x - beg_l <= ((tot-beg_l)>>1) + ((tot-beg_l)>>3)) { // forward | ||
z = beg_l; p = block + (*beg); | ||
memcpy(cnt, bc, 48); | ||
while (z < x) { | ||
rle_dec1(p, c, l); | ||
z += l; cnt[c] += l; | ||
} | ||
for (q = p - 1; *q>>6 == 2; --q); | ||
} else { // backward | ||
memcpy(cnt, ec, 48); | ||
z = tot; p = end; | ||
while (z >= x) { | ||
--p; | ||
if (*p>>6 != 2) { | ||
l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; | ||
z -= l; cnt[*p&7] -= l; | ||
l = 0; t = 0; | ||
} else { | ||
l |= (*p&0x3fL) << t; | ||
t += 6; | ||
} | ||
} | ||
q = p; | ||
rle_dec1(p, c, l); | ||
z += l; cnt[c] += l; | ||
} | ||
*beg = q - block; | ||
memcpy(bc, cnt, 48); | ||
bc[c] -= l; | ||
n_bytes = p - q; | ||
if (x == z && a != c && p < end) { // then try the next run | ||
int tc; | ||
int64_t tl; | ||
q = p; | ||
rle_dec1(q, tc, tl); | ||
if (a == tc) | ||
c = tc, n_bytes = q - p, l = tl, z += l, p = q, cnt[tc] += tl; | ||
} | ||
if (z != x) cnt[c] -= z - x; | ||
pre = x - (z - l); p -= n_bytes; | ||
if (a == c) { // insert to the same run | ||
n_bytes2 = rle_enc1(tmp, c, l + rl); | ||
} else if (x == z) { // at the end; append to the existing run | ||
p += n_bytes; n_bytes = 0; | ||
n_bytes2 = rle_enc1(tmp, a, rl); | ||
} else { // break the current run | ||
n_bytes2 = rle_enc1(tmp, c, pre); | ||
n_bytes2 += rle_enc1(tmp + n_bytes2, a, rl); | ||
n_bytes2 += rle_enc1(tmp + n_bytes2, c, l - pre); | ||
} | ||
if (n_bytes != n_bytes2 && end != p + n_bytes) // size changed | ||
memmove(p + n_bytes2, p + n_bytes, end - p - n_bytes); | ||
memcpy(p, tmp, n_bytes2); | ||
diff = n_bytes2 - n_bytes; | ||
} | ||
return (*nptr += diff); | ||
} | ||
|
||
int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6]) | ||
{ | ||
int beg = 0; | ||
int64_t bc[6]; | ||
memset(bc, 0, 48); | ||
return rle_insert_cached(block, x, a, rl, cnt, ec, &beg, bc); | ||
} | ||
|
||
void rle_split(uint8_t *block, uint8_t *new_block) | ||
{ | ||
int n = *(uint16_t*)block; | ||
uint8_t *end = block + 2 + n, *q = block + 2 + (n>>1); | ||
while (*q>>6 == 2) --q; | ||
memcpy(new_block + 2, q, end - q); | ||
*(uint16_t*)new_block = end - q; | ||
*(uint16_t*)block = q - block - 2; | ||
} | ||
|
||
void rle_count(const uint8_t *block, int64_t cnt[6]) | ||
{ | ||
const uint8_t *q = block + 2, *end = q + *(uint16_t*)block; | ||
while (q < end) { | ||
int c; | ||
int64_t l; | ||
rle_dec1(q, c, l); | ||
cnt[c] += l; | ||
} | ||
} | ||
|
||
void rle_print(const uint8_t *block, int expand) | ||
{ | ||
const uint16_t *p = (const uint16_t*)block; | ||
const uint8_t *q = block + 2, *end = block + 2 + *p; | ||
while (q < end) { | ||
int c; | ||
int64_t l, x; | ||
rle_dec1(q, c, l); | ||
if (expand) for (x = 0; x < l; ++x) putchar("$ACGTN"[c]); | ||
else printf("%c%ld", "$ACGTN"[c], (long)l); | ||
} | ||
putchar('\n'); | ||
} | ||
|
||
void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]) | ||
{ | ||
int a; | ||
int64_t tot, cnt[6]; | ||
const uint8_t *p; | ||
|
||
y = y >= x? y : x; | ||
tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5]; | ||
if (tot == 0) return; | ||
if (x <= (tot - y) + (tot>>3)) { | ||
int c = 0; | ||
int64_t l, z = 0; | ||
memset(cnt, 0, 48); | ||
p = block + 2; | ||
while (z < x) { | ||
rle_dec1(p, c, l); | ||
z += l; cnt[c] += l; | ||
} | ||
for (a = 0; a != 6; ++a) cx[a] += cnt[a]; | ||
cx[c] -= z - x; | ||
if (cy) { | ||
while (z < y) { | ||
rle_dec1(p, c, l); | ||
z += l; cnt[c] += l; | ||
} | ||
for (a = 0; a != 6; ++a) cy[a] += cnt[a]; | ||
cy[c] -= z - y; | ||
} | ||
} else { | ||
#define move_backward(_x) \ | ||
while (z >= (_x)) { \ | ||
--p; \ | ||
if (*p>>6 != 2) { \ | ||
l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; \ | ||
z -= l; cnt[*p&7] -= l; \ | ||
l = 0; t = 0; \ | ||
} else { \ | ||
l |= (*p&0x3fL) << t; \ | ||
t += 6; \ | ||
} \ | ||
} \ | ||
|
||
int t = 0; | ||
int64_t l = 0, z = tot; | ||
memcpy(cnt, ec, 48); | ||
p = block + 2 + *(const uint16_t*)block; | ||
if (cy) { | ||
move_backward(y) | ||
for (a = 0; a != 6; ++a) cy[a] += cnt[a]; | ||
cy[*p&7] += y - z; | ||
} | ||
move_backward(x) | ||
for (a = 0; a != 6; ++a) cx[a] += cnt[a]; | ||
cx[*p&7] += x - z; | ||
|
||
#undef move_backward | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#ifndef RLE6_H_ | ||
#define RLE6_H_ | ||
|
||
#include <stdint.h> | ||
|
||
#ifdef __GNUC__ | ||
#define LIKELY(x) __builtin_expect((x),1) | ||
#else | ||
#define LIKELY(x) (x) | ||
#endif | ||
#ifdef __cplusplus | ||
|
||
extern "C" { | ||
#endif | ||
|
||
int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]); | ||
int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t end_cnt[6]); | ||
void rle_split(uint8_t *block, uint8_t *new_block); | ||
void rle_count(const uint8_t *block, int64_t cnt[6]); | ||
void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]); | ||
#define rle_rank1a(block, x, cx, ec) rle_rank2a(block, x, -1, cx, 0, ec) | ||
|
||
void rle_print(const uint8_t *block, int expand); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
/****************** | ||
*** 43+3 codec *** | ||
******************/ | ||
|
||
const uint8_t rle_auxtab[8]; | ||
|
||
#define RLE_MIN_SPACE 18 | ||
#define rle_nptr(block) ((uint16_t*)(block)) | ||
|
||
// decode one run (c,l) and move the pointer p | ||
#define rle_dec1(p, c, l) do { \ | ||
(c) = *(p) & 7; \ | ||
if (LIKELY((*(p)&0x80) == 0)) { \ | ||
(l) = *(p)++ >> 3; \ | ||
} else if (LIKELY(*(p)>>5 == 6)) { \ | ||
(l) = (*(p)&0x18L)<<3L | ((p)[1]&0x3fL); \ | ||
(p) += 2; \ | ||
} else { \ | ||
int n = ((*(p)&0x10) >> 2) + 4; \ | ||
(l) = *(p)++ >> 3 & 1; \ | ||
while (--n) (l) = ((l)<<6) | (*(p)++&0x3fL); \ | ||
} \ | ||
} while (0) | ||
|
||
static inline int rle_enc1(uint8_t *p, int c, int64_t l) | ||
{ | ||
if (l < 1LL<<4) { | ||
*p = l << 3 | c; | ||
return 1; | ||
} else if (l < 1LL<<8) { | ||
*p = 0xC0 | l >> 6 << 3 | c; | ||
p[1] = 0x80 | (l & 0x3f); | ||
return 2; | ||
} else if (l < 1LL<<19) { | ||
*p = 0xE0 | l >> 18 << 3 | c; | ||
p[1] = 0x80 | (l >> 12 & 0x3f); | ||
p[2] = 0x80 | (l >> 6 & 0x3f); | ||
p[3] = 0x80 | (l & 0x3f); | ||
return 4; | ||
} else { | ||
int i, shift = 36; | ||
*p = 0xF0 | l >> 42 << 3 | c; | ||
for (i = 1; i < 8; ++i, shift -= 6) | ||
p[i] = 0x80 | (l>>shift & 0x3f); | ||
return 8; | ||
} | ||
} | ||
|
||
#endif |
Oops, something went wrong.