Skip to content

Commit f12692f

Browse files
author
pep04706
committed
from wifionICE
1 parent e41f252 commit f12692f

File tree

4 files changed

+61
-20
lines changed

4 files changed

+61
-20
lines changed

include/bloom.h

+17
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,21 @@ typedef struct _bfilter {
5454
unsigned char *filter; /**< filter sequence*/
5555
} Bfilter;
5656

57+
58+
/**
59+
* @brief kmer/bloom filter parameters derived from the kmer size and the number of hash functions (see init_bloom_par)
60+
*
61+
* */
62+
typedef struct _bloom_par {
63+
int kmersize; /**< kmer size (number of elements) */
64+
int hashNum; /**< number of hash functions used to construct the filter */
65+
int kmersizeBytes; /**< bytes needed to store the kmer (4 bases per byte): kmersize / 4, rounded up */
66+
int halfsizeBytes; /**< half size in bytes, kmersize / 8 rounded up (needed to decide whether
67+
to store a kmer or its reverse complement) */
68+
int hangingBases; /**< number of hanging bases that don't complete a byte (kmersize % 4) */
69+
int hasOverhead; /**< 1 when kmersize % 4 != 0 (the last byte is only partially used), 0 otherwise */
70+
} Bloom_par;
71+
5772
/**
5873
* @brief stores a processed kmer (2 bits per nucleotide)
5974
*
@@ -79,6 +94,8 @@ void free_Bfilter(Bfilter *ptr_bf);
7994

8095
Procs_kmer *init_procs(int kmersize, int hashNum);
8196

97+
Procs_kmer *init_procs(int kmersize, int hashNum);
98+
8299
void free_procs(Procs_kmer *procs);
83100

84101
double score_read_in_filter(unsigned char *read, int L, Procs_kmer *procs,

include/defines.h

+18-18
Original file line numberDiff line numberDiff line change
@@ -29,52 +29,52 @@
2929
#ifndef DEFINES_H_
3030
#define DEFINES_H_
3131

32-
// General
32+
// General
3333
#define B_LEN 131072 /**< buffer size */
3434
#define MAX_FILENAME 300 /**< Maximum # chars in a filename */
35-
#define bool short /**< define a bool type */
35+
#define bool int16_t /**< define a bool type */
3636
#define true 1 /**< assign true to 1 */
3737
#define false 0 /**< assign false to 0 */
3838

3939
#ifndef max
40-
#define max( a, b ) ( ((a) > (b)) ? (a) : (b) ) /**< max function */
40+
#define max(a, b) (((a) > (b)) ? (a) : (b)) /**< max function */
4141
#endif
4242

4343
#ifndef min
44-
#define min( a, b ) ( ((a) < (b)) ? (a) : (b) ) /**< min function */
44+
#define min(a, b) (((a) < (b)) ? (a) : (b)) /**< min function */
4545
#endif
4646

4747
#ifndef mem_usageMB
48-
#define mem_usageMB() fprintf(stderr, \
48+
#define mem_usageMB() fprintf(stderr, \
4949
"- Current allocated memory: %ld MB.\n", \
5050
alloc_mem >> 20) /**< returns allocated memory in MB */
5151
#endif
5252

5353
#ifndef mem_usage
54-
#define mem_usage() fprintf(stderr, \
54+
#define mem_usage() fprintf(stderr, \
5555
"- Current allocated memory: %ld Bytes.\n", \
5656
alloc_mem) /**< returns allocated memory in Bytes */
5757
#endif
5858

5959

6060
// Q_report, S_report
61-
#define DEFAULT_MINQ 27 /**< Minimum quality threshold */
62-
#define DEFAULT_NTILES 96 /**< Default number of tiles */
63-
#define DEFAULT_NQ 46 /**< Default number of different quality values */
64-
#define ZEROQ 33 /**< ASCII code of lowest quality value (!) */
65-
#define N_ACGT 5 /**< Number of different nucleotides in the fq file */
61+
#define DEFAULT_MINQ 27 /**< Minimum quality threshold */
62+
#define DEFAULT_NTILES 96 /**< Default number of tiles */
63+
#define DEFAULT_NQ 46 /**< Default number of different quality values */
64+
#define ZEROQ 33 /**< ASCII code of lowest quality value (!) */
65+
#define N_ACGT 5 /**< Number of different nucleotides in the fq file */
6666
#define MAX_RCOMMAND 4000 /**< Maximum # chars in R command*/
6767

6868

6969
// Fasta files
7070
#define FA_ENTRY_BUF 20 /**< buffer for fasta entries*/
7171

72-
// Tree
72+
// Tree
7373
#define T_ACGT 4 /**< Number of children per node in tree*/
7474
#define NPOOL_1D 1048576 /**< Number of Node structs allocated in inner dim */
7575
#define NPOOL_2D 16 /**< Number of *Node allocated in outer dim */
7676
#define MAX_FASZ_TREE 1e7 /**< Maximum fasta size for constructing a tree.
77-
DECIDE A SENSIBLE SIZE! */
77+
DECIDE A SENSIBLE SIZE */
7878
// BloomFilter
7979
#define BITSPERCHAR 8 /**< number of bits in a char */
8080
#define BASESPERCHAR 4 /**< number of nucleotides that can fit in a char */
@@ -90,17 +90,17 @@
9090
#define STRIP 3 /**< Looks for the largest N-free sequence */
9191
// trimQ only
9292
#define FRAC 3 /**< Discards a read if it contains > percent lowQ bases*/
93-
#define ENDSFRAC 4 /**< trims at the ends and discards a read if the
93+
#define ENDSFRAC 4 /**< trims at the ends and discards a read if the
9494
remaining part has more than > percent lowQ bases */
95-
#define GLOBAL 5 /**< Trims a fixed # bases from e left and right*/
95+
#define GLOBAL 5 /**< Trims a fixed # bases from the left and right*/
9696

9797
#define TREE 1 /**< Use a tree to look for contaminations*/
9898
#define SA 2 /**< Use a suffix array to look for contaminations*/
9999
#define BLOOM 3 /**< Use a bloom filter to look for contaminations*/
100100

101101
#define ERROR 1000 /**< Encodes an error when reading in trimN, trimQ, method
102102
options in trimFilter */
103-
#define DEFAULT_MINL 25 /**< Default minimum length under which we discard
103+
#define DEFAULT_MINL 25 /**< Default minimum length under which we discard
104104
the reads */
105105

106106
// Classification of filters
@@ -110,8 +110,8 @@
110110
#define NNNN 3 /**< N's presence filter */
111111
#define GOOD 4 /**< Good reads */
112112

113-
//Number of filters
114-
#define NFILTERS 4 /**< total number of filters */
113+
// Number of filters
114+
#define NFILTERS 4 /**< total number of filters */
115115

116116
#endif // endif DEFINES_H_
117117

include/init_trimFilter.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
#ifndef INIT_TRIMFILTER_H_
3131
#define INIT_TRIMFILTER_H_
3232

33+
#include <stdint.h>  // int16_t, which defines.h uses for its bool macro
3334
#include "defines.h"
3435

35-
3636
/**
3737
* @brief adapter struct
3838
* @note UNFINISHED!

src/bloom.c

+25-1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,30 @@ void free_Bfilter(Bfilter * ptr_bf) {
139139
alloc_mem -= ptr_bf -> bfsizeBytes;
140140
}
141141

142+
/**
143+
* @brief initializes bloom parameters
144+
* @param kmersize number of elements of the kmer
145+
* @param hashNum number of hash functions to be computed
146+
* @return Bloom_par structure
147+
*
148+
* */
149+
Bloom_par init_bloom_par(int kmersize, int hashNum) {
150+
Bloom_par res;
151+
res.kmersizeBytes = kmersize / 4;
152+
res.halfsizeBytes = kmersize / 8;
153+
res.hangingBases = 0;
154+
res.hasOverhead = 0;
155+
res.hashNum = hashNum;
156+
if (kmersize % 8 != 0) {
157+
res.halfsizeBytes++;
158+
if ((res.hangingBases = kmersize % 4) > 0) {
159+
res.kmersizeBytes++;
160+
res.hasOverhead = 1;
161+
}
162+
}
163+
return res;
164+
}
165+
142166
/**
143167
* @brief initializes a Procs structure, given the kmersize and the
144168
* number of hash functions
@@ -448,7 +472,7 @@ bool insert_and_fetch(Bfilter *ptr_bf, Procs_kmer* procs) {
448472
int i = 0;
449473
uint64_t modValue;
450474
// iterates through hashed values adding it to the filter
451-
for (i = 0; i < procs -> hashNum; i++) {
475+
for (i = 0; i < ptr_bf -> hashNum; i++) {
452476
modValue = (procs -> hashValues[i]) % (ptr_bf -> bfsizeBits);
453477
result &= ((__sync_fetch_and_or(&(ptr_bf->filter[modValue/BITSPERCHAR]),
454478
bitMask[modValue % BITSPERCHAR]))>>(modValue % BITSPERCHAR)) & 1;

0 commit comments

Comments
 (0)