Skip to content

Commit

Permalink
Switch to new header API.
Browse files Browse the repository at this point in the history
  • Loading branch information
valeriuo authored and daviesrob committed Aug 19, 2019
1 parent 37c3c70 commit b44ffdf
Show file tree
Hide file tree
Showing 26 changed files with 206 additions and 1,533 deletions.
14 changes: 6 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ LZ4_LDFLAGS = -L$(LZ4DIR)



LOBJS= bam_aux.o bam.o bam_import.o sam.o \
sam_header.o bam_plbuf.o
LOBJS= bam_aux.o bam.o sam.o \
bam_plbuf.o
AOBJS= bam_index.o bam_plcmd.o sam_view.o bam_fastq.o \
bam_cat.o bam_md.o bam_reheader.o bam_sort.o bedidx.o \
bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \
Expand Down Expand Up @@ -162,7 +162,7 @@ samtools_h = samtools.h $(htslib_hts_defs_h) $(htslib_sam_h)
stats_isize_h = stats_isize.h $(htslib_khash_h)
tmp_file_h = tmp_file.h $(htslib_sam_h) $(LZ4DIR)/lz4.h

bam.o: bam.c config.h $(bam_h) $(htslib_kstring_h) sam_header.h
bam.o: bam.c config.h $(bam_h) $(htslib_kstring_h)
bam2bcf.o: bam2bcf.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h)
bam2bcf_indel.o: bam2bcf_indel.c config.h $(htslib_hts_h) $(htslib_sam_h) $(bam2bcf_h) $(htslib_khash_h) $(htslib_ksort_h)
bam2depth.o: bam2depth.c config.h $(htslib_sam_h) $(samtools_h) $(sam_opts_h)
Expand All @@ -172,13 +172,12 @@ bam_aux.o: bam_aux.c config.h $(bam_h)
bam_cat.o: bam_cat.c config.h $(htslib_bgzf_h) $(htslib_sam_h) $(htslib_cram_h) $(htslib_khash_h) $(samtools_h)
bam_color.o: bam_color.c config.h $(bam_h)
bam_fastq.o: bam_fastq.c config.h $(htslib_sam_h) $(htslib_klist_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(htslib_thread_pool_h) $(samtools_h) $(sam_opts_h)
bam_import.o: bam_import.c config.h $(htslib_kstring_h) $(bam_h) $(htslib_kseq_h)
bam_index.o: bam_index.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_h) $(samtools_h) $(sam_opts_h)
bam_lpileup.o: bam_lpileup.c config.h $(bam_plbuf_h) $(bam_lpileup_h) $(htslib_ksort_h)
bam_mate.o: bam_mate.c config.h $(htslib_thread_pool_h) $(sam_opts_h) $(htslib_kstring_h) $(htslib_sam_h) $(samtools_h)
bam_md.o: bam_md.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_thread_pool_h) $(sam_opts_h) $(samtools_h)
bam_plbuf.o: bam_plbuf.c config.h $(htslib_hts_h) $(htslib_sam_h) $(bam_plbuf_h)
bam_plcmd.o: bam_plcmd.c config.h $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) sam_header.h $(samtools_h) $(sam_opts_h) $(bam2bcf_h) $(sample_h)
bam_plcmd.o: bam_plcmd.c config.h $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(samtools_h) $(sam_opts_h) $(bam2bcf_h) $(sample_h)
bam_quickcheck.o: bam_quickcheck.c config.h $(htslib_hts_h) $(htslib_sam_h)
bam_reheader.o: bam_reheader.c config.h $(htslib_bgzf_h) $(htslib_sam_h) $(htslib_hfile_h) $(htslib_cram_h) $(samtools_h)
bam_rmdup.o: bam_rmdup.c config.h $(htslib_sam_h) $(sam_opts_h) $(samtools_h) $(bam_h) $(htslib_khash_h)
Expand All @@ -197,16 +196,15 @@ bedidx.o: bedidx.c config.h $(bedidx_h) $(htslib_ksort_h) $(htslib_kseq_h) $(hts
cut_target.o: cut_target.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h) $(samtools_h) $(sam_opts_h)
dict.o: dict.c config.h $(htslib_kseq_h) $(htslib_hts_h)
faidx.o: faidx.c config.h $(htslib_faidx_h) $(htslib_hts_h) $(htslib_hfile_h) $(htslib_kstring_h) $(samtools_h)
padding.o: padding.c config.h $(htslib_kstring_h) $(htslib_sam_h) $(htslib_faidx_h) sam_header.h $(sam_opts_h) $(samtools_h)
padding.o: padding.c config.h $(htslib_kstring_h) $(htslib_sam_h) $(htslib_faidx_h) $(sam_opts_h) $(samtools_h)
phase.o: phase.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(sam_opts_h) $(samtools_h) $(htslib_hts_os_h) $(htslib_kseq_h) $(htslib_khash_h) $(htslib_ksort_h)
sam.o: sam.c config.h $(htslib_faidx_h) $(sam_h)
sam_header.o: sam_header.c config.h sam_header.h $(htslib_khash_h)
sam_opts.o: sam_opts.c config.h $(sam_opts_h)
sam_utils.o: sam_utils.c config.h $(samtools_h)
sam_view.o: sam_view.c config.h $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_thread_pool_h) $(samtools_h) $(sam_opts_h) $(bedidx_h)
sample.o: sample.c config.h $(sample_h) $(htslib_khash_h)
stats_isize.o: stats_isize.c config.h $(stats_isize_h) $(htslib_khash_h)
stats.o: stats.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_hts_h) $(htslib_hts_defs_h) sam_header.h $(htslib_khash_str2int_h) $(samtools_h) $(htslib_khash_h) $(htslib_kstring_h) $(stats_isize_h) $(sam_opts_h) $(bedidx_h)
stats.o: stats.c config.h $(htslib_faidx_h) $(htslib_sam_h) $(htslib_hts_h) $(htslib_hts_defs_h) $(htslib_khash_str2int_h) $(samtools_h) $(htslib_khash_h) $(htslib_kstring_h) $(stats_isize_h) $(sam_opts_h) $(bedidx_h)
bam_markdup.o: bam_markdup.c config.h $(htslib_thread_pool_h) $(htslib_sam_h) $(sam_opts_h) $(samtools_h) $(htslib_khash_h) $(htslib_klist_h) $(htslib_kstring_h) $(tmp_file_h)
tmp_file.o: tmp_file.c config.h $(tmp_file_h) $(htslib_sam_h)

Expand Down
54 changes: 10 additions & 44 deletions bam.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ DEALINGS IN THE SOFTWARE. */
#include <errno.h>
#include "bam.h"
#include "htslib/kstring.h"
#include "sam_header.h"

char *bam_format1(const bam_header_t *header, const bam1_t *b)
{
Expand Down Expand Up @@ -59,7 +58,7 @@ int bam_validate1(const bam_header_t *header, const bam1_t *b)
char *s;

if (b->core.tid < -1 || b->core.mtid < -1) return 0;
if (header && (b->core.tid >= header->n_targets || b->core.mtid >= header->n_targets)) return 0;
if (header && (b->core.tid >= sam_hdr_nref(header) || b->core.mtid >= sam_hdr_nref(header))) return 0;

if (b->data_len < b->core.l_qname) return 0;
s = memchr(bam1_qname(b), '\0', b->core.l_qname);
Expand All @@ -77,60 +76,27 @@ int bam_validate1(const bam_header_t *header, const bam1_t *b)
// FIXME: we should also check the LB tag associated with each alignment
const char *bam_get_library(bam_header_t *h, const bam1_t *b)
{
// Slow and inefficient. Rewrite once we get a proper header API.
const char *rg;
char *cp = h->text;
kstring_t lib = { 0, 0, NULL };
rg = (char *)bam_aux_get(b, "RG");

if (!rg)
return NULL;
else
rg++;

// Header is guaranteed to be nul terminated, so this is valid.
while (*cp) {
char *ID, *LB;
char last = '\t';

// Find a @RG line
if (strncmp(cp, "@RG", 3) != 0) {
while (*cp && *cp != '\n') cp++; // skip line
if (*cp) cp++;
continue;
}

// Find ID: and LB: keys
cp += 4;
ID = LB = NULL;
while (*cp && *cp != '\n') {
if (last == '\t') {
if (strncmp(cp, "LB:", 3) == 0)
LB = cp+3;
else if (strncmp(cp, "ID:", 3) == 0)
ID = cp+3;
}
last = *cp++;
}

if (!ID || !LB)
continue;
if (sam_hdr_find_tag_id(h, "RG", "ID", rg, "LB", &lib) < 0)
return NULL;

// Check it's the correct ID
if (strncmp(rg, ID, strlen(rg)) != 0 || ID[strlen(rg)] != '\t')
continue;
static char LB_text[1024];
int len = lib.l < sizeof(LB_text) - 1 ? lib.l : sizeof(LB_text) - 1;

// Valid until next query
static char LB_text[1024];
for (cp = LB; *cp && *cp != '\t' && *cp != '\n'; cp++)
;
strncpy(LB_text, LB, MIN(cp-LB, 1023));
LB_text[MIN(cp-LB, 1023)] = 0;
memcpy(LB_text, lib.s, len);
LB_text[len] = 0;

// Return it; valid until the next query.
return LB_text;
}
free(lib.s);

return NULL;
return LB_text;
}

int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
Expand Down
12 changes: 1 addition & 11 deletions bam.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,16 +223,6 @@ extern "C" {
*/
// int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b);

/*!
@abstract Read header information from a TAB-delimited list file.
@param fn_list file name for the list
@return a pointer to the header structure
@discussion Each line in this file consists of chromosome name and
the length of chromosome.
*/
bam_header_t *sam_header_read2(const char *fn_list);

/*!
@abstract Read header from a SAM file (if present)
@param fp SAM file handler
Expand Down Expand Up @@ -277,7 +267,7 @@ extern "C" {
@param header pointer to the header structure
@return always 0 currently
*/
static inline int bam_header_write(bamFile fp, const bam_header_t *header) { return bam_hdr_write(fp, header); }
static inline int bam_header_write(bamFile fp, bam_header_t *header) { return bam_hdr_write(fp, header); }

/*!
@abstract Read an alignment from BAM.
Expand Down
76 changes: 12 additions & 64 deletions bam_addrprg.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,20 +148,6 @@ static char *dup_substring(const char *s, const char *slim, size_t *lenp)
return ns;
}

// These are to be replaced by samtools header parser
// Extracts the first @RG line from a string.
//
// text: NUL-terminated header text to search.
// last: passed straight through to dup_substring() as its length
//       out-parameter (presumably receives the length of the duplicated
//       line -- confirm against dup_substring()).
//
// Returns NULL when no @RG line is found; otherwise returns whatever
// dup_substring() returns for the located line.
static char* get_rg_line(const char* text, size_t* last)
{
const char* rg = text;
// Either the text itself begins with "@RG", or we look for an "@RG" that
// directly follows a newline, i.e. one that sits at the start of a line.
if (rg[0] != '@' || rg[1] != 'R' || rg[2] != 'G' ) {
if ((rg = (const char*)strstr(text,"\n@RG")) == NULL) {
return NULL;
}
rg++;//skip initial \n
}
// duplicate the line for return
// NOTE(review): strchr() yields NULL when the line has no trailing '\n';
// dup_substring() is assumed to cope with a NULL limit -- confirm.
return dup_substring(rg, strchr(rg, '\n'), last);
}

// Given a @RG line return the id
static char* get_rg_id(const char *line)
Expand All @@ -173,44 +159,6 @@ static char* get_rg_id(const char *line)
return dup_substring(id, strchr(id, '\t'), NULL);
}

// Confirms the existence of an RG line with a given ID in a bam header
//
// hdr:  header whose text is scanned; must not be NULL (asserted).
// rgid: read-group ID to look for; must not be NULL (asserted).
//
// Returns true if some @RG line yields an ID equal to rgid, false otherwise.
static bool confirm_rg( const bam_hdr_t *hdr, const char* rgid )
{
assert( hdr != NULL && rgid != NULL );

const char *ptr = hdr->text;
bool found = false;
// Walk the header text one @RG line at a time until a match is found or
// the text is exhausted.
while (ptr != NULL && *ptr != '\0' && found == false ) {
size_t end = 0;
char* line = get_rg_line(ptr, &end);
if (line == NULL) break; // No more @RG
char* id;
// Exact string comparison of the extracted ID against the requested one.
if (((id = get_rg_id(line)) != NULL) && !strcmp(id, rgid)) {
found = true;
}
// id may be NULL here; free(NULL) is a no-op.
free(id);
free(line);
// NOTE(review): end is filled in by get_rg_line(). If it is the length of
// the returned line rather than an offset from ptr, this advance can land
// before the line just examined and revisit it -- confirm.
ptr += end;
}
return found;
}

// Returns the ID taken from the first @RG line in the header for which
// get_rg_id() produces a non-NULL result, or NULL if there is none.
//
// hdr: header whose text is scanned; must not be NULL (asserted).
//
// The returned string comes from get_rg_id(); presumably it is heap
// allocated and owned by the caller -- confirm against dup_substring().
static char* get_first_rgid( const bam_hdr_t *hdr )
{
assert( hdr != NULL );
const char *ptr = hdr->text;
char* found = NULL;
// Keep pulling @RG lines until one gives an ID or no @RG line remains.
while (ptr != NULL && *ptr != '\0' && found == NULL ) {
size_t end = 0;
char* line = get_rg_line(ptr, &end);
if ( line ) {
found = get_rg_id(line);
} else break;
free(line);
// Advance by the value get_rg_line() stored through its length argument.
ptr += end;
}
return found;
}

static void usage(FILE *fp)
{
Expand Down Expand Up @@ -395,34 +343,34 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
if (opts->rg_line) {
// Append new RG line to header.
// Check that an RG line with this ID does not already exist
if ( confirm_rg(retval->output_header, opts->rg_id) ) {
kstring_t hdr_line = { 0, 0, NULL };
if (sam_hdr_find_line_id(retval->output_header, "RG", "ID", opts->rg_id, &hdr_line) == 0) {
fprintf(stderr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
free(hdr_line.s);
return false;
}
retval->rg_id = strdup(opts->rg_id);
size_t new_len = strlen( retval->output_header->text ) + strlen( opts->rg_line ) + 2;
char* new_header = malloc(new_len);
if (!new_header) {
fprintf(stderr, "[init] Out of memory whilst writing new header.\n");
if (-1 == sam_hdr_add_lines(retval->output_header, opts->rg_line, strlen(opts->rg_line))) {
fprintf(stderr, "[init] Error adding RG line with ID:%s to the output header.\n", opts->rg_id);
return false;
}
sprintf(new_header,"%s%s\n", retval->output_header->text, opts->rg_line);
free(retval->output_header->text);
retval->output_header->text = new_header;
retval->output_header->l_text = (int)new_len - 1;
retval->rg_id = strdup(opts->rg_id);
} else {
if (opts->rg_id) {
// Confirm what has been supplied exists
if ( !confirm_rg(retval->output_header, opts->rg_id) ) {
kstring_t hdr_line = { 0, 0, NULL };
if (sam_hdr_find_line_id(retval->output_header, "RG", "ID", opts->rg_id, &hdr_line) < 0) {
fprintf(stderr, "RG ID supplied does not exist in header. Supply full @RG line with -r instead?\n");
return false;
}
retval->rg_id = strdup(opts->rg_id);
free(hdr_line.s);
} else {
if ((retval->rg_id = get_first_rgid(retval->output_header)) == NULL ) {
kstring_t rg_id = { 0, 0, NULL };
if (sam_hdr_find_tag_id(retval->output_header, "RG", NULL, NULL, "ID", &rg_id) < 0) {
fprintf(stderr, "No RG specified on command line or in existing header.\n");
return false;
}
retval->rg_id = ks_release(&rg_id);
}
}

Expand Down
10 changes: 4 additions & 6 deletions bam_cat.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ static khash_s2i *hash_rg(const bam_hdr_t *h) {
* Returns updated header on success;
* NULL on failure.
*/
static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t *h,
static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, bam_hdr_t *h,
khash_s2i **rg2id, int *vers_maj_p, int *vers_min_p) {
int i, vers_maj = -1, vers_min = -1;
bam_hdr_t *new_h = NULL;
Expand Down Expand Up @@ -289,7 +289,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
* huffman code. In this situation we can change the meta-data in the
* compression header to renumber an RG value.
*/
int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
int cram_cat(int nfn, char * const *fn, bam_hdr_t *h, const char* outcram)
{
samFile *out;
cram_fd *out_c;
Expand All @@ -313,7 +313,6 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
cram_set_option(out_c, CRAM_OPT_VERSION, vers);
//fprintf(stderr, "Creating cram vers %s\n", vers);

cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text, new_h->l_text)); // needed?
if (sam_hdr_write(out, new_h) < 0) {
print_error_errno("cat", "Couldn't write header");
return -1;
Expand Down Expand Up @@ -404,9 +403,8 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
sam_close(in);
}
sam_close(out);

hash_s2i_free(rg2id);
bam_hdr_destroy(new_h);
hash_s2i_free(rg2id);

return 0;
}
Expand All @@ -419,7 +417,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)

#define BGZF_EMPTY_BLOCK_SIZE 28

int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
int bam_cat(int nfn, char * const *fn, bam_hdr_t *h, const char* outbam)
{
BGZF *fp, *in = NULL;
uint8_t *buf = NULL;
Expand Down
65 changes: 0 additions & 65 deletions bam_import.c

This file was deleted.

1 change: 0 additions & 1 deletion bam_plcmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ DEALINGS IN THE SOFTWARE. */
#include <htslib/kstring.h>
#include <htslib/klist.h>
#include <htslib/khash_str2int.h>
#include "sam_header.h"
#include "samtools.h"
#include "sam_opts.h"

Expand Down
Loading

0 comments on commit b44ffdf

Please sign in to comment.