forked from richarddurbin/pbwt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pbwtSample.c
137 lines (111 loc) · 4.23 KB
/
pbwtSample.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/* File: pbwtSample.c
* Author: Richard Durbin ([email protected])
* Copyright (C) Genome Research Limited, 2013
*-------------------------------------------------------------------
* Description: functions for samples and populations
* Exported functions:
* HISTORY:
* Last edited: Sep 22 23:07 2014 (rd)
* * Sep 22 23:07 2014 (rd): move to 64 bit arrays
* Created: Sat Nov 2 18:42:07 2013 (rd)
*-------------------------------------------------------------------
*/
#include "pbwt.h"
/* globals */
static DICT *sampleDict ;
static DICT *populationDict ;
static Array samples ;
/* functions */
void sampleInit (void)
{
sampleDict = dictCreate (4096) ;
populationDict = dictCreate (64) ;
samples = arrayCreate (4096, Sample) ;
array(samples,0,Sample).nameD = 0 ; /* so that all read samples are non-zero */
}
void sampleDestroy (void)
{
if (sampleDict) dictDestroy(sampleDict);
if (populationDict) dictDestroy(populationDict);
if (samples) arrayDestroy(samples);
}
int sampleAdd (char *name, char *father, char *mother, char *pop)
{
int k ;
if (dictAdd (sampleDict, name, &k))
arrayp(samples, k, Sample)->nameD = k ;
return k ;
}
Sample *sample (PBWT *p, int i)
{
i = arr(p->samples, i, int) ;
if (i >= arrayMax(samples))
die ("sample index %d out of range %ld", i, arrayMax(samples)) ;
return arrp(samples,i,Sample) ;
}
char* sampleName (Sample *s) { return dictName (sampleDict, s->nameD) ; }
char* popName (Sample *s) { return dictName (populationDict, s->popD) ; }
PBWT *pbwtSubSample (PBWT *pOld, Array select)
/* select[i] is the position in old of the i'th position in new */
{
if (!pOld || !pOld->yz) die ("subSample called without valid pbwt") ;
PBWT *pNew = pbwtCreate (arrayMax(select), pOld->N) ;
int i, j, nOld = 0 ;
uchar *x = myalloc (pNew->M, uchar) ;
int *ainv = myalloc (pOld->M, int) ;
PbwtCursor *uOld = pbwtCursorCreate (pOld, TRUE, TRUE) ;
PbwtCursor *uNew = pbwtCursorCreate (pNew, TRUE, TRUE) ;
for (i = 0 ; i < pOld->N ; ++i)
{ for (j = 0 ; j < pOld->M ; ++j) ainv[uOld->a[j]] = j ;
for (j = 0 ; j < pNew->M ; ++j) x[j] = uOld->y[ainv[arr(select,j,int)]] ;
for (j = 0 ; j < pNew->M ; ++j) uNew->y[j] = x[uNew->a[j]] ;
pbwtCursorWriteForwards (uNew) ;
pbwtCursorForwardsRead (uOld) ;
}
pbwtCursorToAFend (uNew, pNew) ;
/* need to do this also for missing */
if (pOld->samples)
{ pNew->samples = arrayCreate (pNew->M, int) ;
for (j = 0 ; j < pNew->M ; ++j)
array(pNew->samples,j,int) = arr(pOld->samples,arr(select,j,int),int) ;
}
pNew->chrom = pOld->chrom ; pOld->chrom = 0 ;
pNew->sites = pOld->sites ; pOld->sites = 0 ;
pbwtDestroy (pOld) ; /* destroy will free old samples */
free(x) ; free(ainv) ; pbwtCursorDestroy (uOld) ; pbwtCursorDestroy (uNew) ;
return pNew ;
}
PBWT *pbwtSubSampleInterval (PBWT *pOld, int start, int Mnew)
{
if (start < 0 || Mnew <= 0 || start + Mnew > pOld->M)
die ("bad start %d, Mnew %d in subsample", start, Mnew) ;
int i ;
Array select = arrayCreate (Mnew, int) ;
for (i = 0 ; i < Mnew ; ++i)
array(select, i, int) = start + i ;
PBWT *pNew = pbwtSubSample (pOld, select) ;
arrayDestroy (select) ;
return pNew ;
}
PBWT *pbwtSelectSamples (PBWT *pOld, FILE *fp)
{
if (!pOld || !pOld->samples) die ("pbwtSelectSamples called without pre-existing sample names") ;
int i, j ;
Array newSamples = pbwtReadSamplesFile (fp) ;
if (!arrayMax(newSamples)) return pOld ;
Array oldStart = arrayCreate (arrayMax(samples), int) ; /* start of each sample in old */
Array oldCount = arrayCreate (arrayMax(samples), int) ; /* how many of each sample in old */
for (i = 0 ; i < pOld->M ; ++i)
{ if (!array(oldCount, arr(pOld->samples,i,int), int))
array(oldStart, arr(pOld->samples,i,int), int) = i ;
++arr(oldCount, arr(pOld->samples,i,int), int) ;
}
Array select = arrayCreate (pOld->M, int) ;
for (i = 0 ; i < arrayMax(newSamples) ; ++i)
for (j = 0 ; j < array(oldCount,arr(newSamples,i,int),int) ; ++j)
array(select,arrayMax(select),int) = arr(oldStart,arr(newSamples,i,int),int)++ ;
PBWT *pNew = pbwtSubSample (pOld, select) ;
arrayDestroy (select) ; arrayDestroy (oldCount) ; arrayDestroy (oldStart) ;
return pNew ;
}
/******************* end of file ****************/