forked from uci-cbcl/D-GEX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GTEx.py
executable file
·50 lines (30 loc) · 1007 Bytes
/
GTEx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
import sys
import numpy as np
import cmap.io.gct as gct
# Path of the expression matrix file that main() opens with gct.GCT().
GTEx_GCTX = 'GTEx_RNASeq_RPKM_n2921x55993.gctx'
# Tab-separated ID lists. main() takes the first column of every line and
# uses those IDs to select rows of the matrix; rows for the LM list are
# placed before rows for the TG list in the saved array.
# NOTE(review): "lm"/"tg" presumably stand for landmark and target genes --
# confirm against the project documentation.
BGEDV2_LM_ID = 'bgedv2_GTEx_1000G_lm.txt'
BGEDV2_TG_ID = 'bgedv2_GTEx_1000G_tg.txt'
def _read_first_column(path):
    """Return the first tab-separated field of every line in *path*.

    Newline characters are stripped from each line before it is split; no
    other whitespace is removed. The file is closed even if reading fails.
    """
    with open(path) as infile:
        return [line.strip('\n').split('\t')[0] for line in infile]


def _row_indices(position_of, ids, source):
    """Translate *ids* into row positions using the *position_of* mapping.

    Args:
        position_of: dict mapping an ID to its row position in the matrix.
        ids: IDs to look up, in the order the rows should be returned.
        source: name of the file the IDs came from (used in the error text).

    Returns:
        A list of row positions, one per entry of *ids*, in the same order.

    Raises:
        ValueError: if any ID is absent from *position_of*. This is the same
            exception type the previous ``list.index`` lookup raised.
    """
    missing = [gene_id for gene_id in ids if gene_id not in position_of]
    if missing:
        raise ValueError(
            '%d ID(s) from %s are not present in %s, first missing: %r'
            % (len(missing), source, GTEx_GCTX, missing[0])
        )
    return [position_of[gene_id] for gene_id in ids]


def main():
    """Extract selected rows of the GTEx matrix and save them as float64.

    Reads the matrix from GTEx_GCTX, selects the rows whose IDs are listed
    in BGEDV2_LM_ID followed by those listed in BGEDV2_TG_ID, converts the
    selection to float64 and writes it to 'GTEx_float64.npy' in the current
    working directory.

    Raises:
        ValueError: if an ID from either list has no matching row ID.
    """
    GTEx_gctobj = gct.GCT(GTEx_GCTX)
    GTEx_gctobj.read()

    # Keep only the text before the first '.' of each row ID so it can be
    # matched against the ID lists.
    # NOTE(review): presumably this drops a version suffix -- confirm.
    # A real list (not map()) is built so the code also runs on Python 3.
    GTEx_genes = [rid.split('.')[0] for rid in GTEx_gctobj.get_rids()]

    # One pass builds an ID -> row lookup instead of scanning the whole list
    # for every ID. setdefault keeps the FIRST position of a repeated ID,
    # which is exactly what list.index() returned.
    position_of = {}
    for position, gene in enumerate(GTEx_genes):
        position_of.setdefault(gene, position)

    lm_id = _read_first_column(BGEDV2_LM_ID)
    lm_idx = _row_indices(position_of, lm_id, BGEDV2_LM_ID)

    tg_id = _read_first_column(BGEDV2_TG_ID)
    tg_idx = _row_indices(position_of, tg_id, BGEDV2_TG_ID)

    # Rows from the LM list come first, then rows from the TG list.
    genes_idx = lm_idx + tg_idx

    data = GTEx_gctobj.matrix[genes_idx, :].astype('float64')
    np.save('GTEx_float64.npy', data)
# Run the extraction only when this file is executed as a script; importing
# the module must not trigger any file I/O.
if __name__ == '__main__':
    main()