-
Notifications
You must be signed in to change notification settings - Fork 0
/
generarSetsSF.py
82 lines (78 loc) · 2.33 KB
/
generarSetsSF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 15 16:36:43 2016
Creates the 5 protein sets (lff1 to lff5) used for CVF with the LFF method.
@author: Chris
"""
import random
# Load every selected protein together with its classification.
# Each non-blank line is expected to hold at least two whitespace-separated
# fields: an identifier followed by a classification string.
with open("sources/superfamily_level/selectedProteins-superfamilyUmbral50.txt", "r") as f1:
    lines = f1.readlines()

# List of [name, classification] pairs, in the order they appear in the file.
allProteins = []
for line in lines:
    fields = line.split()
    if not fields:
        # A blank line (e.g. a trailing newline at end of file) holds no
        # record; skip it instead of failing with IndexError.
        continue
    # Drop the first character of the identifier -- presumably a leading
    # ">" marker, since the output step writes one back in front of the name.
    name = fields[0][1:]
    # The classification is kept whole; it is deliberately not truncated to
    # its first dot-separated components (class / fold).
    classification = fields[1]
    allProteins.append([name, classification])
# Destination files for the five sets, in the order they are filled.
SET_PATHS = [
    "sources/superfamily_level/lff1_U50.txt",
    "sources/superfamily_level/lff2_U50.txt",
    "sources/superfamily_level/lff3_U50.txt",
    "sources/superfamily_level/lff4_U50.txt",
    "sources/superfamily_level/lff5_U50.txt",
]

# Number of proteins the main loop draws for each set.
PROTEINS_PER_SET = 847


def _write_random_protein(out_file, proteins):
    """Move one randomly chosen protein from *proteins* into *out_file*.

    The chosen [name, superfamily] pair is removed from the list, so the same
    protein is never written twice, and is emitted as ">name superfamily"
    followed by a newline.

    Raises ValueError (from random.randint) if *proteins* is empty.
    """
    idx = random.randint(0, len(proteins) - 1)
    name, superfamily = proteins.pop(idx)
    out_file.write(">%s %s\n" % (name, superfamily))


# Every set receives PROTEINS_PER_SET proteins and the first set receives one
# extra afterwards.
# NOTE(review): the extra draw for set 1 is taken to happen once, after the
# loop (a single leftover protein) -- confirm against the intended set sizes.
required = PROTEINS_PER_SET * len(SET_PATHS) + 1
if len(allProteins) < required:
    # Fail before opening (and thereby truncating) any output file, rather
    # than crashing half-way through with an opaque randint error.
    raise ValueError(
        "need at least %d proteins to build the sets, found %d"
        % (required, len(allProteins))
    )

set_files = []
try:
    # open() instead of the Python 2-only file() builtin.
    for path in SET_PATHS:
        set_files.append(open(path, "w"))

    # One random protein per set on every pass keeps the sets the same size.
    for _ in range(PROTEINS_PER_SET):
        for out_file in set_files:
            _write_random_protein(out_file, allProteins)

    # The leftover protein goes to the first set.
    _write_random_protein(set_files[0], allProteins)
finally:
    # Close whatever was opened, even if a write or a later open failed.
    for out_file in set_files:
        out_file.close()