-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.ini
81 lines (76 loc) · 4.2 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# + FILES AND DIRECTORIES SPECIFICATION |
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# List of locations where directories are to be created and where input data files are located.
# - HLA-Vec_embedding : directory location of where the learned distributed vector representation object
# is to be saved.
# - HLA-CNN_models: directory location where HLA-CNN models are to be saved.
# - results: directory location where predicted scores of peptide are to be saved in "inference" mode.
# - train_set : file of the training data set.
# - test_set : file of the test data set.
[FilesDirectories]
# Directories where the learned embedding, the trained models and the prediction scores are saved.
HLA-Vec_embedding = HLA-Vec_embedding
HLA-CNN_models = HLA-CNN_models
results = prediction_results
# Input data files.
train_set = train_test_data/train_data/proteins.txt
# Only one test_set entry is active at a time (currently B2705). The commented-out lines are
# alternative test_set values; to switch, comment out the active line and uncomment another.
#test_set = train_test_data/test_data/A0201_9mer_1028924
test_set = train_test_data/test_data/B2705
#test_set = train_test_data/test_data/B5701
#test_set = train_test_data/test_data/A0201_9mer_1028928
#test_set = train_test_data/test_data/B0702
#test_set = train_test_data/test_data/B2703
#test_set = train_test_data/test_data/A0201_9mer_1028790
#test_set = train_test_data/test_data/A0201_10mer_1028790
#test_set = train_test_data/test_data/A0202
#test_set = train_test_data/test_data/A0203_9mer
#test_set = train_test_data/test_data/A0203_10mer
#test_set = train_test_data/test_data/A0206_9mer
#test_set = train_test_data/test_data/A0206_10mer
#test_set = train_test_data/test_data/A6802_9mer
#test_set = train_test_data/test_data/A6802_10mer
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# + HLA-VEC: DISTRIBUTED REPRESENTATION SPECIFICATION |
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# List of parameters used by the skip-gram model to learn a distributed representation, HLA-Vec, for amino
# acids.
# - vec_dim : dimension of the vector representation.
# - min_count : ignore all amino acids with total counts lower than this threshold.
# - window_size : maximum distance between current and adjacent amino acid within a peptide sequence.
# - sg_model : defines the training algorithm. If sg_model = False, CBOW is used; otherwise
# (sg_model = True), skip-gram is used.
# - iter : number of iterations (epochs) over the corpus.
[HLA-Vec]
# Parameters for learning the HLA-Vec distributed representation of amino acids.
# Dimension of each amino-acid vector.
vec_dim = 15
# Amino acids with a total count below this threshold are ignored.
min_count = 2
# Maximum distance between the current and an adjacent amino acid within a peptide.
window_size = 5
# True selects skip-gram; False selects CBOW.
sg_model = True
# Number of iterations (epochs) over the corpus.
iter = 10
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# + HLA-CNN SPECIFICATIONS |
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# List of parameters used by the HLA-CNN model implemented using Keras API on top of Theano backend.
# The optimizer is hard-coded to Adam.
# - epochs : number of iterations over the training dataset.
# - lr : learning rate for the optimizer
# - filter_size : the dimensionality of the output of the filter
# - filter_length : the length of the filter kernel
# - dropout : percentage of dropout.
[HLA-CNN]
# Hyperparameters for the HLA-CNN model.
# Number of iterations over the training dataset.
epochs = 100
# Learning rate for the optimizer (written without a leading zero, i.e. 0.004).
lr = .004
# Dimensionality of the filter output.
filter_size = 32
# Length of the filter kernel.
filter_length = 7
# Amount of dropout (written without a leading zero, i.e. 0.25).
dropout = .25
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# + PIPELINE SPECIFICATION |
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Steps to perform in the learning pipeline.
# - HLA_Vec: learns a distributed vector representation, HLA-Vec, with the Skip-gram model.
# - train : trains the HLA-CNN classifier and saves the models to file
# - evaluate : makes predictions and evaluates performance (SRCC and AUC) on 'test_set'
# - inference: makes predictions and outputs scores to file on 'test_set'
[Pipeline]
# Switches selecting which pipeline steps to perform. With the values below, the HLA-Vec
# learning step is disabled, while train, evaluate and inference are enabled.
# NOTE(review): presumably a previously saved HLA-Vec embedding is reused when HLA_Vec = False;
# confirm against the code that reads this file.
HLA_Vec = False
train = True
# evaluate and inference both operate on the 'test_set' chosen in [FilesDirectories].
evaluate = True
inference = True