-
Notifications
You must be signed in to change notification settings - Fork 54
/
importExcelModel.m
executable file
·928 lines (857 loc) · 32.3 KB
/
importExcelModel.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
function model=importExcelModel(fileName,removeExcMets,printWarnings,ignoreErrors)
% importExcelModel
% Imports a constraint-based model from a Excel file
%
% fileName a Microsoft Excel file to import
% removeExcMets true if exchange metabolites should be removed. This is
% needed to be able to run simulations, but it could also
% be done using simplifyModel at a later stage (optional,
% default true)
% printWarnings true if warnings should be printed (optional, default true)
% ignoreErrors true if errors should be ignored. See below for details
% (optional, default false)
%
% model
% annotation
% taxonomy String with the NCBI Taxonomy ID, as valid
% identifiers.org annotation
% defaultLB Double with the default lower bound values for reactions
% defaultUB Double with the default upper bound values for reactions
% givenName String with the name of the main model author
% familyName String with the surname of the main model author
% email String with the e-mail address of the main model author
% organization String with the organization of the main model author
% note String with additional comments about the model
% name name of model
% id model ID
% rxns reaction ids
% mets metabolite ids
% S stoichiometric matrix
% lb lower bounds
% ub upper bounds
% rev reversibility vector
% c objective coefficients
% b equality constraints for the metabolite equations
% comps compartment ids
% compNames compartment names
% compOutside the id (as in comps) for the compartment
% surrounding each of the compartments
% compMiriams structure with MIRIAM information about the
% compartments
% rxnNames reaction name
% rxnComps compartments for reactions
% grRules reaction to gene rules in text form
% rxnGeneMat reaction-to-gene mapping in sparse matrix form
% subSystems subsystem name for each reaction
% eccodes EC-codes for the reactions
% rxnMiriams structure with MIRIAM information about the reactions
% rxnNotes reaction notes
% rxnReferences reaction references
% rxnConfidenceScores reaction confidence scores
% genes list of all genes
% geneComps compartments for genes
% geneMiriams structure with MIRIAM information about the genes
% geneShortNames gene alternative names (e.g. ERG10)
% metNames metabolite name
% metComps compartments for metabolites
% inchis InChI-codes for metabolites
% metFormulas metabolite chemical formula
% metMiriams structure with MIRIAM information about the metabolites
% metCharges metabolite charge
% unconstrained true if the metabolite is an exchange metabolite
%
% Loads models in the RAVEN Toolbox Excel format. A number of consistency
% checks are performed in order to ensure that the model is valid. These
% can be ignored by putting ignoreErrors to true. However, this is highly
% advised against, as it can result in errors in simulations or other
% functionalities. The RAVEN Toolbox is made to function only on consistent
% models, and the only checks performed are when the model is imported.
%
% NOTE: Most errors are checked for by checkModelStruct, but some
% are checked for in this function as well. Those are ones which relate
% to missing model elements and so on, and which would make it impossible
% to construct the model structure. Those errors cannot be ignored by
% setting ignoreErrors to true.
%
% Usage: model=importExcelModel(fileName,removeExcMets,printWarnings,ignoreErrors)
fileName=char(fileName);
if nargin<2
removeExcMets=true;
end
if nargin<3
printWarnings=true;
end
if nargin<4
ignoreErrors=false;
end
if ~isfile(fileName)
error('Excel file %s cannot be found',string(fileName));
end
%This is to match the order of the fields to those you get from importing
%from SBML
model=[];
model.id=[];
model.name=[];
model.annotation=[];
%Default bounds if not defined
model.annotation.defaultLB=-1000;
model.annotation.defaultUB=1000;
model.rxns={};
model.mets={};
model.S=[];
model.lb=[];
model.ub=[];
model.rev=[];
model.c=[];
model.b=[];
model.comps={};
model.compNames={};
model.compOutside={};
model.compMiriams={};
model.rxnNames={};
model.rxnComps={}; %Will be double later
model.grRules={};
model.rxnGeneMat=[];
model.subSystems={};
model.eccodes={};
model.rxnMiriams={};
model.rxnNotes={};
model.rxnReferences={};
model.rxnConfidenceScores={}; %Will be double later
model.genes={};
model.geneComps={}; %Will be double later
model.geneMiriams={};
model.geneShortNames={};
model.metNames={};
model.metComps=[];
model.inchis={};
model.metFormulas={};
model.metMiriams={};
model.metCharges={}; %Will be double later
model.unconstrained=[];
workbook=loadWorkbook(fileName);
[raw, flag]=loadSheet(workbook,'MODEL');
if flag<0
if printWarnings==true
EM='Could not load the MODEL sheet';
dispEM(EM,false);
end
model.id='UNKNOWN';
model.name='No model details available';
else
raw=cleanSheet(raw);
%It is assumed that the first line is labels and that the second one is
%info
raw(1,:)=upper(raw(1,:));
raw(1,:)=strrep(raw(1,:),'MODELID','ID');
raw(1,:)=strrep(raw(1,:),'MODELNAME','NAME');
raw(1,:)=strrep(raw(1,:),'DESCRIPTION','NAME');
%Loop through the labels
for i=1:numel(raw(1,:))
switch raw{1,i}
case 'ID'
if any(raw{2,i})
model.id=toStr(raw{2,i}); %Should be string already
else
EM='No model ID supplied';
dispEM(EM);
end
case 'NAME'
if any(raw{2,i})
model.name=toStr(raw{2,i}); %Should be string already
else
EM='No model name supplied';
dispEM(EM);
end
case 'DEFAULT LOWER'
if ~isempty(raw{2,i})
try
model.annotation.defaultLB=toDouble(raw{2,i},NaN);
catch
EM='DEFAULT LOWER must be numeric';
dispEM(EM);
end
else
if printWarnings==true
fprintf('NOTE: DEFAULT LOWER not supplied. Uses -1000\n');
end
model.annotation.defaultLB=-1000;
end
case 'DEFAULT UPPER'
if ~isempty(raw{2,i})
try
model.annotation.defaultUB=toDouble(raw{2,i},NaN);
catch
EM='DEFAULT UPPER must be numeric';
dispEM(EM);
end
else
if printWarnings==true
fprintf('NOTE: DEFAULT UPPER not supplied. Uses 1000\n');
end
model.annotation.defaultUB=1000;
end
case 'TAXONOMY'
if any(raw{2,i})
model.annotation.taxonomy=toStr(raw{2,i}); %Should be string already
end
case 'CONTACT GIVEN NAME'
if any(raw{2,i})
model.annotation.givenName=toStr(raw{2,i}); %Should be string already
end
case 'CONTACT FAMILY NAME'
if any(raw{2,i})
model.annotation.familyName=toStr(raw{2,i}); %Should be string already
end
case 'CONTACT EMAIL'
if any(raw{2,i})
model.annotation.email=toStr(raw{2,i}); %Should be string already
end
case 'ORGANIZATION'
if any(raw{2,i})
model.annotation.organization=toStr(raw{2,i}); %Should be string already
end
case 'NOTES'
if any(raw{2,i})
model.annotation.note=toStr(raw{2,i}); %Should be string already
end
end
end
end
%Get compartment information
[raw, flag]=loadSheet(workbook,'COMPS');
if flag<0
if printWarnings==true
EM='Could not load the COMPS sheet. All elements will be assigned to a compartment "s" for "System"';
dispEM(EM,false);
end
model.comps={'s'};
model.compNames={'System'};
else
raw=cleanSheet(raw);
%Map to new captions
raw(1,:)=upper(raw(1,:));
raw(1,:)=strrep(raw(1,:),'COMPABBREV','ABBREVIATION');
raw(1,:)=strrep(raw(1,:),'COMPNAME','NAME');
%Loop through the labels
for i=1:numel(raw(1,:))
switch raw{1,i}
case 'ABBREVIATION'
model.comps=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'NAME'
model.compNames=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'INSIDE'
model.compOutside=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'MIRIAM'
model.compMiriams=raw(2:end,i);
end
end
%Check that necessary fields are loaded
if isempty(model.comps)
EM='There must be a column named ABBREVIATION in the COMPS sheet';
dispEM(EM);
end
if isempty(model.compNames)
model.compNames=model.comps;
if printWarnings==true
EM='There is no column named NAME in the COMPS sheet. ABBREVIATION will be used as name';
dispEM(EM,false);
end
end
model.compMiriams=parseMiriam(model.compMiriams);
end
%Get all the genes and info about them
[raw, flag]=loadSheet(workbook,'GENES');
if flag<0
if printWarnings==true
EM='There is no spreadsheet named GENES';
dispEM(EM,false)
end
else
raw=cleanSheet(raw);
%Map to new captions
raw(1,:)=upper(raw(1,:));
raw(1,:)=strrep(raw(1,:),'GENE NAME','NAME');
%Loop through the labels
foundGenes=false;
for i=1:numel(raw(1,:))
switch raw{1,i}
case 'NAME'
model.genes=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
foundGenes=true;
case 'MIRIAM'
model.geneMiriams=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'SHORT NAME'
model.geneShortNames=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'COMPARTMENT'
model.geneComps=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
end
end
if foundGenes==false
EM='There must be a column named NAME in the GENES sheet';
dispEM(EM);
end
%Its ok if all of them are empty
if all(cellfun(@isempty,model.geneComps))
model.geneComps=[];
end
%Check that geneName contain only strings and no empty strings
if ~iscellstr(model.genes)
EM='All gene names have to be strings';
dispEM(EM);
else
if any(strcmp('',model.genes))
EM='There can be no empty strings in gene names';
dispEM(EM);
end
end
%Check that geneComp contains only strings and no empty string
if ~isempty(model.geneComps)
if ~iscellstr(model.geneComps)
EM='All gene compartments have to be strings';
dispEM(EM);
else
if any(strcmp('',model.geneComps))
EM='There can be no empty strings in gene compartments';
dispEM(EM);
end
end
[I, model.geneComps]=ismember(model.geneComps,model.comps);
EM='The following genes have compartment abbreviations which could not be found:';
dispEM(EM,true,model.genes(~I));
end
end
model.geneMiriams=parseMiriam(model.geneMiriams);
%Loads the reaction data
[raw, flag]=loadSheet(workbook,'RXNS');
if flag<0
EM='Could not load the RXNS sheet';
dispEM(EM);
end
raw=cleanSheet(raw);
%Map to new captions
raw(1,:)=upper(raw(1,:));
raw(1,:)=strrep(raw(1,:),'RXNID','ID');
%Loop through the labels
equations={};
reactionReplacement={};
for i=1:numel(raw(1,:))
switch raw{1,i}
case 'ID'
model.rxns=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'NAME'
model.rxnNames=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'EQUATION'
equations=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'EC-NUMBER'
model.eccodes=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'GENE ASSOCIATION'
model.grRules=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'LOWER BOUND'
try
model.lb=cellfun(@(x) toDouble(x,NaN),raw(2:end,i));
catch
EM='The lower bounds must be numerical values';
dispEM(EM);
end
case 'UPPER BOUND'
try
model.ub=cellfun(@(x) toDouble(x,NaN),raw(2:end,i));
catch
EM='The upper bounds must be numerical values';
dispEM(EM);
end
case 'OBJECTIVE'
try
model.c=cellfun(@(x) toDouble(x,0),raw(2:end,i));
catch
EM='The objective coefficients must be numerical values';
dispEM(EM);
end
case 'COMPARTMENT'
model.rxnComps=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'SUBSYSTEM'
subsystems=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'REPLACEMENT ID'
reactionReplacement=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'MIRIAM'
model.rxnMiriams=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'NOTE'
model.rxnNotes=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'REFERENCE'
model.rxnReferences=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'CONFIDENCE SCORE'
model.rxnConfidenceScores=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
end
end
if ~isempty(model.rxnConfidenceScores)
model.rxnConfidenceScores=str2double(model.rxnConfidenceScores);
end
for i=1:numel(subsystems)
model.subSystems{i,1}=cellstr(strsplit(subsystems{i,1},';'));
end
%Check that all necessary reaction info has been loaded
if isempty(equations)
EM='There must be a column named EQUATION in the RXNS sheet';
dispEM(EM);
end
if isempty(model.rxns)
if printWarnings==true
EM='There is no column named ID in the RXNS sheet. The reactions will be named as "r1", "r2"...';
dispEM(EM,false);
end
I=num2cell((1:numel(equations))');
model.rxns=strcat('r',cellfun(@num2str,I,'UniformOutput',false));
end
%Check if some other stuff is loaded and populate with default values
%otherwise
if isempty(model.rxnNames)
model.rxnNames=cell(numel(model.rxns),1);
model.rxnNames(:)={''};
if printWarnings==true
EM='There is no column named NAME in the RXNS sheet. Empty strings will be used as reaction names';
dispEM(EM,false);
end
end
if isempty(model.lb)
%This is not set here since the reversibility isn't known yet
model.lb=nan(numel(model.rxns),1);
if printWarnings==true
EM='There is no column named LOWER BOUND in the RXNS sheet. Default bounds will be used';
dispEM(EM,false);
end
end
if isempty(model.ub)
model.ub=nan(numel(model.rxns),1);
if printWarnings==true
EM='There is no column named UPPER BOUND in the RXNS sheet. Default bounds will be used';
dispEM(EM,false);
end
end
if isempty(model.c)
model.c=zeros(numel(model.rxns),1);
if printWarnings==true
EM='There is no column named OBJECTIVE in the RXNS sheet';
dispEM(EM,false);
end
end
%Either all reactions must have a compartment string or none of them. Check
%if it's only empty and if so return it to []
if ~isempty(model.rxnComps)
if all(cellfun(@isempty,model.rxnComps))
model.rxnComps=[];
end
end
%Construct the rxnMiriams structure
model.rxnMiriams=parseMiriam(model.rxnMiriams);
%Replace the reaction IDs for those IDs that have a corresponding
%replacement name.
I=cellfun(@any,reactionReplacement);
model.rxns(I)=reactionReplacement(I);
%Check that there are no empty strings in reactionIDs or equations
if any(strcmp('',model.rxns))
EM='There are empty reaction IDs';
dispEM(EM);
end
if any(strcmp('',equations))
EM='There are empty equations';
dispEM(EM);
end
if ~isempty(model.rxnComps)
if any(strcmp('',model.rxnComps))
EM='Either all reactions must have an associated compartment string or none of them';
dispEM(EM);
end
end
if ~isempty(model.grRules)
tempRules=model.grRules;
for i=1:length(model.rxns)
%Check that all gene associations have a match in the gene list
if ~isempty(model.grRules{i})
tempRules{i}=regexprep(tempRules{i},' and | or ','>'); %New format: Genes are separated 'and' and 'or' strings with parentheses
tempRules{i}=regexprep(tempRules{i},'(',''); %New format: Genes are separated 'and' and 'or' strings with parentheses
tempRules{i}=regexprep(tempRules{i},')',''); %New format: Genes are separated 'and' and 'or' strings with parentheses
indexesNew=strfind(tempRules{i},'>'); %Old format: Genes are separated by ":" for AND and ";" for OR
indexes=strfind(tempRules{i},':'); %Old format: Genes are separated by ":" for AND and ";" for OR
indexes=unique([indexesNew indexes strfind(tempRules{i},';')]);
if isempty(indexes)
%See if you have a match
I=find(strcmp(tempRules{i},model.genes));
if isempty(I)
EM=['The gene association in reaction ' model.rxns{i} ' (' tempRules{i} ') is not present in the gene list'];
dispEM(EM);
end
else
temp=[0 indexes numel(tempRules{i})+1];
for j=1:numel(indexes)+1
%The reaction has several associated genes
geneName=tempRules{i}(temp(j)+1:temp(j+1)-1);
I=find(strcmp(geneName,model.genes));
if isempty(I)
EM=['The gene association in reaction ' model.rxns{i} ' (' geneName ') is not present in the gene list'];
dispEM(EM);
end
end
end
%In order to adhere to the COBRA standards it should be like
%this: -If only one gene then no parentheses -If only "and" or
%only "or" there should only be one set of parentheses -If both
%"and" and "or", then split on "or". This is not complete, but
%it's the type of relationship supported by the Excel
%formulation
aSign=strfind(model.grRules{i},':');
oSign=strfind(model.grRules{i},';');
if isempty(aSign) && isempty(oSign)
model.grRules{i}=model.grRules{i};
else
if isempty(aSign)
model.grRules{i}=['(' strrep(model.grRules{i},';',' or ') ')'];
else
if isempty(oSign)
model.grRules{i}=['(' strrep(model.grRules{i},':',' and ') ')'];
else
model.grRules{i}=['((' strrep(model.grRules{i},';',') or (') '))'];
model.grRules{i}=strrep(model.grRules{i},':',' and ');
end
end
end
end
end
end
%Check that the compartment for each reaction can be found
if ~isempty(model.rxnComps)
[I, model.rxnComps]=ismember(model.rxnComps,model.comps);
EM='The following reactions have compartment abbreviations which could not be found:';
dispEM(EM,true,model.rxns(~I));
end
%Get all the metabolites and info about them
[raw, flag]=loadSheet(workbook,'METS');
if flag<0
if printWarnings==true
EM='There is no spreadsheet named METS. The metabolites will be named "m1", "m2"... and assigned to the first compartment';
dispEM(EM,false);
end
%Parse the equations to find out how many metabolites there are
metsForParsing=parseRxnEqu(equations);
I=num2cell((1:numel(metsForParsing))');
model.mets=strcat('m',cellfun(@num2str,I,'UniformOutput',false));
model.metComps=ones(numel(model.mets),1);
model.unconstrained=zeros(numel(model.mets),1);
model.metNames=metsForParsing;
else
raw=cleanSheet(raw);
%Map to new captions
raw(1,:)=upper(raw(1,:));
raw(1,:)=strrep(raw(1,:),'METID','ID');
raw(1,:)=strrep(raw(1,:),'METNAME','NAME');
%Loop through the labels
metReplacement={};
for i=1:numel(raw(1,:))
switch raw{1,i}
case 'ID'
model.mets=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'NAME'
model.metNames=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'UNCONSTRAINED'
model.unconstrained=cellfun(@boolToDouble,raw(2:end,i));
%NaN is returned if the values couldn't be parsed
EM='The UNCONSTRAINED property for the following metabolites must be "true"/"false", 1/0, TRUE/FALSE or not set:';
dispEM(EM,true,model.mets(isnan(model.unconstrained)));
case 'MIRIAM'
model.metMiriams=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'COMPOSITION'
model.metFormulas=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'INCHI'
model.inchis=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'COMPARTMENT'
model.metComps=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
%Check that all metabolites have compartments defined
if any(strcmp('',model.metComps))
EM='All metabolites must have an associated compartment string';
dispEM(EM);
end
case 'REPLACEMENT ID'
metReplacement=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
case 'CHARGE'
model.metCharges=cellfun(@toStr,raw(2:end,i),'UniformOutput',false);
end
end
%Check that necessary fields are loaded (METID)
if isempty(model.mets)
EM='There must be a column named ID in the METS sheet';
dispEM(EM);
end
%Check that some other stuff is loaded and use default values otherwise
if isempty(model.metNames)
model.metNames=cell(numel(model.mets),1);
if printWarnings==true
EM='There is no column named NAME in the METS sheet. ID will be used as name';
dispEM(EM,false);
end
end
if isempty(model.unconstrained)
model.unconstrained=zeros(numel(model.mets),1);
if printWarnings==true
EM='There is no column named UNCONSTRAINED in the METS sheet. All metabolites will be constrained';
dispEM(EM,false);
end
end
if isempty(model.metComps)
model.metComps=cell(numel(model.mets),1);
model.metComps(:)=model.comps(1);
if printWarnings==true
EM='There is no column named COMPARTMENT in the METS sheet. All metabolites will be assigned to the first compartment in COMPS. Note that RAVEN makes extensive use of metabolite names and compartments. Some features will therefore not function correctly if metabolite compartments are not correctly assigned';
dispEM(EM,false);
end
end
%The composition should be loaded from InChIs when available
I=find(~cellfun(@isempty,model.inchis));
for i=1:numel(I)
S=regexp(model.inchis(I(i)),'/','split');
S=S{1};
if numel(S)>=2
%Don't copy if it doesn't look good
model.metFormulas(I(i))=S(2);
end
end
%Check that the compartment for each metabolite can be found. Also
%convert from id to index
[I, model.metComps]=ismember(model.metComps,model.comps);
EM='The following metabolites have compartment abbreviations which could not be found:';
dispEM(EM,true,model.mets(~I));
%Check that the model.mets vector is unique. The problem is that the
%checkModelStruct cannot check for that since only the metReplacements
%(if used) end up in the model structure
I=false(numel(model.mets),1);
[J, K]=unique(model.mets);
if numel(J)~=numel(model.mets)
L=1:numel(model.mets);
L(K)=[];
I(L)=true;
end
EM='The following metabolites are duplicates:';
dispEM(EM,~ignoreErrors,model.mets(I));
%Check that there are no metabolite IDs which are numbers. This would
%give errors when parsing the equations
I=cellfun(@str2double,model.mets);
EM='The following metabolites have names which cannot be distinguished from numbers:';
dispEM(EM,~ignoreErrors,model.mets(~isnan(I)));
I=cellfun(@str2double,metReplacement);
EM='The following metabolites have names which cannot be distinguished from numbers:';
dispEM(EM,~ignoreErrors,metReplacement(~isnan(I)));
%Replace the metabolite IDs for those IDs that have a corresponding
%replacement metabolite. This is not used for matching, but will be
%checked for consistency with SBML naming conventions
metsForParsing=model.mets; %This is because the equations are written with this
I=cellfun(@any,metReplacement);
model.mets(I)=metReplacement(I);
%If the metabolite name isn't set, replace it with the metabolite id
I=~cellfun(@any,model.metNames);
model.metNames(I)=model.mets(I);
%Construct the metMiriams structure
model.metMiriams=parseMiriam(model.metMiriams);
%Either all metabolites have charge or none of them. Check if it's only
%empty and if so return it to []
if ~isempty(model.metCharges)
if all(cellfun(@isempty,model.metCharges))
model.metCharges=[];
end
end
if ~isempty(model.metCharges)
model.metCharges=str2double(model.metCharges);
end
end
%Everything seems fine with the metabolite IDs, compartments, genes, and
%reactions
%Parse the equations
[model.S, mets, badRxns, model.rev]=constructS(equations,metsForParsing,model.rxns);
model.rev=model.rev*1; %Typecast to double
%Add default constraints
model.lb(isnan(model.lb))=model.annotation.defaultLB.*model.rev(isnan(model.lb));
model.ub(isnan(model.ub))=model.annotation.defaultUB;
%Reorder the S matrix so that it fits with the metabolite list in the
%structure
[~, I]=ismember(mets,metsForParsing);
model.S=model.S(I,:);
%Print warnings about the reactions which contain the same metabolite as
%both reactants and products
EM='The following reactions have metabolites which are present more than once. Only the net reactions will be exported:';
dispEM(EM,false,model.rxns(badRxns));
model.b=zeros(numel(model.mets),1);
%Fix grRules and reconstruct rxnGeneMat
[grRules,rxnGeneMat] = standardizeGrRules(model,true);
model.grRules = grRules;
model.rxnGeneMat = rxnGeneMat;
%Remove unused fields
if all(cellfun(@isempty,model.compOutside))
model=rmfield(model,'compOutside');
end
if all(cellfun(@isempty,model.compMiriams))
model=rmfield(model,'compMiriams');
end
if all(cellfun(@isempty,model.rxnNames))
model=rmfield(model,'rxnNames');
end
if isempty(model.rxnComps)
model=rmfield(model,'rxnComps');
end
if all(cellfun(@isempty,model.grRules))
model=rmfield(model,'grRules');
end
if isfield(model,'rxnGeneMat') && isempty(model.rxnGeneMat)
model=rmfield(model,'rxnGeneMat');
end
if all(cellfun(@isempty,model.subSystems))
model=rmfield(model,'subSystems');
end
if all(cellfun(@isempty,model.eccodes))
model=rmfield(model,'eccodes');
end
if all(cellfun(@isempty,model.rxnMiriams))
model=rmfield(model,'rxnMiriams');
end
if all(cellfun(@isempty,model.rxnNotes))
model=rmfield(model,'rxnNotes');
end
if all(cellfun(@isempty,model.rxnReferences))
model=rmfield(model,'rxnReferences');
end
if isempty(model.rxnConfidenceScores)
model=rmfield(model,'rxnConfidenceScores');
end
if isempty(model.genes)
model=rmfield(model,'genes');
end
if isempty(model.geneComps)
model=rmfield(model,'geneComps');
end
if isempty(model.geneMiriams)
model=rmfield(model,'geneMiriams');
end
if all(cellfun(@isempty,model.geneShortNames))
model=rmfield(model,'geneShortNames');
end
if all(cellfun(@isempty,model.inchis))
model=rmfield(model,'inchis');
end
if all(cellfun(@isempty,model.metFormulas))
model=rmfield(model,'metFormulas');
end
if all(cellfun(@isempty,model.metMiriams))
model=rmfield(model,'metMiriams');
end
if isempty(model.metCharges)
model=rmfield(model,'metCharges');
end
%The model structure has now been reconstructed but it can still contain
%many types of errors. The checkModelConsistency function is used to make
%sure that naming and mapping of stuff looks good
checkModelStruct(model,~ignoreErrors);
if removeExcMets==true
model=simplifyModel(model);
end
end
function miriamStruct=parseMiriam(strings,miriamStruct)
%Gets the names and values of Miriam-string. Nothing fancy at all, just to
%prevent using the same code for metabolites, genes, and reactions. The
%function also allows for supplying a miriamStruct and the info will then
%be added
if nargin<2
miriamStruct=cell(numel(strings),1);
end
for i=1:numel(strings)
if any(strings{i})
%A Miriam string can be several ids separated by ";". Each id is
%"name(..:..)/value"; an old format when value is separated by
%colon is also supported
I=regexp(strings{i},';','split');
if isfield(miriamStruct{i},'name')
startIndex=numel(miriamStruct{i}.name);
miriamStruct{i}.name=[miriamStruct{i}.name;cell(numel(I),1)];
miriamStruct{i}.value=[miriamStruct{i}.value;cell(numel(I),1)];
else
startIndex=0;
miriamStruct{i}.name=cell(numel(I),1);
miriamStruct{i}.value=cell(numel(I),1);
end
for j=1:numel(I)
if any(strfind(I{j},'/'))
index=max(strfind(I{j},'/'));
elseif any(strfind(I{j},':'))
index=max(strfind(I{j},':'));
end
if exist('index','var') & any(index)
miriamStruct{i}.name{startIndex+j}=I{j}(1:index-1);
miriamStruct{i}.value{startIndex+j}=I{j}(index+1:end);
else
EM=['"' I{j} '" is not a valid MIRIAM string. The format must be "identifier/value" or identifier:value'];
dispEM(EM);
end
end
end
end
end
%For converting a value to string. This is used instead of num2str because
%I want to convert empty cells to {''}.
function y=toStr(x)
%x can be empty, numerical, string or boolean. It cannot be NaN. Boolean
%values will be converted to '1'/'0'
if isempty(x)
y='';
else
y=num2str(x);
end
end
%For converting to numeric. This is used instead of str2num because I want
%to be able to choose what empty values should be mapped to.
%
% default the value to use for empty input
function y=toDouble(x,default)
if isempty(x) %Note that this catches '' as well
y=default;
else
if isnumeric(x)
y=x;
else
y=str2double(x);
%This happens if the input couldn't be converted. Note that the
%input itself cannot be NaN since it was fixed in clean imported
if isnan(y)
EM=['Cannot convert the string "' x '" to double'];
dispEM(EM);
end
end
end
end
%For converting boolean (the UNCONSTRAINED field) to double (the
%model.unconstrained field)
function y=boolToDouble(x)
if isempty(x)
y=0;
return;
end
if islogical(x)
y=x*1; %Typecast to double
return;
end
if isnumeric(x)
if x~=0
y=1;
return;
else
y=0;
return;
end
end
if ischar(x)
if strcmpi(x,'TRUE')
y=1;
return;
end
if strcmpi(x,'FALSE')
y=0;
return;
end
end
y=NaN; %This means that the input couldn't be parsed
end