diff --git a/doc/external/kegg/getKEGGModelForOrganism.html b/doc/external/kegg/getKEGGModelForOrganism.html
index 20e67bc1..5d08bbe7 100644
--- a/doc/external/kegg/getKEGGModelForOrganism.html
+++ b/doc/external/kegg/getKEGGModelForOrganism.html
@@ -755,693 +755,702 @@
SOURCE CODE %If no FASTA file is supplied, then just remove all genes which are not for
0440
0441 if isempty(fastaFile)
-0442 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']);
-0443 if ismember(organismID,{'eukaryotes','prokaryotes'})
-0444 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1);
-0445 if strcmp(organismID,'eukaryotes')
-0446 proxyid='hsa';
-0447
-0448 else
-0449 proxyid='eco';
-0450
-0451 end
-0452 [~, phylDistId]=ismember(proxyid,phylDists.ids);
-0453 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:)));
-0454 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false);
-0455 I=ismember(upper(taxIDs),upper(idsToKeep));
-0456 else
-0457
-0458 organismID=strcat(organismID,':');
-0459
-0460 if length(organismID)==4
-0461 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes);
-0462 elseif length(organismID)==5
-0463 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes);
-0464 end
-0465 end
-0466
-0467 model.genes=model.genes(I);
-0468 model.rxnGeneMat=model.rxnGeneMat(:,I);
-0469 fprintf('COMPLETE\n');
-0470 end
-0471
-0472
-0473 if keepSpontaneous==true
-0474 fprintf('Removing non-spontaneous reactions without GPR rules... ');
-0475 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous');
-0476 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous);
-0477 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous));
-0478 else
-0479 fprintf('Removing reactions without GPR rules... ');
-0480 I=~any(model.rxnGeneMat,2);
-0481 end
-0482 model=removeReactions(model,I,true);
-0483 fprintf('COMPLETE\n');
-0484
-0485
-0486 fprintf('Fixing gene names in the model... ');
-0487
-0488 model.genes=regexprep(model.genes,'^\w+?:','');
-0489 fprintf('COMPLETE\n');
-0490
-0491
-0492 if isempty(fastaFile)
-0493
-0494 fprintf('Constructing GPR associations and annotations for the model... ');
-0495 model.grRules=cell(numel(model.rxns),1);
-0496 model.grRules(:)={''};
-0497
-0498 for i=1:numel(model.rxns)
-0499
-0500 I=find(model.rxnGeneMat(i,:));
-0501 if any(I)
-0502 model.grRules{i}=['(' model.genes{I(1)}];
-0503 for j=2:numel(I)
-0504 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}];
-0505 end
-0506 model.grRules{i}=[model.grRules{i} ')'];
-0507 end
-0508 end
-0509
-0510 [grRules,rxnGeneMat] = standardizeGrRules(model);
-0511 model.grRules = grRules;
-0512 model.rxnGeneMat = rxnGeneMat;
-0513
-0514
-0515 model.geneMiriams='';
-0516 for i=1:numel(model.genes)
-0517 model.geneMiriams{i,1}.name{1,1}='kegg.genes';
-0518 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1});
-0519 end
-0520
-0521 for i=1:numel(model.rxns)
-0522 if ~isempty(model.rxnNotes{i})
-0523 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i));
-0524 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. ');
-0525 else
-0526 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'};
-0527 end
+0442
+0443
+0444 phylDistsFull=getPhylDist(fullfile(dataDir,'keggdb'),true);
+0445 if ~ismember(organismID,[phylDistsFull.ids 'eukaryotes' 'prokaryotes'])
+0446 EM='Provided organismID is incorrect. Only species abbreviations from KEGG Species List or "eukaryotes"/"prokaryotes" are allowed.';
+0447 disp(EM);
+0448 error('Fatal error occured. See the details above');
+0449 end
+0450
+0451 fprintf(['Pruning the model from <strong>non-' organismID '</strong> genes... ']);
+0452 if ismember(organismID,{'eukaryotes','prokaryotes'})
+0453 phylDists=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1);
+0454 if strcmp(organismID,'eukaryotes')
+0455 proxyid='hsa';
+0456
+0457 else
+0458 proxyid='eco';
+0459
+0460 end
+0461 [~, phylDistId]=ismember(proxyid,phylDists.ids);
+0462 idsToKeep=phylDists.ids(~isinf(phylDists.distMat(phylDistId,:)));
+0463 taxIDs=cellfun(@(x) x{1},cellfun(@(x) strsplit(x,':'),model.genes,'UniformOutput',false),'UniformOutput',false);
+0464 I=ismember(upper(taxIDs),upper(idsToKeep));
+0465 else
+0466
+0467 organismID=strcat(organismID,':');
+0468
+0469 if length(organismID)==4
+0470 I=cellfun(@(x) strcmpi(x(1:4),organismID),model.genes);
+0471 elseif length(organismID)==5
+0472 I=cellfun(@(x) strcmpi(x(1:5),organismID),model.genes);
+0473 end
+0474 end
+0475
+0476 model.genes=model.genes(I);
+0477 model.rxnGeneMat=model.rxnGeneMat(:,I);
+0478 fprintf('COMPLETE\n');
+0479 end
+0480
+0481
+0482 if keepSpontaneous==true
+0483 fprintf('Removing non-spontaneous reactions without GPR rules... ');
+0484 load(fullfile(ravenPath,'external','kegg','keggRxns.mat'),'isSpontaneous');
+0485 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous);
+0486 spontRxnsWithGenes=model.rxns(any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous));
+0487 else
+0488 fprintf('Removing reactions without GPR rules... ');
+0489 I=~any(model.rxnGeneMat,2);
+0490 end
+0491 model=removeReactions(model,I,true);
+0492 fprintf('COMPLETE\n');
+0493
+0494
+0495 fprintf('Fixing gene names in the model... ');
+0496
+0497 model.genes=regexprep(model.genes,'^\w+?:','');
+0498 fprintf('COMPLETE\n');
+0499
+0500
+0501 if isempty(fastaFile)
+0502
+0503 fprintf('Constructing GPR associations and annotations for the model... ');
+0504 model.grRules=cell(numel(model.rxns),1);
+0505 model.grRules(:)={''};
+0506
+0507 for i=1:numel(model.rxns)
+0508
+0509 I=find(model.rxnGeneMat(i,:));
+0510 if any(I)
+0511 model.grRules{i}=['(' model.genes{I(1)}];
+0512 for j=2:numel(I)
+0513 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}];
+0514 end
+0515 model.grRules{i}=[model.grRules{i} ')'];
+0516 end
+0517 end
+0518
+0519 [grRules,rxnGeneMat] = standardizeGrRules(model);
+0520 model.grRules = grRules;
+0521 model.rxnGeneMat = rxnGeneMat;
+0522
+0523
+0524 model.geneMiriams='';
+0525 for i=1:numel(model.genes)
+0526 model.geneMiriams{i,1}.name{1,1}='kegg.genes';
+0527 model.geneMiriams{i,1}.value{1,1}=strcat(lower(organismID),model.genes{i,1});
0528 end
-0529 fprintf('COMPLETE\n\n');
-0530 fprintf('*** Model reconstruction complete ***\n');
-0531 return;
-0532 end
-0533
-0534
-0535 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1);
-0536 [~, phylDistId]=ismember(model.id,phylDistStruct.ids);
-0537
-0538
-0539
-0540
-0541 if isinf(maxPhylDist) || maxPhylDist==-1
-0542 maxPhylDist=1000;
-0543 end
-0544
-0545
-0546
-0547 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa'));
-0548 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa'));
-0549 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw'));
-0550 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm'));
-0551 outFiles=listFiles(fullfile(outDir,'*.out'));
-0552
-0553
-0554
-0555
-0556 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]);
-0557
-0558 if ~isempty(missingFASTA)
-0559 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file')
-0560 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n'];
-0561 dispEM(EM);
-0562 end
-0563
-0564 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true);
-0565
-0566
-0567 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns');
-0568 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta'));
-0569 else
-0570 fprintf('Generating the KEGG Orthology specific multi-FASTA files... COMPLETE\n');
-0571 end
-0572
-0573 if isunix
-0574 if ismac
-0575 binEnd='.mac';
-0576 else
-0577 binEnd='';
-0578 end
-0579 elseif ispc
-0580 binEnd='';
-0581 else
-0582 EM='Unknown OS, exiting.';
-0583 disp(EM);
-0584 return
-0585 end
-0586
-0587
-0588 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]);
-0589 if ~isempty(missingAligned)
-0590 if seqIdentity==-1
-0591 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... ');
-0592 else
-0593 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... ');
-0594 end
-0595 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned)));
-0596 progressFlag=0;
-0597
-0598
-0599 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa'));
-0600
-0601 for i=1:numel(missingAligned)
-0602
-0603
-0604
-0605 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&...
-0606 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file')
-0607
-0608
-0609
-0610 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file')
-0611 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist'];
-0612 dispEM(EM,false);
-0613 continue;
-0614 end
-0615
-0616
-0617
-0618 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
-0619 if s.bytes<=0
-0620 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w');
-0621 fclose(fid);
+0529
+0530 for i=1:numel(model.rxns)
+0531 if ~isempty(model.rxnNotes{i})
+0532 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (without HMMs).',model.rxnNotes(i));
+0533 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. ');
+0534 else
+0535 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (without HMMs)'};
+0536 end
+0537 end
+0538 fprintf('COMPLETE\n\n');
+0539 fprintf('*** Model reconstruction complete ***\n');
+0540 return;
+0541 end
+0542
+0543
+0544 phylDistStruct=getPhylDist(fullfile(dataDir,'keggdb'),maxPhylDist==-1);
+0545 [~, phylDistId]=ismember(model.id,phylDistStruct.ids);
+0546
+0547
+0548
+0549
+0550 if isinf(maxPhylDist) || maxPhylDist==-1
+0551 maxPhylDist=1000;
+0552 end
+0553
+0554
+0555
+0556 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa'));
+0557 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa'));
+0558 alignedWorking=listFiles(fullfile(dataDir,'aligned','*.faw'));
+0559 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm'));
+0560 outFiles=listFiles(fullfile(outDir,'*.out'));
+0561
+0562
+0563
+0564
+0565 missingFASTA=setdiff(KOModel.rxns,[fastaFiles;alignedFiles;hmmFiles;outFiles]);
+0566
+0567 if ~isempty(missingFASTA)
+0568 if ~exist(fullfile(dataDir,'keggdb','genes.pep'),'file')
+0569 EM=['The file ''genes.pep'' cannot be located at ' strrep(dataDir,'\','/') '/ and should be downloaded from the KEGG FTP.\n'];
+0570 dispEM(EM);
+0571 end
+0572
+0573 fastaModel=removeReactions(KOModel,setdiff(KOModel.rxns,missingFASTA),true,true);
+0574
+0575
+0576 fastaModel=permuteModel(fastaModel,randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(fastaModel.rxns)),'rxns');
+0577 constructMultiFasta(fastaModel,fullfile(dataDir,'keggdb','genes.pep'),fullfile(dataDir,'fasta'));
+0578 else
+0579 fprintf('Generating the KEGG Orthology specific multi-FASTA files... COMPLETE\n');
+0580 end
+0581
+0582 if isunix
+0583 if ismac
+0584 binEnd='.mac';
+0585 else
+0586 binEnd='';
+0587 end
+0588 elseif ispc
+0589 binEnd='';
+0590 else
+0591 EM='Unknown OS, exiting.';
+0592 disp(EM);
+0593 return
+0594 end
+0595
+0596
+0597 missingAligned=setdiff(KOModel.rxns,[alignedFiles;hmmFiles;alignedWorking;outFiles]);
+0598 if ~isempty(missingAligned)
+0599 if seqIdentity==-1
+0600 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... ');
+0601 else
+0602 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... ');
+0603 end
+0604 missingAligned=missingAligned(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingAligned)));
+0605 progressFlag=0;
+0606
+0607
+0608 fastaFiles=listFiles(fullfile(dataDir,'fasta','*.fa'));
+0609
+0610 for i=1:numel(missingAligned)
+0611
+0612
+0613
+0614 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&...
+0615 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file')
+0616
+0617
+0618
+0619 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file')
+0620 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist'];
+0621 dispEM(EM,false);
0622 continue;
0623 end
0624
-0625
-0626
-0627 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w');
-0628 fclose(fid);
-0629
-0630
-0631
-0632
-0633
-0634
-0635 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
-0636 phylDist=inf(numel(fastaStruct),1);
-0637 for j=1:numel(fastaStruct)
-0638
-0639 index=strfind(fastaStruct(j).Header,':');
-0640 if any(index)
-0641 abbrev=fastaStruct(j).Header(1:index(1)-1);
-0642 [~, index]=ismember(abbrev,phylDistStruct.ids);
-0643 if any(index)
-0644 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId);
-0645 end
-0646 end
-0647 end
-0648
-0649
-0650 phylDist(phylDist>maxPhylDist)=[];
-0651
-0652
-0653 [~, order]=sort(phylDist);
-0654
-0655
-0656 if nSequences<=numel(fastaStruct)
-0657 fastaStruct=fastaStruct(order(1:nSequences));
-0658 else
-0659 fastaStruct=fastaStruct(order);
-0660 end
-0661
-0662
-0663
-0664 if numel(fastaStruct)>1
-0665 if seqIdentity==0.9
-0666 cdhitInp100=tempname;
-0667 fastawrite(cdhitInp100,fastaStruct);
-0668 cdhitInp90=tempname;
-0669 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']);
-0670 if status~=0
-0671 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0672 dispEM(EM);
-0673 end
-0674
-0675 if exist(cdhitInp100, 'file')
-0676 delete([cdhitInp100 '*']);
-0677 end
-0678 tmpFile=tempname;
-0679 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' tmpFile '" -c 0.9 -n 5 -M 2000 -aL 0.8']);
-0680 if status~=0
-0681 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0682 dispEM(EM);
-0683 end
-0684
-0685 if exist(cdhitInp90, 'file')
-0686 delete([cdhitInp90 '*']);
-0687 end
-0688 elseif seqIdentity==0.5
-0689 cdhitInp100=tempname;
-0690 fastawrite(cdhitInp100,fastaStruct);
-0691 cdhitInp90=tempname;
-0692 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']);
-0693 if status~=0
-0694 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0695 dispEM(EM);
+0625
+0626
+0627 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
+0628 if s.bytes<=0
+0629 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w');
+0630 fclose(fid);
+0631 continue;
+0632 end
+0633
+0634
+0635
+0636 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w');
+0637 fclose(fid);
+0638
+0639
+0640
+0641
+0642
+0643
+0644 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
+0645 phylDist=inf(numel(fastaStruct),1);
+0646 for j=1:numel(fastaStruct)
+0647
+0648 index=strfind(fastaStruct(j).Header,':');
+0649 if any(index)
+0650 abbrev=fastaStruct(j).Header(1:index(1)-1);
+0651 [~, index]=ismember(abbrev,phylDistStruct.ids);
+0652 if any(index)
+0653 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId);
+0654 end
+0655 end
+0656 end
+0657
+0658
+0659 phylDist(phylDist>maxPhylDist)=[];
+0660
+0661
+0662 [~, order]=sort(phylDist);
+0663
+0664
+0665 if nSequences<=numel(fastaStruct)
+0666 fastaStruct=fastaStruct(order(1:nSequences));
+0667 else
+0668 fastaStruct=fastaStruct(order);
+0669 end
+0670
+0671
+0672
+0673 if numel(fastaStruct)>1
+0674 if seqIdentity==0.9
+0675 cdhitInp100=tempname;
+0676 fastawrite(cdhitInp100,fastaStruct);
+0677 cdhitInp90=tempname;
+0678 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']);
+0679 if status~=0
+0680 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0681 dispEM(EM);
+0682 end
+0683
+0684 if exist(cdhitInp100, 'file')
+0685 delete([cdhitInp100 '*']);
+0686 end
+0687 tmpFile=tempname;
+0688 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' tmpFile '" -c 0.9 -n 5 -M 2000 -aL 0.8']);
+0689 if status~=0
+0690 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0691 dispEM(EM);
+0692 end
+0693
+0694 if exist(cdhitInp90, 'file')
+0695 delete([cdhitInp90 '*']);
0696 end
-0697
-0698 if exist(cdhitInp100, 'file')
-0699 delete([cdhitInp100 '*']);
-0700 end
-0701 cdhitInp50=tempname;
-0702 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' cdhitInp50 '" -c 0.9 -n 5 -M 2000 -aL 0.8']);
-0703 if status~=0
-0704 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0705 dispEM(EM);
-0706 end
-0707
-0708 if exist(cdhitInp90, 'file')
-0709 delete([cdhitInp90 '*']);
-0710 end
-0711 tmpFile=tempname;
-0712 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp50 '" -o "' tmpFile '" -c 0.5 -n 3 -M 2000 -aL 0.8']);
-0713 if status~=0
-0714 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0715 dispEM(EM);
-0716 end
-0717
-0718 if exist(cdhitInp50, 'file')
-0719 delete([cdhitInp50 '*']);
-0720 end
-0721 elseif seqIdentity~=-1
-0722 cdhitInpCustom=tempname;
-0723 fastawrite(cdhitInpCustom,fastaStruct);
-0724 tmpFile=tempname;
-0725 if seqIdentity<=1 && seqIdentity>0.7
-0726 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 5 -M 2000']);
-0727 elseif seqIdentity>0.6
-0728 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 4 -M 2000']);
-0729 elseif seqidentity>0.5
-0730 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 3 -M 2000']);
-0731 elseif seqidentity>0.4
-0732 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 2 -M 2000']);
-0733 else
-0734 EM='The provided seqIdentity must be between 0 and 1\n';
-0735 dispEM(EM);
-0736 end
-0737 if status~=0
-0738 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0739 dispEM(EM);
-0740 end
-0741
-0742 if exist(cdhitInpCustom, 'file')
-0743 delete([cdhitInpCustom '*']);
-0744 end
-0745 else
-0746
-0747
-0748 tmpFile=tempname;
-0749 fastawrite(tmpFile,fastaStruct);
-0750 end
-0751
-0752 if ismac
-0753 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
-0754 elseif isunix
-0755 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
-0756 elseif ispc
-0757 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-win','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
-0758 end
-0759 if status~=0
-0760
-0761
-0762
-0763
-0764 if any(regexp(output,'Only 1 sequence found'))
-0765 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f');
-0766 else
-0767 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output];
-0768 dispEM(EM);
-0769 end
-0770 end
-0771
-0772 if exist(tmpFile, 'file')
-0773 delete([tmpFile '*']);
-0774 end
-0775 else
-0776
-0777
-0778
-0779
-0780 if numel(fastaStruct)==1
-0781 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct);
-0782 end
-0783 end
-0784
-0785 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f');
-0786
-0787
-0788
-0789 if progressFlag==0 && i>numel(missingAligned)*0.25
-0790 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100);
-0791 progressFlag=progressFlag+1;
-0792 elseif (progressFlag==1 && i>=numel(missingAligned)*0.5) || (progressFlag==2 && i>=numel(missingAligned)*0.75)
-0793 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100);
-0794 progressFlag=progressFlag+1;
-0795 end
-0796 end
-0797 end
-0798 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
-0799 else
-0800 if seqIdentity==-1
-0801 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
-0802 else
-0803 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
-0804 end
-0805 end
-0806
-0807
-0808 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]);
-0809 if ~isempty(missingHMMs)
-0810 fprintf('Training the KEGG Orthology specific HMMs... ');
-0811 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs)));
-0812 progressFlag=0;
-0813
-0814
-0815 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa'));
-0816
-0817 for i=1:numel(missingHMMs)
-0818
-0819
-0820 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file')
-0821
-0822
-0823
-0824
-0825
-0826 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file')
-0827 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist'];
-0828 dispEM(EM,false);
-0829 continue;
-0830 end
-0831
-0832
-0833
-0834 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']));
-0835 if s.bytes<=0
-0836 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w');
-0837 fclose(fid);
+0697 elseif seqIdentity==0.5
+0698 cdhitInp100=tempname;
+0699 fastawrite(cdhitInp100,fastaStruct);
+0700 cdhitInp90=tempname;
+0701 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp100 '" -o "' cdhitInp90 '" -c 1.0 -n 5 -M 2000']);
+0702 if status~=0
+0703 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0704 dispEM(EM);
+0705 end
+0706
+0707 if exist(cdhitInp100, 'file')
+0708 delete([cdhitInp100 '*']);
+0709 end
+0710 cdhitInp50=tempname;
+0711 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp90 '" -o "' cdhitInp50 '" -c 0.9 -n 5 -M 2000 -aL 0.8']);
+0712 if status~=0
+0713 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0714 dispEM(EM);
+0715 end
+0716
+0717 if exist(cdhitInp90, 'file')
+0718 delete([cdhitInp90 '*']);
+0719 end
+0720 tmpFile=tempname;
+0721 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInp50 '" -o "' tmpFile '" -c 0.5 -n 3 -M 2000 -aL 0.8']);
+0722 if status~=0
+0723 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0724 dispEM(EM);
+0725 end
+0726
+0727 if exist(cdhitInp50, 'file')
+0728 delete([cdhitInp50 '*']);
+0729 end
+0730 elseif seqIdentity~=-1
+0731 cdhitInpCustom=tempname;
+0732 fastawrite(cdhitInpCustom,fastaStruct);
+0733 tmpFile=tempname;
+0734 if seqIdentity<=1 && seqIdentity>0.7
+0735 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 5 -M 2000']);
+0736 elseif seqIdentity>0.6
+0737 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 4 -M 2000']);
+0738 elseif seqidentity>0.5
+0739 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 3 -M 2000']);
+0740 elseif seqidentity>0.4
+0741 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n 2 -M 2000']);
+0742 else
+0743 EM='The provided seqIdentity must be between 0 and 1\n';
+0744 dispEM(EM);
+0745 end
+0746 if status~=0
+0747 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0748 dispEM(EM);
+0749 end
+0750
+0751 if exist(cdhitInpCustom, 'file')
+0752 delete([cdhitInpCustom '*']);
+0753 end
+0754 else
+0755
+0756
+0757 tmpFile=tempname;
+0758 fastawrite(tmpFile,fastaStruct);
+0759 end
+0760
+0761 if ismac
+0762 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
+0763 elseif isunix
+0764 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
+0765 elseif ispc
+0766 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-win','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
+0767 end
+0768 if status~=0
+0769
+0770
+0771
+0772
+0773 if any(regexp(output,'Only 1 sequence found'))
+0774 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f');
+0775 else
+0776 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output];
+0777 dispEM(EM);
+0778 end
+0779 end
+0780
+0781 if exist(tmpFile, 'file')
+0782 delete([tmpFile '*']);
+0783 end
+0784 else
+0785
+0786
+0787
+0788
+0789 if numel(fastaStruct)==1
+0790 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct);
+0791 end
+0792 end
+0793
+0794 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f');
+0795
+0796
+0797
+0798 if progressFlag==0 && i>numel(missingAligned)*0.25
+0799 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100);
+0800 progressFlag=progressFlag+1;
+0801 elseif (progressFlag==1 && i>=numel(missingAligned)*0.5) || (progressFlag==2 && i>=numel(missingAligned)*0.75)
+0802 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.fa')))/numel(fastaFiles))*100);
+0803 progressFlag=progressFlag+1;
+0804 end
+0805 end
+0806 end
+0807 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
+0808 else
+0809 if seqIdentity==-1
+0810 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
+0811 else
+0812 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
+0813 end
+0814 end
+0815
+0816
+0817 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]);
+0818 if ~isempty(missingHMMs)
+0819 fprintf('Training the KEGG Orthology specific HMMs... ');
+0820 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs)));
+0821 progressFlag=0;
+0822
+0823
+0824 alignedFiles=listFiles(fullfile(dataDir,'aligned','*.fa'));
+0825
+0826 for i=1:numel(missingHMMs)
+0827
+0828
+0829 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file')
+0830
+0831
+0832
+0833
+0834
+0835 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file')
+0836 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist'];
+0837 dispEM(EM,false);
0838 continue;
0839 end
-0840
-0841
-0842 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w');
-0843 fclose(fid);
-0844
-0845
-0846 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']);
-0847 if status~=0
-0848 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output];
-0849 dispEM(EM);
-0850 end
-0851
-0852
-0853 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']));
-0854
-0855
-0856
-0857 if progressFlag==0 && i>numel(missingHMMs)*0.25
-0858 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100);
-0859 progressFlag=progressFlag+1;
-0860 elseif (progressFlag==1 && i>=numel(missingHMMs)*0.5) || (progressFlag==2 && i>=numel(missingHMMs)*0.75)
-0861 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100);
-0862 progressFlag=progressFlag+1;
-0863 end
-0864 end
-0865 end
-0866 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
-0867 else
-0868 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n');
-0869 end
-0870
-0871
-0872
-0873 missingOUT=setdiff(KOModel.rxns,outFiles);
-0874 if ~isempty(missingOUT)
-0875 fprintf(['Querying <strong>' strrep(fastaFile,'\','/') '</strong> against the KEGG Orthology specific HMMs... ']);
-0876 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT)));
-0877 progressFlag=0;
-0878
-0879
-0880 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm'));
-0881 for i=1:numel(missingOUT)
-0882
-0883
-0884 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file')
-0885
-0886
-0887
-0888 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file')
-0889 EM=['The HMM file for ' missingOUT{i} ' does not exist'];
-0890 dispEM(EM,false);
-0891 continue;
-0892 end
-0893
-0894
-0895
-0896 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
-0897 fclose(fid);
-0898
-0899
-0900 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']));
-0901 if s.bytes<=0
-0902 continue;
-0903 end
-0904
-0905
-0906 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']);
-0907 if status~=0
-0908 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output];
-0909 dispEM(EM);
-0910 end
-0911
-0912
-0913 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
-0914 fwrite(fid,output);
-0915 fclose(fid);
-0916
-0917
-0918
-0919 if progressFlag==0 && i>numel(missingOUT)*0.25
-0920 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100);
-0921 progressFlag=progressFlag+1;
-0922 elseif (progressFlag==1 && i>=numel(missingOUT)*0.5) || (progressFlag==2 && i>=numel(missingOUT)*0.75)
-0923 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100);
-0924 progressFlag=progressFlag+1;
-0925 end
-0926 end
-0927 end
-0928 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
-0929 else
-0930 fprintf(['Querying <strong>' strrep(fastaFile,'\','/') '</strong> against the KEGG Orthology specific HMMs... COMPLETE\n']);
-0931 end
-0932
-0933
-0934
-0935
-0936 fprintf('Parsing the HMM search results... ');
-0937
-0938 koGeneMat=zeros(numel(KOModel.rxns),3000);
-0939 genes=cell(3000,1);
-0940
-0941
-0942 hTable = java.util.Hashtable;
-0943
-0944 geneCounter=0;
-0945 for i=1:numel(KOModel.rxns)
-0946 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file')
-0947 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r');
-0948 beginMatches=false;
-0949 while 1
-0950
-0951 tline = fgetl(fid);
-0952
-0953
-0954 if ~ischar(tline)
-0955 break;
-0956 end
-0957
-0958 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------'))
-0959 break;
-0960 end
+0840
+0841
+0842
+0843 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']));
+0844 if s.bytes<=0
+0845 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w');
+0846 fclose(fid);
+0847 continue;
+0848 end
+0849
+0850
+0851 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w');
+0852 fclose(fid);
+0853
+0854
+0855 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']);
+0856 if status~=0
+0857 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output];
+0858 dispEM(EM);
+0859 end
+0860
+0861
+0862 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']));
+0863
+0864
+0865
+0866 if progressFlag==0 && i>numel(missingHMMs)*0.25
+0867 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100);
+0868 progressFlag=progressFlag+1;
+0869 elseif (progressFlag==1 && i>=numel(missingHMMs)*0.5) || (progressFlag==2 && i>=numel(missingHMMs)*0.75)
+0870 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(dataDir,'*.hmm')))/numel(alignedFiles))*100);
+0871 progressFlag=progressFlag+1;
+0872 end
+0873 end
+0874 end
+0875 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
+0876 else
+0877 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n');
+0878 end
+0879
+0880
+0881
+0882 missingOUT=setdiff(KOModel.rxns,outFiles);
+0883 if ~isempty(missingOUT)
+0884 fprintf(['Querying <strong>' strrep(fastaFile,'\','/') '</strong> against the KEGG Orthology specific HMMs... ']);
+0885 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT)));
+0886 progressFlag=0;
+0887
+0888
+0889 hmmFiles=listFiles(fullfile(dataDir,'hmms','*.hmm'));
+0890 for i=1:numel(missingOUT)
+0891
+0892
+0893 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file')
+0894
+0895
+0896
+0897 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file')
+0898 EM=['The HMM file for ' missingOUT{i} ' does not exist'];
+0899 dispEM(EM,false);
+0900 continue;
+0901 end
+0902
+0903
+0904
+0905 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
+0906 fclose(fid);
+0907
+0908
+0909 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']));
+0910 if s.bytes<=0
+0911 continue;
+0912 end
+0913
+0914
+0915 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']);
+0916 if status~=0
+0917 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output];
+0918 dispEM(EM);
+0919 end
+0920
+0921
+0922 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
+0923 fwrite(fid,output);
+0924 fclose(fid);
+0925
+0926
+0927
+0928 if progressFlag==0 && i>numel(missingOUT)*0.25
+0929 fprintf('%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100);
+0930 progressFlag=progressFlag+1;
+0931 elseif (progressFlag==1 && i>=numel(missingOUT)*0.5) || (progressFlag==2 && i>=numel(missingOUT)*0.75)
+0932 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%*.*f%% complete',5,2,(numel(listFiles(fullfile(outDir,'*.out')))/numel(hmmFiles))*100);
+0933 progressFlag=progressFlag+1;
+0934 end
+0935 end
+0936 end
+0937 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
+0938 else
+0939 fprintf(['Querying <strong>' strrep(fastaFile,'\','/') '</strong> against the KEGG Orthology specific HMMs... COMPLETE\n']);
+0940 end
+0941
+0942
+0943
+0944
+0945 fprintf('Parsing the HMM search results... ');
+0946
+0947 koGeneMat=zeros(numel(KOModel.rxns),3000);
+0948 genes=cell(3000,1);
+0949
+0950
+0951 hTable = java.util.Hashtable;
+0952
+0953 geneCounter=0;
+0954 for i=1:numel(KOModel.rxns)
+0955 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file')
+0956 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r');
+0957 beginMatches=false;
+0958 while 1
+0959
+0960 tline = fgetl(fid);
0961
-0962 if beginMatches==false
-0963
-0964 if any(strfind(tline,'E-value '))
-0965
-0966 tline = fgetl(fid);
-0967 beginMatches=true;
-0968 end
-0969 else
-0970
-0971 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
-0972 elements=regexp(tline,' ','split');
-0973 elements=elements(cellfun(@any,elements));
-0974
-0975
-0976 score=str2double(elements{1});
-0977 gene=elements{9};
-0978 if score<=cutOff
-0979
-0980
-0981 if score==0
-0982 score=10^-250;
-0983 end
-0984
-0985
-0986 I=hTable.get(gene);
-0987 if any(I)
-0988 koGeneMat(i,I)=score;
-0989 else
-0990 geneCounter=geneCounter+1;
-0991
-0992 hTable.put(gene,geneCounter);
-0993 genes{geneCounter}=gene;
-0994 koGeneMat(i,geneCounter)=score;
-0995 end
-0996 end
-0997 else
-0998 break;
-0999 end
-1000 end
-1001 end
-1002 fclose(fid);
-1003 end
-1004 end
-1005 fprintf('COMPLETE\n');
-1006
-1007 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... ');
-1008 koGeneMat=koGeneMat(:,1:geneCounter);
-1009
-1010
-1011 for i=1:size(koGeneMat,1)
-1012 J=find(koGeneMat(i,:));
-1013 if any(J)
-1014 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0;
-1015 end
-1016 end
-1017
-1018
-1019 for i=1:size(koGeneMat,2)
-1020 J=find(koGeneMat(:,i));
-1021 if any(J)
-1022 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0;
-1023 end
-1024 end
-1025 fprintf('COMPLETE\n');
+0962
+0963 if ~ischar(tline)
+0964 break;
+0965 end
+0966
+0967 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------'))
+0968 break;
+0969 end
+0970
+0971 if beginMatches==false
+0972
+0973 if any(strfind(tline,'E-value '))
+0974
+0975 tline = fgetl(fid);
+0976 beginMatches=true;
+0977 end
+0978 else
+0979
+0980 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
+0981 elements=regexp(tline,' ','split');
+0982 elements=elements(cellfun(@any,elements));
+0983
+0984
+0985 score=str2double(elements{1});
+0986 gene=elements{9};
+0987 if score<=cutOff
+0988
+0989
+0990 if score==0
+0991 score=10^-250;
+0992 end
+0993
+0994
+0995 I=hTable.get(gene);
+0996 if any(I)
+0997 koGeneMat(i,I)=score;
+0998 else
+0999 geneCounter=geneCounter+1;
+1000
+1001 hTable.put(gene,geneCounter);
+1002 genes{geneCounter}=gene;
+1003 koGeneMat(i,geneCounter)=score;
+1004 end
+1005 end
+1006 else
+1007 break;
+1008 end
+1009 end
+1010 end
+1011 fclose(fid);
+1012 end
+1013 end
+1014 fprintf('COMPLETE\n');
+1015
+1016 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... ');
+1017 koGeneMat=koGeneMat(:,1:geneCounter);
+1018
+1019
+1020 for i=1:size(koGeneMat,1)
+1021 J=find(koGeneMat(i,:));
+1022 if any(J)
+1023 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0;
+1024 end
+1025 end
1026
-1027 fprintf('Adding gene annotations to the model... ');
-1028
-1029 model.genes=genes(1:geneCounter);
-1030 model.grRules=cell(numel(model.rxns),1);
-1031 model.grRules(:)={''};
-1032 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes));
-1033
-1034
-1035 for i=1:numel(model.rxns)
-1036 if isstruct(model.rxnMiriams{i})
-1037
-1038 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'));
-1039 KOs=model.rxnMiriams{i}.value(I);
-1040
-1041 J=ismember(KOModel.rxns,KOs);
-1042 [~, K]=find(koGeneMat(J,:));
-1043
-1044 if any(K)
-1045 model.rxnGeneMat(i,K)=1;
-1046
-1047
-1048 L=sum(koGeneMat(J,:),2)==0;
-1049 model.rxnMiriams{i}.value(I(L))=[];
-1050 model.rxnMiriams{i}.name(I(L))=[];
-1051 end
-1052 end
-1053 end
-1054 fprintf('COMPLETE\n');
-1055
-1056
-1057
-1058
-1059
-1060
-1061
-1062
-1063 if keepSpontaneous==true
-1064
-1065
-1066
-1067
-1068
-1069 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... ');
-1070 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous);
-1071 model=removeReactions(model,I,true,true);
-1072 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes);
-1073 model=removeReactions(model,I,true,true);
-1074 else
-1075
-1076
-1077 fprintf('Removing reactions which after HMM search no longer have GPR rules... ');
-1078 I=~any(model.rxnGeneMat,2);
-1079 model=removeReactions(model,I,true,true);
-1080 end
-1081 fprintf('COMPLETE\n');
-1082
-1083 fprintf('Constructing GPR rules and finalizing the model... ');
-1084
-1085 for i=1:numel(model.rxns)
-1086
-1087 I=find(model.rxnGeneMat(i,:));
-1088 if any(I)
-1089 model.grRules{i}=['(' model.genes{I(1)}];
-1090 for j=2:numel(I)
-1091 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}];
-1092 end
-1093 model.grRules{i}=[model.grRules{i} ')'];
-1094 end
-1095 end
-1096
-1097
-1098 [grRules,rxnGeneMat] = standardizeGrRules(model,false);
-1099 model.grRules = grRules;
-1100 model.rxnGeneMat = rxnGeneMat;
-1101
-1102
-1103 for i=1:numel(model.rxns)
-1104 if ~isempty(model.rxnNotes{i})
-1105 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i));
-1106 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. ');
-1107 else
-1108 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'};
-1109 end
-1110 end
-1111
-1112 delete(fastaFile)
-1113 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n');
-1114 end
-1115
-1116 function files=listFiles(directory)
-1117
-1118
-1119 temp=dir(directory);
-1120 files=cell(numel(temp),1);
-1121 for i=1:numel(temp)
-1122 files{i}=temp(i,1).name;
+1027
+1028 for i=1:size(koGeneMat,2)
+1029 J=find(koGeneMat(:,i));
+1030 if any(J)
+1031 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0;
+1032 end
+1033 end
+1034 fprintf('COMPLETE\n');
+1035
+1036 fprintf('Adding gene annotations to the model... ');
+1037
+1038 model.genes=genes(1:geneCounter);
+1039 model.grRules=cell(numel(model.rxns),1);
+1040 model.grRules(:)={''};
+1041 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes));
+1042
+1043
+1044 for i=1:numel(model.rxns)
+1045 if isstruct(model.rxnMiriams{i})
+1046
+1047 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'));
+1048 KOs=model.rxnMiriams{i}.value(I);
+1049
+1050 J=ismember(KOModel.rxns,KOs);
+1051 [~, K]=find(koGeneMat(J,:));
+1052
+1053 if any(K)
+1054 model.rxnGeneMat(i,K)=1;
+1055
+1056
+1057 L=sum(koGeneMat(J,:),2)==0;
+1058 model.rxnMiriams{i}.value(I(L))=[];
+1059 model.rxnMiriams{i}.name(I(L))=[];
+1060 end
+1061 end
+1062 end
+1063 fprintf('COMPLETE\n');
+1064
+1065
+1066
+1067
+1068
+1069
+1070
+1071
+1072 if keepSpontaneous==true
+1073
+1074
+1075
+1076
+1077
+1078 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... ');
+1079 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous);
+1080 model=removeReactions(model,I,true,true);
+1081 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes);
+1082 model=removeReactions(model,I,true,true);
+1083 else
+1084
+1085
+1086 fprintf('Removing reactions which after HMM search no longer have GPR rules... ');
+1087 I=~any(model.rxnGeneMat,2);
+1088 model=removeReactions(model,I,true,true);
+1089 end
+1090 fprintf('COMPLETE\n');
+1091
+1092 fprintf('Constructing GPR rules and finalizing the model... ');
+1093
+1094 for i=1:numel(model.rxns)
+1095
+1096 I=find(model.rxnGeneMat(i,:));
+1097 if any(I)
+1098 model.grRules{i}=['(' model.genes{I(1)}];
+1099 for j=2:numel(I)
+1100 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}];
+1101 end
+1102 model.grRules{i}=[model.grRules{i} ')'];
+1103 end
+1104 end
+1105
+1106
+1107 [grRules,rxnGeneMat] = standardizeGrRules(model,false);
+1108 model.grRules = grRules;
+1109 model.rxnGeneMat = rxnGeneMat;
+1110
+1111
+1112 for i=1:numel(model.rxns)
+1113 if ~isempty(model.rxnNotes{i})
+1114 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i));
+1115 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. ');
+1116 else
+1117 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'};
+1118 end
+1119 end
+1120
+1121 delete(fastaFile)
+1122 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n');
1123 end
-1124 files=strrep(files,'.fa','');
-1125 files=strrep(files,'.hmm','');
-1126 files=strrep(files,'.out','');
-1127 files=strrep(files,'.faw','');
-1128 end
+1124
+1125 function files=listFiles(directory)
+1126
+1127
+1128 temp=dir(directory);
+1129 files=cell(numel(temp),1);
+1130 for i=1:numel(temp)
+1131 files{i}=temp(i,1).name;
+1132 end
+1133 files=strrep(files,'.fa','');
+1134 files=strrep(files,'.hmm','');
+1135 files=strrep(files,'.out','');
+1136 files=strrep(files,'.faw','');
+1137 end
Generated by m2html © 2005