diff --git a/doc/external/kegg/getKEGGModelForOrganism.html b/doc/external/kegg/getKEGGModelForOrganism.html
index 93ab50a2..341eca9d 100644
--- a/doc/external/kegg/getKEGGModelForOrganism.html
+++ b/doc/external/kegg/getKEGGModelForOrganism.html
@@ -278,7 +278,7 @@
DESCRIPTION CROSS-REFERENCE INFORMATION
This function calls:
+constructMultiFasta constructMultiFastagetModelFromKEGG getModelFromKEGGgetPhylDist getPhylDistgetWSLpath getWSLpath
This function is called by:
@@ -863,486 +863,480 @@ SOURCE CODE %On Windows, paths need to be translated to Unix before parsing it to WSL
0574 if ispc
-0575 [~,wslPath.tmpFile]=system(['wsl wslpath ''' tmpFile '''']);
-0576 wslPath.tmpFile=wslPath.tmpFile(1:end-1);
-0577
-0578
-0579 mafftOutput = tempname;
-0580 [~,wslPath.mafftOutput]=system(['wsl wslpath ''' mafftOutput '''']);
-0581 wslPath.mafftOutput=wslPath.mafftOutput(1:end-1);
-0582 [~,wslPath.mafft]=system(['wsl wslpath ''' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '''']);
-0583 wslPath.mafft=wslPath.mafft(1:end-1);
-0584 [~,wslPath.cdhit]=system(['wsl wslpath ''' fullfile(ravenPath,'software','cd-hit','cd-hit') '''']);
-0585 wslPath.cdhit=wslPath.cdhit(1:end-1);
-0586 end
-0587
-0588 for i=1:numel(missingAligned)
-0589
-0590
-0591
-0592 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&...
-0593 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file')
-0594
-0595
-0596
-0597 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file')
-0598 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist'];
-0599 dispEM(EM,false);
-0600 continue;
-0601 end
-0602
-0603
-0604
-0605 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
-0606 if s.bytes<=0
-0607 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w');
-0608 fclose(fid);
-0609 continue;
-0610 end
-0611
-0612
-0613
-0614 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w');
-0615 fclose(fid);
-0616
-0617
-0618
-0619
-0620
-0621
-0622 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
-0623 phylDist=inf(numel(fastaStruct),1);
-0624 for j=1:numel(fastaStruct)
-0625
-0626 index=strfind(fastaStruct(j).Header,':');
-0627 if any(index)
-0628 abbrev=fastaStruct(j).Header(1:index(1)-1);
-0629 [~, index]=ismember(abbrev,phylDistStruct.ids);
-0630 if any(index)
-0631 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId);
-0632 end
-0633 end
-0634 end
-0635
-0636
-0637 phylDist(phylDist>maxPhylDist)=[];
-0638
-0639
-0640 [~, order]=sort(phylDist);
-0641
-0642
-0643 if nSequences<=numel(fastaStruct)
-0644 fastaStruct=fastaStruct(order(1:nSequences));
-0645 else
-0646 fastaStruct=fastaStruct(order);
-0647 end
-0648
-0649
-0650
-0651 if numel(fastaStruct)>1
-0652 if seqIdentity~=-1
-0653 cdhitInpCustom=tempname;
-0654 fastawrite(cdhitInpCustom,fastaStruct);
-0655 if seqIdentity<=1 && seqIdentity>0.7
-0656 nparam='5';
-0657 elseif seqIdentity>0.6
-0658 nparam='4';
-0659 elseif seqIdentity>0.5
-0660 nparam='3';
-0661 elseif seqIdentity>0.4
-0662 nparam='2';
-0663 else
-0664 EM='The provided seqIdentity must be between 0 and 1\n';
-0665 dispEM(EM);
-0666 end
-0667 if ispc
-0668 [~,wslPath.cdhitInpCustom]=system(['wsl wslpath ''' cdhitInpCustom '''']);
-0669 wslPath.cdhitInpCustom=wslPath.cdhitInpCustom(1:end-1);
-0670 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']);
-0671 elseif ismac || isunix
-0672 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']);
-0673 end
-0674 if status~=0
-0675 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
-0676 dispEM(EM);
-0677 end
-0678
-0679 if exist(cdhitInpCustom, 'file')
-0680 delete([cdhitInpCustom '*']);
-0681 end
-0682 else
-0683
-0684
-0685 fastawrite(tmpFile,fastaStruct);
-0686 end
-0687
-0688 if ismac
-0689 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
-0690 elseif isunix
-0691 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
-0692 elseif ispc
-0693 [~,wslPath.fawFile]=system(['wsl wslpath ''' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '''']);
-0694 wslPath.fawFile=wslPath.fawFile(1:end-1);
-0695 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']);
-0696 output=fileread(mafftOutput);
-0697 delete(mafftOutput);
-0698 end
-0699 if status~=0
-0700
-0701
-0702
-0703
-0704 if any(regexp(output,'Only 1 sequence found'))
-0705 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f');
-0706 else
-0707 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output];
-0708 dispEM(EM);
-0709 end
-0710 end
-0711
-0712 if exist(tmpFile, 'file')
-0713 delete([tmpFile '*']);
-0714 end
-0715 else
-0716
-0717
-0718
-0719
-0720 if numel(fastaStruct)==1
-0721 warnState = warning;
-0722 warning('off','Bioinfo:fastawrite:AppendToFile');
-0723 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct);
-0724 warning(warnState)
-0725 end
-0726 end
-0727
-0728 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f');
-0729
-0730
-0731 if rem(i-1,25) == 0
-0732 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns)));
-0733 progress=pad(progress,3,'left');
-0734 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
-0735 end
-0736 end
-0737 end
-0738 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
-0739 else
-0740 if seqIdentity==-1
-0741 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
-0742 else
-0743 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
-0744 end
-0745 end
-0746
-0747
-0748 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]);
-0749 if ~isempty(missingHMMs)
-0750 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete');
-0751 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs)));
-0752
-0753 for i=1:numel(missingHMMs)
-0754
-0755
-0756 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file')
-0757
-0758
-0759
-0760
-0761
-0762 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file')
-0763 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist'];
-0764 dispEM(EM,false);
-0765 continue;
-0766 end
-0767
-0768
-0769
-0770 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']));
-0771 if s.bytes<=0
-0772 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w');
-0773 fclose(fid);
-0774 continue;
-0775 end
-0776
-0777
-0778 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w');
-0779 fclose(fid);
-0780
-0781
-0782 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']);
-0783 if status~=0
-0784 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output];
-0785 dispEM(EM);
-0786 end
-0787
-0788
-0789 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']));
-0790
-0791
-0792 if rem(i-1,25) == 0
-0793 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns)));
-0794 progress=pad(progress,3,'left');
-0795 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
-0796 end
-0797 end
-0798 end
-0799 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
-0800 else
-0801 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n');
-0802 end
-0803
-0804
-0805
-0806 missingOUT=setdiff(KOModel.rxns,outFiles);
-0807 if ~isempty(missingOUT)
-0808 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete');
-0809 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT)));
-0810 for i=1:numel(missingOUT)
-0811
-0812
-0813 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file')
-0814
-0815
-0816
-0817 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file')
-0818 EM=['The HMM file for ' missingOUT{i} ' does not exist'];
-0819 dispEM(EM,false);
-0820 continue;
-0821 end
-0822
-0823
-0824
-0825 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
-0826 fclose(fid);
+0575 wslPath.tmpFile=getWSLpath(tmpFile);
+0576
+0577
+0578 mafftOutput = tempname;
+0579 wslPath.mafftOutput=getWSLpath(mafftOutput));
+0580 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat'));
+0581 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit');
+0582 end
+0583
+0584 for i=1:numel(missingAligned)
+0585
+0586
+0587
+0588 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&...
+0589 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file')
+0590
+0591
+0592
+0593 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file')
+0594 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist'];
+0595 dispEM(EM,false);
+0596 continue;
+0597 end
+0598
+0599
+0600
+0601 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
+0602 if s.bytes<=0
+0603 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w');
+0604 fclose(fid);
+0605 continue;
+0606 end
+0607
+0608
+0609
+0610 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w');
+0611 fclose(fid);
+0612
+0613
+0614
+0615
+0616
+0617
+0618 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']));
+0619 phylDist=inf(numel(fastaStruct),1);
+0620 for j=1:numel(fastaStruct)
+0621
+0622 index=strfind(fastaStruct(j).Header,':');
+0623 if any(index)
+0624 abbrev=fastaStruct(j).Header(1:index(1)-1);
+0625 [~, index]=ismember(abbrev,phylDistStruct.ids);
+0626 if any(index)
+0627 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId);
+0628 end
+0629 end
+0630 end
+0631
+0632
+0633 phylDist(phylDist>maxPhylDist)=[];
+0634
+0635
+0636 [~, order]=sort(phylDist);
+0637
+0638
+0639 if nSequences<=numel(fastaStruct)
+0640 fastaStruct=fastaStruct(order(1:nSequences));
+0641 else
+0642 fastaStruct=fastaStruct(order);
+0643 end
+0644
+0645
+0646
+0647 if numel(fastaStruct)>1
+0648 if seqIdentity~=-1
+0649 cdhitInpCustom=tempname;
+0650 fastawrite(cdhitInpCustom,fastaStruct);
+0651 if seqIdentity<=1 && seqIdentity>0.7
+0652 nparam='5';
+0653 elseif seqIdentity>0.6
+0654 nparam='4';
+0655 elseif seqIdentity>0.5
+0656 nparam='3';
+0657 elseif seqIdentity>0.4
+0658 nparam='2';
+0659 else
+0660 EM='The provided seqIdentity must be between 0 and 1\n';
+0661 dispEM(EM);
+0662 end
+0663 if ispc
+0664 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom);
+0665 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']);
+0666 elseif ismac || isunix
+0667 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']);
+0668 end
+0669 if status~=0
+0670 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output];
+0671 dispEM(EM);
+0672 end
+0673
+0674 if exist(cdhitInpCustom, 'file')
+0675 delete([cdhitInpCustom '*']);
+0676 end
+0677 else
+0678
+0679
+0680 fastawrite(tmpFile,fastaStruct);
+0681 end
+0682
+0683 if ismac
+0684 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
+0685 elseif isunix
+0686 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']);
+0687 elseif ispc
+0688 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']));
+0689 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']);
+0690 output=fileread(mafftOutput);
+0691 delete(mafftOutput);
+0692 end
+0693 if status~=0
+0694
+0695
+0696
+0697
+0698 if any(regexp(output,'Only 1 sequence found'))
+0699 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f');
+0700 else
+0701 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output];
+0702 dispEM(EM);
+0703 end
+0704 end
+0705
+0706 if exist(tmpFile, 'file')
+0707 delete([tmpFile '*']);
+0708 end
+0709 else
+0710
+0711
+0712
+0713
+0714 if numel(fastaStruct)==1
+0715 warnState = warning;
+0716 warning('off','Bioinfo:fastawrite:AppendToFile');
+0717 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct);
+0718 warning(warnState)
+0719 end
+0720 end
+0721
+0722 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f');
+0723
+0724
+0725 if rem(i-1,25) == 0
+0726 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns)));
+0727 progress=pad(progress,3,'left');
+0728 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
+0729 end
+0730 end
+0731 end
+0732 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
+0733 else
+0734 if seqIdentity==-1
+0735 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
+0736 else
+0737 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n');
+0738 end
+0739 end
+0740
+0741
+0742 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]);
+0743 if ~isempty(missingHMMs)
+0744 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete');
+0745 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs)));
+0746
+0747 for i=1:numel(missingHMMs)
+0748
+0749
+0750 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file')
+0751
+0752
+0753
+0754
+0755
+0756 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file')
+0757 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist'];
+0758 dispEM(EM,false);
+0759 continue;
+0760 end
+0761
+0762
+0763
+0764 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']));
+0765 if s.bytes<=0
+0766 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w');
+0767 fclose(fid);
+0768 continue;
+0769 end
+0770
+0771
+0772 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w');
+0773 fclose(fid);
+0774
+0775
+0776 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']);
+0777 if status~=0
+0778 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output];
+0779 dispEM(EM);
+0780 end
+0781
+0782
+0783 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']));
+0784
+0785
+0786 if rem(i-1,25) == 0
+0787 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns)));
+0788 progress=pad(progress,3,'left');
+0789 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
+0790 end
+0791 end
+0792 end
+0793 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
+0794 else
+0795 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n');
+0796 end
+0797
+0798
+0799
+0800 missingOUT=setdiff(KOModel.rxns,outFiles);
+0801 if ~isempty(missingOUT)
+0802 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete');
+0803 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT)));
+0804 for i=1:numel(missingOUT)
+0805
+0806
+0807 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file')
+0808
+0809
+0810
+0811 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file')
+0812 EM=['The HMM file for ' missingOUT{i} ' does not exist'];
+0813 dispEM(EM,false);
+0814 continue;
+0815 end
+0816
+0817
+0818
+0819 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
+0820 fclose(fid);
+0821
+0822
+0823 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']));
+0824 if s.bytes<=0
+0825 continue;
+0826 end
0827
-0828
-0829 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']));
-0830 if s.bytes<=0
-0831 continue;
-0832 end
-0833
-0834
-0835 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']);
-0836 if status~=0
-0837 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output];
-0838 dispEM(EM);
-0839 end
-0840
-0841
-0842 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
-0843 fwrite(fid,output);
-0844 fclose(fid);
-0845
-0846
-0847 if rem(i-1,25) == 0
-0848 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns)));
-0849 progress=pad(progress,3,'left');
-0850 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
-0851 end
-0852 end
-0853 end
-0854 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
-0855 else
-0856 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n');
-0857 end
-0858
-0859
-0860
-0861
-0862 fprintf('Parsing the HMM search results... ');
-0863
-0864 koGeneMat=zeros(numel(KOModel.rxns),3000);
-0865 genes=cell(3000,1);
-0866
-0867
-0868 hTable = java.util.Hashtable;
-0869
-0870 geneCounter=0;
-0871 for i=1:numel(KOModel.rxns)
-0872 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file')
-0873 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r');
-0874 beginMatches=false;
-0875 while 1
-0876
-0877 tline = fgetl(fid);
-0878
-0879
-0880 if ~ischar(tline)
-0881 break;
-0882 end
-0883
-0884 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------'))
-0885 break;
-0886 end
-0887
-0888 if beginMatches==false
-0889
-0890 if any(strfind(tline,'E-value '))
-0891
-0892 tline = fgetl(fid);
-0893 beginMatches=true;
-0894 end
-0895 else
-0896
-0897 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
-0898 elements=regexp(tline,' ','split');
-0899 elements=elements(cellfun(@any,elements));
-0900
-0901
-0902 score=str2double(elements{1});
-0903 gene=elements{9};
-0904 if score<=cutOff
-0905
-0906
-0907 if score==0
-0908 score=10^-250;
-0909 end
-0910
-0911
-0912 I=hTable.get(gene);
-0913 if any(I)
-0914 koGeneMat(i,I)=score;
-0915 else
-0916 geneCounter=geneCounter+1;
-0917
-0918 hTable.put(gene,geneCounter);
-0919 genes{geneCounter}=gene;
-0920 koGeneMat(i,geneCounter)=score;
-0921 end
-0922 end
-0923 else
-0924 break;
-0925 end
-0926 end
-0927 end
-0928 fclose(fid);
-0929 end
-0930 end
-0931 fprintf('COMPLETE\n');
-0932
-0933 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... ');
-0934 koGeneMat=koGeneMat(:,1:geneCounter);
-0935
-0936
-0937 for i=1:size(koGeneMat,1)
-0938 J=find(koGeneMat(i,:));
-0939 if any(J)
-0940 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0;
-0941 end
-0942 end
-0943
-0944
-0945 for i=1:size(koGeneMat,2)
-0946 J=find(koGeneMat(:,i));
-0947 if any(J)
-0948 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0;
-0949 end
-0950 end
-0951 fprintf('COMPLETE\n');
-0952
-0953 fprintf('Adding gene annotations to the model... ');
-0954
-0955 model.genes=genes(1:geneCounter);
-0956 model.grRules=cell(numel(model.rxns),1);
-0957 model.grRules(:)={''};
-0958 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes));
-0959
-0960
-0961 for i=1:numel(model.rxns)
-0962 if isstruct(model.rxnMiriams{i})
-0963
-0964 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'));
-0965 KOs=model.rxnMiriams{i}.value(I);
-0966
-0967 J=ismember(KOModel.rxns,KOs);
-0968 [~, K]=find(koGeneMat(J,:));
-0969
-0970 if any(K)
-0971 model.rxnGeneMat(i,K)=1;
-0972
-0973
-0974 L=sum(koGeneMat(J,:),2)==0;
-0975 model.rxnMiriams{i}.value(I(L))=[];
-0976 model.rxnMiriams{i}.name(I(L))=[];
-0977 end
-0978 end
-0979 end
-0980 fprintf('COMPLETE\n');
-0981
-0982
-0983
-0984
-0985
-0986
-0987
-0988
-0989 if keepSpontaneous==true
-0990
-0991
-0992
-0993
-0994
-0995 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... ');
-0996 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous);
-0997 model=removeReactions(model,I,true,true);
-0998 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes);
+0828
+0829 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']);
+0830 if status~=0
+0831 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output];
+0832 dispEM(EM);
+0833 end
+0834
+0835
+0836 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w');
+0837 fwrite(fid,output);
+0838 fclose(fid);
+0839
+0840
+0841 if rem(i-1,25) == 0
+0842 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns)));
+0843 progress=pad(progress,3,'left');
+0844 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress);
+0845 end
+0846 end
+0847 end
+0848 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n');
+0849 else
+0850 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n');
+0851 end
+0852
+0853
+0854
+0855
+0856 fprintf('Parsing the HMM search results... ');
+0857
+0858 koGeneMat=zeros(numel(KOModel.rxns),3000);
+0859 genes=cell(3000,1);
+0860
+0861
+0862 hTable = java.util.Hashtable;
+0863
+0864 geneCounter=0;
+0865 for i=1:numel(KOModel.rxns)
+0866 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file')
+0867 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r');
+0868 beginMatches=false;
+0869 while 1
+0870
+0871 tline = fgetl(fid);
+0872
+0873
+0874 if ~ischar(tline)
+0875 break;
+0876 end
+0877
+0878 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------'))
+0879 break;
+0880 end
+0881
+0882 if beginMatches==false
+0883
+0884 if any(strfind(tline,'E-value '))
+0885
+0886 tline = fgetl(fid);
+0887 beginMatches=true;
+0888 end
+0889 else
+0890
+0891 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline)
+0892 elements=regexp(tline,' ','split');
+0893 elements=elements(cellfun(@any,elements));
+0894
+0895
+0896 score=str2double(elements{1});
+0897 gene=elements{9};
+0898 if score<=cutOff
+0899
+0900
+0901 if score==0
+0902 score=10^-250;
+0903 end
+0904
+0905
+0906 I=hTable.get(gene);
+0907 if any(I)
+0908 koGeneMat(i,I)=score;
+0909 else
+0910 geneCounter=geneCounter+1;
+0911
+0912 hTable.put(gene,geneCounter);
+0913 genes{geneCounter}=gene;
+0914 koGeneMat(i,geneCounter)=score;
+0915 end
+0916 end
+0917 else
+0918 break;
+0919 end
+0920 end
+0921 end
+0922 fclose(fid);
+0923 end
+0924 end
+0925 fprintf('COMPLETE\n');
+0926
+0927 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... ');
+0928 koGeneMat=koGeneMat(:,1:geneCounter);
+0929
+0930
+0931 for i=1:size(koGeneMat,1)
+0932 J=find(koGeneMat(i,:));
+0933 if any(J)
+0934 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0;
+0935 end
+0936 end
+0937
+0938
+0939 for i=1:size(koGeneMat,2)
+0940 J=find(koGeneMat(:,i));
+0941 if any(J)
+0942 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0;
+0943 end
+0944 end
+0945 fprintf('COMPLETE\n');
+0946
+0947 fprintf('Adding gene annotations to the model... ');
+0948
+0949 model.genes=genes(1:geneCounter);
+0950 model.grRules=cell(numel(model.rxns),1);
+0951 model.grRules(:)={''};
+0952 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes));
+0953
+0954
+0955 for i=1:numel(model.rxns)
+0956 if isstruct(model.rxnMiriams{i})
+0957
+0958 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'));
+0959 KOs=model.rxnMiriams{i}.value(I);
+0960
+0961 J=ismember(KOModel.rxns,KOs);
+0962 [~, K]=find(koGeneMat(J,:));
+0963
+0964 if any(K)
+0965 model.rxnGeneMat(i,K)=1;
+0966
+0967
+0968 L=sum(koGeneMat(J,:),2)==0;
+0969 model.rxnMiriams{i}.value(I(L))=[];
+0970 model.rxnMiriams{i}.name(I(L))=[];
+0971 end
+0972 end
+0973 end
+0974 fprintf('COMPLETE\n');
+0975
+0976
+0977
+0978
+0979
+0980
+0981
+0982
+0983 if keepSpontaneous==true
+0984
+0985
+0986
+0987
+0988
+0989 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... ');
+0990 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous);
+0991 model=removeReactions(model,I,true,true);
+0992 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes);
+0993 model=removeReactions(model,I,true,true);
+0994 else
+0995
+0996
+0997 fprintf('Removing reactions which after HMM search no longer have GPR rules... ');
+0998 I=~any(model.rxnGeneMat,2);
0999 model=removeReactions(model,I,true,true);
-1000 else
-1001
-1002
-1003 fprintf('Removing reactions which after HMM search no longer have GPR rules... ');
-1004 I=~any(model.rxnGeneMat,2);
-1005 model=removeReactions(model,I,true,true);
-1006 end
-1007 fprintf('COMPLETE\n');
-1008
-1009 fprintf('Constructing GPR rules and finalizing the model... ');
-1010
-1011 for i=1:numel(model.rxns)
-1012
-1013 I=find(model.rxnGeneMat(i,:));
-1014 if any(I)
-1015 model.grRules{i}=['(' model.genes{I(1)}];
-1016 for j=2:numel(I)
-1017 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}];
-1018 end
-1019 model.grRules{i}=[model.grRules{i} ')'];
-1020 end
-1021 end
-1022
-1023
-1024 [grRules,rxnGeneMat] = standardizeGrRules(model,false);
-1025 model.grRules = grRules;
-1026 model.rxnGeneMat = rxnGeneMat;
-1027
-1028
-1029 for i=1:numel(model.rxns)
-1030 if ~isempty(model.rxnNotes{i})
-1031 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i));
-1032 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. ');
-1033 else
-1034 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'};
-1035 end
-1036 end
-1037
-1038 delete(fastaFile)
-1039 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n');
-1040 end
-1041
-1042 function files=listFiles(directory)
-1043
-1044
-1045 temp=dir(directory);
-1046 files=cell(numel(temp),1);
-1047 for i=1:numel(temp)
-1048 files{i}=temp(i,1).name;
-1049 end
-1050 files=strrep(files,'.fa','');
-1051 files=strrep(files,'.hmm','');
-1052 files=strrep(files,'.out','');
-1053 files=strrep(files,'.faw','');
-1054 end
+1000 end
+1001 fprintf('COMPLETE\n');
+1002
+1003 fprintf('Constructing GPR rules and finalizing the model... ');
+1004
+1005 for i=1:numel(model.rxns)
+1006
+1007 I=find(model.rxnGeneMat(i,:));
+1008 if any(I)
+1009 model.grRules{i}=['(' model.genes{I(1)}];
+1010 for j=2:numel(I)
+1011 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}];
+1012 end
+1013 model.grRules{i}=[model.grRules{i} ')'];
+1014 end
+1015 end
+1016
+1017
+1018 [grRules,rxnGeneMat] = standardizeGrRules(model,false);
+1019 model.grRules = grRules;
+1020 model.rxnGeneMat = rxnGeneMat;
+1021
+1022
+1023 for i=1:numel(model.rxns)
+1024 if ~isempty(model.rxnNotes{i})
+1025 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i));
+1026 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. ');
+1027 else
+1028 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'};
+1029 end
+1030 end
+1031
+1032 delete(fastaFile)
+1033 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n');
+1034 end
+1035
+1036 function files=listFiles(directory)
+1037
+1038
+1039 temp=dir(directory);
+1040 files=cell(numel(temp),1);
+1041 for i=1:numel(temp)
+1042 files{i}=temp(i,1).name;
+1043 end
+1044 files=strrep(files,'.fa','');
+1045 files=strrep(files,'.hmm','');
+1046 files=strrep(files,'.out','');
+1047 files=strrep(files,'.faw','');
+1048 end
Generated by m2html © 2005