diff --git a/doc/external/kegg/getKEGGModelForOrganism.html b/doc/external/kegg/getKEGGModelForOrganism.html index 93ab50a2..341eca9d 100644 --- a/doc/external/kegg/getKEGGModelForOrganism.html +++ b/doc/external/kegg/getKEGGModelForOrganism.html @@ -278,7 +278,7 @@

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

This function calls: +
  • constructMultiFasta constructMultiFasta
  • getModelFromKEGG getModelFromKEGG
  • getPhylDist getPhylDist
  • getWSLpath getWSLpath
  • This function is called by: @@ -863,486 +863,480 @@

    SOURCE CODE ^%On Windows, paths need to be translated to Unix before parsing it to WSL 0574 if ispc -0575 [~,wslPath.tmpFile]=system(['wsl wslpath ''' tmpFile '''']); -0576 wslPath.tmpFile=wslPath.tmpFile(1:end-1); -0577 %mafft has problems writing to terminal (/dev/stderr) when running -0578 %on WSL via MATLAB, instead write and read progress file -0579 mafftOutput = tempname; -0580 [~,wslPath.mafftOutput]=system(['wsl wslpath ''' mafftOutput '''']); -0581 wslPath.mafftOutput=wslPath.mafftOutput(1:end-1); -0582 [~,wslPath.mafft]=system(['wsl wslpath ''' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '''']); -0583 wslPath.mafft=wslPath.mafft(1:end-1); -0584 [~,wslPath.cdhit]=system(['wsl wslpath ''' fullfile(ravenPath,'software','cd-hit','cd-hit') '''']); -0585 wslPath.cdhit=wslPath.cdhit(1:end-1); -0586 end -0587 -0588 for i=1:numel(missingAligned) -0589 %This is checked here because it could be that it is created by a -0590 %parallel process. The faw-files are saved as temporary files to -0591 %kept track of which files are being worked on -0592 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&... -0593 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file') -0594 %Check that the multi-FASTA file exists. It should do so since -0595 %we are saving empty files as well. Print a warning and -0596 %continue if not -0597 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file') -0598 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; -0599 dispEM(EM,false); -0600 continue; -0601 end -0602 -0603 %If the multi-FASTA file is empty then save an empty aligned -0604 %file and continue -0605 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0606 if s.bytes<=0 -0607 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); -0608 fclose(fid); -0609 continue; -0610 end -0611 -0612 %Create an empty file to prevent other threads to start to work -0613 %on the same alignment -0614 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); -0615 fclose(fid); -0616 -0617 %First load the FASTA file, then select up to nSequences -0618 %sequences of the most closely related species, apply any -0619 %constraints from maxPhylDist, and save it as a temporary file, -0620 %and create the model from that -0621 -0622 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); -0623 phylDist=inf(numel(fastaStruct),1); -0624 for j=1:numel(fastaStruct) -0625 %Get the organism abbreviation -0626 index=strfind(fastaStruct(j).Header,':'); -0627 if any(index) -0628 abbrev=fastaStruct(j).Header(1:index(1)-1); -0629 [~, index]=ismember(abbrev,phylDistStruct.ids); -0630 if any(index) -0631 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); -0632 end -0633 end -0634 end -0635 -0636 %Inf means that it should not be included -0637 phylDist(phylDist>maxPhylDist)=[]; -0638 -0639 %Sort based on phylDist -0640 [~, order]=sort(phylDist); -0641 -0642 %Save the first nSequences hits to a temporary FASTA file -0643 if nSequences<=numel(fastaStruct) -0644 fastaStruct=fastaStruct(order(1:nSequences)); -0645 else -0646 fastaStruct=fastaStruct(order); -0647 end -0648 -0649 %Do the clustering and alignment if there are more than one -0650 %sequences, otherwise just save the sequence (or an empty file) -0651 if numel(fastaStruct)>1 -0652 if seqIdentity~=-1 -0653 cdhitInpCustom=tempname; -0654 fastawrite(cdhitInpCustom,fastaStruct); -0655 if seqIdentity<=1 && seqIdentity>0.7 -0656 nparam='5'; -0657 elseif seqIdentity>0.6 -0658 nparam='4'; -0659 elseif seqIdentity>0.5 -0660 nparam='3'; -0661 elseif seqIdentity>0.4 -0662 nparam='2'; -0663 else -0664 EM='The provided seqIdentity must be between 0 and 1\n'; -0665 dispEM(EM); -0666 end -0667 if ispc -0668 [~,wslPath.cdhitInpCustom]=system(['wsl wslpath ''' cdhitInpCustom '''']); -0669 wslPath.cdhitInpCustom=wslPath.cdhitInpCustom(1:end-1); -0670 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); -0671 elseif ismac || isunix -0672 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); -0673 end -0674 if status~=0 -0675 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; -0676 dispEM(EM); -0677 end -0678 %Remove the old tempfile -0679 if exist(cdhitInpCustom, 'file') -0680 delete([cdhitInpCustom '*']); -0681 end -0682 else -0683 %This means that CD-HIT should be skipped since -0684 %seqIdentity is equal to -1 -0685 fastawrite(tmpFile,fastaStruct); -0686 end -0687 %Do the alignment for this file -0688 if ismac -0689 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0690 elseif isunix -0691 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); -0692 elseif ispc -0693 [~,wslPath.fawFile]=system(['wsl wslpath ''' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '''']); -0694 wslPath.fawFile=wslPath.fawFile(1:end-1); -0695 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); -0696 output=fileread(mafftOutput); -0697 delete(mafftOutput); -0698 end -0699 if status~=0 -0700 %It could be that alignment failed because only one -0701 %sequence was left after clustering. If that is the -0702 %case, then the clustered file is just copied as 'faw' -0703 %file -0704 if any(regexp(output,'Only 1 sequence found')) -0705 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); -0706 else -0707 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; -0708 dispEM(EM); -0709 end -0710 end -0711 %Remove the old tempfile -0712 if exist(tmpFile, 'file') -0713 delete([tmpFile '*']); -0714 end -0715 else -0716 %If there is only one sequence then it's not possible to do -0717 %a multiple alignment. Just print the sequence instead. An -0718 %empty file was written previously so that doesn't have to -0719 %be dealt with -0720 if numel(fastaStruct)==1 -0721 warnState = warning; %Save the current warning state -0722 warning('off','Bioinfo:fastawrite:AppendToFile'); -0723 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); -0724 warning(warnState) %Reset warning state to previous settings -0725 end -0726 end -0727 %Move the temporary file to the real one -0728 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); -0729 -0730 %Print the progress every 25 files -0731 if rem(i-1,25) == 0 -0732 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); -0733 progress=pad(progress,3,'left'); -0734 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0735 end -0736 end -0737 end -0738 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0739 else -0740 if seqIdentity==-1 -0741 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0742 else -0743 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); -0744 end -0745 end -0746 -0747 %Check if training of Hidden Markov models should be performed -0748 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); -0749 if ~isempty(missingHMMs) -0750 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); -0751 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); -0752 %Train models for all missing KOs -0753 for i=1:numel(missingHMMs) -0754 %This is checked here because it could be that it is created by a -0755 %parallel process -0756 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file') -0757 %Check that the aligned FASTA file exists. It could be that it -0758 %is still being worked on by some other instance of the program -0759 %(the .faw file should then exist). This should not happen on a -0760 %single computer. It doesn't throw an error, because it should -0761 %finalize the ones it can -0762 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file') -0763 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; -0764 dispEM(EM,false); -0765 continue; -0766 end -0767 -0768 %If the multi-FASTA file is empty then save an empty aligned -0769 %file and continue -0770 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); -0771 if s.bytes<=0 -0772 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); -0773 fclose(fid); -0774 continue; -0775 end -0776 %Create a temporary file to indicate that it is working on the -0777 %KO. This is because hmmbuild cannot overwrite existing files -0778 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); -0779 fclose(fid); -0780 -0781 %Create HMM -0782 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); -0783 if status~=0 -0784 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; -0785 dispEM(EM); -0786 end -0787 -0788 %Delete the temporary file -0789 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); -0790 -0791 %Print the progress every 25 files -0792 if rem(i-1,25) == 0 -0793 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); -0794 progress=pad(progress,3,'left'); -0795 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0796 end -0797 end -0798 end -0799 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0800 else -0801 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); -0802 end -0803 -0804 %Check which new .out files that should be generated. Check if training of -0805 %Hidden Markov models should be performed -0806 missingOUT=setdiff(KOModel.rxns,outFiles); -0807 if ~isempty(missingOUT) -0808 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); -0809 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); -0810 for i=1:numel(missingOUT) -0811 %This is checked here because it could be that it is created by a -0812 %parallel process -0813 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file') -0814 %Check that the HMM file exists. It should do so since %we are -0815 %saving empty files as well. Print a warning and continue if -0816 %not -0817 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file') -0818 EM=['The HMM file for ' missingOUT{i} ' does not exist']; -0819 dispEM(EM,false); -0820 continue; -0821 end -0822 -0823 %Save an empty file to prevent several threads working on the -0824 %same file -0825 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0826 fclose(fid); +0575 wslPath.tmpFile=getWSLpath(tmpFile); +0576 %mafft has problems writing to terminal (/dev/stderr) when running +0577 %on WSL via MATLAB, instead write and read progress file +0578 mafftOutput = tempname; +0579 wslPath.mafftOutput=getWSLpath(mafftOutput)); +0580 wslPath.mafft=getWSLpath(fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat')); +0581 wslPath.cdhit=getWSLpath(fullfile(ravenPath,'software','cd-hit','cd-hit'); +0582 end +0583 +0584 for i=1:numel(missingAligned) +0585 %This is checked here because it could be that it is created by a +0586 %parallel process. The faw-files are saved as temporary files to +0587 %kept track of which files are being worked on +0588 if ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'file') &&... +0589 ~exist(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'file') +0590 %Check that the multi-FASTA file exists. It should do so since +0591 %we are saving empty files as well. Print a warning and +0592 %continue if not +0593 if ~exist(fullfile(dataDir,'fasta',[missingAligned{i} '.fa']),'file') +0594 EM=['WARNING: The multi-FASTA file for ' missingAligned{i} ' does not exist']; +0595 dispEM(EM,false); +0596 continue; +0597 end +0598 +0599 %If the multi-FASTA file is empty then save an empty aligned +0600 %file and continue +0601 s=dir(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0602 if s.bytes<=0 +0603 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'w'); +0604 fclose(fid); +0605 continue; +0606 end +0607 +0608 %Create an empty file to prevent other threads to start to work +0609 %on the same alignment +0610 fid=fopen(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'w'); +0611 fclose(fid); +0612 +0613 %First load the FASTA file, then select up to nSequences +0614 %sequences of the most closely related species, apply any +0615 %constraints from maxPhylDist, and save it as a temporary file, +0616 %and create the model from that +0617 +0618 fastaStruct=fastaread(fullfile(dataDir,'fasta',[missingAligned{i} '.fa'])); +0619 phylDist=inf(numel(fastaStruct),1); +0620 for j=1:numel(fastaStruct) +0621 %Get the organism abbreviation +0622 index=strfind(fastaStruct(j).Header,':'); +0623 if any(index) +0624 abbrev=fastaStruct(j).Header(1:index(1)-1); +0625 [~, index]=ismember(abbrev,phylDistStruct.ids); +0626 if any(index) +0627 phylDist(j)=phylDistStruct.distMat(index(1),phylDistId); +0628 end +0629 end +0630 end +0631 +0632 %Inf means that it should not be included +0633 phylDist(phylDist>maxPhylDist)=[]; +0634 +0635 %Sort based on phylDist +0636 [~, order]=sort(phylDist); +0637 +0638 %Save the first nSequences hits to a temporary FASTA file +0639 if nSequences<=numel(fastaStruct) +0640 fastaStruct=fastaStruct(order(1:nSequences)); +0641 else +0642 fastaStruct=fastaStruct(order); +0643 end +0644 +0645 %Do the clustering and alignment if there are more than one +0646 %sequences, otherwise just save the sequence (or an empty file) +0647 if numel(fastaStruct)>1 +0648 if seqIdentity~=-1 +0649 cdhitInpCustom=tempname; +0650 fastawrite(cdhitInpCustom,fastaStruct); +0651 if seqIdentity<=1 && seqIdentity>0.7 +0652 nparam='5'; +0653 elseif seqIdentity>0.6 +0654 nparam='4'; +0655 elseif seqIdentity>0.5 +0656 nparam='3'; +0657 elseif seqIdentity>0.4 +0658 nparam='2'; +0659 else +0660 EM='The provided seqIdentity must be between 0 and 1\n'; +0661 dispEM(EM); +0662 end +0663 if ispc +0664 wslPath.cdhitInpCustom=getWSLpath(cdhitInpCustom); +0665 [status, output]=system(['wsl "' wslPath.cdhit '" -T "' num2str(cores) '" -i "' wslPath.cdhitInpCustom '" -o "' wslPath.tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0666 elseif ismac || isunix +0667 [status, output]=system(['"' fullfile(ravenPath,'software','cd-hit',['cd-hit' binEnd]) '" -T "' num2str(cores) '" -i "' cdhitInpCustom '" -o "' tmpFile '" -c "' num2str(seqIdentity) '" -n ' nparam ' -M 2000']); +0668 end +0669 if status~=0 +0670 EM=['Error when performing clustering of ' missingAligned{i} ':\n' output]; +0671 dispEM(EM); +0672 end +0673 %Remove the old tempfile +0674 if exist(cdhitInpCustom, 'file') +0675 delete([cdhitInpCustom '*']); +0676 end +0677 else +0678 %This means that CD-HIT should be skipped since +0679 %seqIdentity is equal to -1 +0680 fastawrite(tmpFile,fastaStruct); +0681 end +0682 %Do the alignment for this file +0683 if ismac +0684 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-mac','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0685 elseif isunix +0686 [status, output]=system(['"' fullfile(ravenPath,'software','mafft','mafft-linux64','mafft.bat') '" --auto --anysymbol --thread "' num2str(cores) '" "' tmpFile '" > "' fullfile(dataDir,'aligned',[missingAligned{i} '.faw']) '"']); +0687 elseif ispc +0688 wslPath.fawFile=getWSLpath(fullfile(dataDir,'aligned',[missingAligned{i} '.faw'])); +0689 [status, ~]=system(['wsl "' wslPath.mafft '" --auto --anysymbol --progress "' wslPath.mafftOutput '" --thread "' num2str(cores) '" --out "' wslPath.fawFile '" "' wslPath.tmpFile '"']); +0690 output=fileread(mafftOutput); +0691 delete(mafftOutput); +0692 end +0693 if status~=0 +0694 %It could be that alignment failed because only one +0695 %sequence was left after clustering. If that is the +0696 %case, then the clustered file is just copied as 'faw' +0697 %file +0698 if any(regexp(output,'Only 1 sequence found')) +0699 movefile(tmpFile,fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),'f'); +0700 else +0701 EM=['Error when performing alignment of ' missingAligned{i} ':\n' output]; +0702 dispEM(EM); +0703 end +0704 end +0705 %Remove the old tempfile +0706 if exist(tmpFile, 'file') +0707 delete([tmpFile '*']); +0708 end +0709 else +0710 %If there is only one sequence then it's not possible to do +0711 %a multiple alignment. Just print the sequence instead. An +0712 %empty file was written previously so that doesn't have to +0713 %be dealt with +0714 if numel(fastaStruct)==1 +0715 warnState = warning; %Save the current warning state +0716 warning('off','Bioinfo:fastawrite:AppendToFile'); +0717 fastawrite(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fastaStruct); +0718 warning(warnState) %Reset warning state to previous settings +0719 end +0720 end +0721 %Move the temporary file to the real one +0722 movefile(fullfile(dataDir,'aligned',[missingAligned{i} '.faw']),fullfile(dataDir,'aligned',[missingAligned{i} '.fa']),'f'); +0723 +0724 %Print the progress every 25 files +0725 if rem(i-1,25) == 0 +0726 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'aligned','*.fa')))/numel(KOModel.rxns))); +0727 progress=pad(progress,3,'left'); +0728 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0729 end +0730 end +0731 end +0732 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0733 else +0734 if seqIdentity==-1 +0735 fprintf('Performing the multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0736 else +0737 fprintf('Performing clustering and multiple alignment for KEGG Orthology specific protein sets... COMPLETE\n'); +0738 end +0739 end +0740 +0741 %Check if training of Hidden Markov models should be performed +0742 missingHMMs=setdiff(KOModel.rxns,[hmmFiles;outFiles]); +0743 if ~isempty(missingHMMs) +0744 fprintf('Training the KEGG Orthology specific HMMs... 0%% complete'); +0745 missingHMMs=missingHMMs(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingHMMs))); +0746 %Train models for all missing KOs +0747 for i=1:numel(missingHMMs) +0748 %This is checked here because it could be that it is created by a +0749 %parallel process +0750 if ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'file') && ~exist(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'file') +0751 %Check that the aligned FASTA file exists. It could be that it +0752 %is still being worked on by some other instance of the program +0753 %(the .faw file should then exist). This should not happen on a +0754 %single computer. It doesn't throw an error, because it should +0755 %finalize the ones it can +0756 if ~exist(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']),'file') +0757 EM=['The aligned FASTA file for ' missingHMMs{i} ' does not exist']; +0758 dispEM(EM,false); +0759 continue; +0760 end +0761 +0762 %If the multi-FASTA file is empty then save an empty aligned +0763 %file and continue +0764 s=dir(fullfile(dataDir,'aligned',[missingHMMs{i} '.fa'])); +0765 if s.bytes<=0 +0766 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']),'w'); +0767 fclose(fid); +0768 continue; +0769 end +0770 %Create a temporary file to indicate that it is working on the +0771 %KO. This is because hmmbuild cannot overwrite existing files +0772 fid=fopen(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw']),'w'); +0773 fclose(fid); +0774 +0775 %Create HMM +0776 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmbuild' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingHMMs{i} '.hmm']) '" "' fullfile(dataDir,'aligned',[missingHMMs{i} '.fa']) '"']); +0777 if status~=0 +0778 EM=['Error when training HMM for ' missingHMMs{i} ':\n' output]; +0779 dispEM(EM); +0780 end +0781 +0782 %Delete the temporary file +0783 delete(fullfile(dataDir,'hmms',[missingHMMs{i} '.hmw'])); +0784 +0785 %Print the progress every 25 files +0786 if rem(i-1,25) == 0 +0787 progress=num2str(floor(100*numel(listFiles(fullfile(dataDir,'hmms','*.hmm')))/numel(KOModel.rxns))); +0788 progress=pad(progress,3,'left'); +0789 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0790 end +0791 end +0792 end +0793 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0794 else +0795 fprintf('Training the KEGG Orthology specific HMMs... COMPLETE\n'); +0796 end +0797 +0798 %Check which new .out files that should be generated. Check if training of +0799 %Hidden Markov models should be performed +0800 missingOUT=setdiff(KOModel.rxns,outFiles); +0801 if ~isempty(missingOUT) +0802 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... 0%% complete'); +0803 missingOUT=missingOUT(randperm(RandStream.create('mrg32k3a','Seed',cputime()),numel(missingOUT))); +0804 for i=1:numel(missingOUT) +0805 %This is checked here because it could be that it is created by a +0806 %parallel process +0807 if ~exist(fullfile(outDir,[missingOUT{i} '.out']),'file') +0808 %Check that the HMM file exists. It should do so since %we are +0809 %saving empty files as well. Print a warning and continue if +0810 %not +0811 if ~exist(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']),'file') +0812 EM=['The HMM file for ' missingOUT{i} ' does not exist']; +0813 dispEM(EM,false); +0814 continue; +0815 end +0816 +0817 %Save an empty file to prevent several threads working on the +0818 %same file +0819 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0820 fclose(fid); +0821 +0822 %If the HMM file is empty then save an out file and continue +0823 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); +0824 if s.bytes<=0 +0825 continue; +0826 end 0827 -0828 %If the HMM file is empty then save an out file and continue -0829 s=dir(fullfile(dataDir,'hmms',[missingOUT{i} '.hmm'])); -0830 if s.bytes<=0 -0831 continue; -0832 end -0833 -0834 %Check each gene in the input file against this model -0835 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); -0836 if status~=0 -0837 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; -0838 dispEM(EM); -0839 end -0840 -0841 %Save the output to a file -0842 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); -0843 fwrite(fid,output); -0844 fclose(fid); -0845 -0846 %Print the progress every 25 files -0847 if rem(i-1,25) == 0 -0848 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); -0849 progress=pad(progress,3,'left'); -0850 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); -0851 end -0852 end -0853 end -0854 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); -0855 else -0856 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); -0857 end -0858 -0859 -0860 %***Begin retrieving the output and putting together the resulting model -0861 -0862 fprintf('Parsing the HMM search results... '); -0863 %Retrieve matched genes from the HMMs -0864 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes -0865 genes=cell(3000,1); -0866 %Store the best score for a gene in a hash list (since it will be searching -0867 %many times) -0868 hTable = java.util.Hashtable; -0869 -0870 geneCounter=0; -0871 for i=1:numel(KOModel.rxns) -0872 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') -0873 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); -0874 beginMatches=false; -0875 while 1 -0876 %Get the next line -0877 tline = fgetl(fid); -0878 -0879 %Abort at end of file -0880 if ~ischar(tline) -0881 break; -0882 end -0883 -0884 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) -0885 break; -0886 end -0887 -0888 if beginMatches==false -0889 %This is how the listing of matches begins -0890 if any(strfind(tline,'E-value ')) -0891 %Read one more line that is only padding -0892 tline = fgetl(fid); -0893 beginMatches=true; -0894 end -0895 else -0896 %If matches should be read -0897 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) -0898 elements=regexp(tline,' ','split'); -0899 elements=elements(cellfun(@any,elements)); -0900 -0901 %Check if the match is below the treshhold -0902 score=str2double(elements{1}); -0903 gene=elements{9}; -0904 if score<=cutOff -0905 %If the score is exactly 0, change it to a very -0906 %small value to avoid NaN -0907 if score==0 -0908 score=10^-250; -0909 end -0910 %Check if the gene is added already and, is so, get -0911 %the best score for it -0912 I=hTable.get(gene); -0913 if any(I) -0914 koGeneMat(i,I)=score; -0915 else -0916 geneCounter=geneCounter+1; -0917 %The gene was not present yet so add it -0918 hTable.put(gene,geneCounter); -0919 genes{geneCounter}=gene; -0920 koGeneMat(i,geneCounter)=score; -0921 end -0922 end -0923 else -0924 break; -0925 end -0926 end -0927 end -0928 fclose(fid); -0929 end -0930 end -0931 fprintf('COMPLETE\n'); -0932 -0933 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); -0934 koGeneMat=koGeneMat(:,1:geneCounter); -0935 -0936 %Remove the genes for each KO that are below minScoreRatioKO. -0937 for i=1:size(koGeneMat,1) -0938 J=find(koGeneMat(i,:)); -0939 if any(J) -0940 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; -0941 end -0942 end -0943 -0944 %Remove the KOs for each gene that are below minScoreRatioG -0945 for i=1:size(koGeneMat,2) -0946 J=find(koGeneMat(:,i)); -0947 if any(J) -0948 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; -0949 end -0950 end -0951 fprintf('COMPLETE\n'); -0952 -0953 fprintf('Adding gene annotations to the model... '); -0954 %Create the new model -0955 model.genes=genes(1:geneCounter); -0956 model.grRules=cell(numel(model.rxns),1); -0957 model.grRules(:)={''}; -0958 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); -0959 -0960 %Loop through the reactions and add the corresponding genes -0961 for i=1:numel(model.rxns) -0962 if isstruct(model.rxnMiriams{i}) -0963 %Get all KOs -0964 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); -0965 KOs=model.rxnMiriams{i}.value(I); -0966 %Find the KOs and the corresponding genes -0967 J=ismember(KOModel.rxns,KOs); -0968 [~, K]=find(koGeneMat(J,:)); -0969 -0970 if any(K) -0971 model.rxnGeneMat(i,K)=1; -0972 %Also delete KOs for which no genes were found. If no genes at -0973 %all were matched to the reaction it will be deleted later -0974 L=sum(koGeneMat(J,:),2)==0; -0975 model.rxnMiriams{i}.value(I(L))=[]; -0976 model.rxnMiriams{i}.name(I(L))=[]; -0977 end -0978 end -0979 end -0980 fprintf('COMPLETE\n'); -0981 -0982 %Find and delete all reactions without genes. This also removes genes that -0983 %are not used (which could happen because minScoreRatioG and -0984 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions -0985 %without genes are kept in the model. Spontaneous reactions with original -0986 %gene associations are treated in the same way, like the rest of the -0987 %reactions - if gene associations were removed during HMM search, such -0988 %reactions are deleted from the model -0989 if keepSpontaneous==true -0990 %Not the most comprise way to delete reactions without genes, but this -0991 %makes the code easier to understand. Firstly the non-spontaneous -0992 %reactions without genes are removed. After that, the second deletion -0993 %step removes spontaneous reactions, which had gene associations before -0994 %HMM search, but no longer have after it -0995 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... '); -0996 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); -0997 model=removeReactions(model,I,true,true); -0998 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); +0828 %Check each gene in the input file against this model +0829 [status, output]=system(['"' fullfile(ravenPath,'software','hmmer',['hmmsearch' binEnd]) '" --cpu "' num2str(cores) '" "' fullfile(dataDir,'hmms',[missingOUT{i} '.hmm']) '" "' fastaFile '"']); +0830 if status~=0 +0831 EM=['Error when querying HMM for ' missingOUT{i} ':\n' output]; +0832 dispEM(EM); +0833 end +0834 +0835 %Save the output to a file +0836 fid=fopen(fullfile(outDir,[missingOUT{i} '.out']),'w'); +0837 fwrite(fid,output); +0838 fclose(fid); +0839 +0840 %Print the progress every 25 files +0841 if rem(i-1,25) == 0 +0842 progress=num2str(floor(100*numel(listFiles(fullfile(outDir,'*.out')))/numel(KOModel.rxns))); +0843 progress=pad(progress,3,'left'); +0844 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b%s%% complete',progress); +0845 end +0846 end +0847 end +0848 fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\bCOMPLETE\n'); +0849 else +0850 fprintf('Querying the user-specified FASTA file against the KEGG Orthology specific HMMs... COMPLETE\n'); +0851 end +0852 +0853 +0854 %***Begin retrieving the output and putting together the resulting model +0855 +0856 fprintf('Parsing the HMM search results... '); +0857 %Retrieve matched genes from the HMMs +0858 koGeneMat=zeros(numel(KOModel.rxns),3000); %Make room for 3000 genes +0859 genes=cell(3000,1); +0860 %Store the best score for a gene in a hash list (since it will be searching +0861 %many times) +0862 hTable = java.util.Hashtable; +0863 +0864 geneCounter=0; +0865 for i=1:numel(KOModel.rxns) +0866 if exist(fullfile(outDir,[KOModel.rxns{i} '.out']), 'file') +0867 fid=fopen(fullfile(outDir,[KOModel.rxns{i} '.out']),'r'); +0868 beginMatches=false; +0869 while 1 +0870 %Get the next line +0871 tline = fgetl(fid); +0872 +0873 %Abort at end of file +0874 if ~ischar(tline) +0875 break; +0876 end +0877 +0878 if and(beginMatches,strcmp(tline,' ------ inclusion threshold ------')) +0879 break; +0880 end +0881 +0882 if beginMatches==false +0883 %This is how the listing of matches begins +0884 if any(strfind(tline,'E-value ')) +0885 %Read one more line that is only padding +0886 tline = fgetl(fid); +0887 beginMatches=true; +0888 end +0889 else +0890 %If matches should be read +0891 if ~strcmp(tline,' [No hits detected that satisfy reporting thresholds]') && ~isempty(tline) +0892 elements=regexp(tline,' ','split'); +0893 elements=elements(cellfun(@any,elements)); +0894 +0895 %Check if the match is below the treshhold +0896 score=str2double(elements{1}); +0897 gene=elements{9}; +0898 if score<=cutOff +0899 %If the score is exactly 0, change it to a very +0900 %small value to avoid NaN +0901 if score==0 +0902 score=10^-250; +0903 end +0904 %Check if the gene is added already and, is so, get +0905 %the best score for it +0906 I=hTable.get(gene); +0907 if any(I) +0908 koGeneMat(i,I)=score; +0909 else +0910 geneCounter=geneCounter+1; +0911 %The gene was not present yet so add it +0912 hTable.put(gene,geneCounter); +0913 genes{geneCounter}=gene; +0914 koGeneMat(i,geneCounter)=score; +0915 end +0916 end +0917 else +0918 break; +0919 end +0920 end +0921 end +0922 fclose(fid); +0923 end +0924 end +0925 fprintf('COMPLETE\n'); +0926 +0927 fprintf('Removing gene, KEGG Orthology associations below minScoreRatioKO, minScoreRatioG... '); +0928 koGeneMat=koGeneMat(:,1:geneCounter); +0929 +0930 %Remove the genes for each KO that are below minScoreRatioKO. +0931 for i=1:size(koGeneMat,1) +0932 J=find(koGeneMat(i,:)); +0933 if any(J) +0934 koGeneMat(i,J(log(koGeneMat(i,J))/log(min(koGeneMat(i,J)))<minScoreRatioKO))=0; +0935 end +0936 end +0937 +0938 %Remove the KOs for each gene that are below minScoreRatioG +0939 for i=1:size(koGeneMat,2) +0940 J=find(koGeneMat(:,i)); +0941 if any(J) +0942 koGeneMat(J(log(koGeneMat(J,i))/log(min(koGeneMat(J,i)))<minScoreRatioG),i)=0; +0943 end +0944 end +0945 fprintf('COMPLETE\n'); +0946 +0947 fprintf('Adding gene annotations to the model... '); +0948 %Create the new model +0949 model.genes=genes(1:geneCounter); +0950 model.grRules=cell(numel(model.rxns),1); +0951 model.grRules(:)={''}; +0952 model.rxnGeneMat=sparse(numel(model.rxns),numel(model.genes)); +0953 +0954 %Loop through the reactions and add the corresponding genes +0955 for i=1:numel(model.rxns) +0956 if isstruct(model.rxnMiriams{i}) +0957 %Get all KOs +0958 I=find(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology')); +0959 KOs=model.rxnMiriams{i}.value(I); +0960 %Find the KOs and the corresponding genes +0961 J=ismember(KOModel.rxns,KOs); +0962 [~, K]=find(koGeneMat(J,:)); +0963 +0964 if any(K) +0965 model.rxnGeneMat(i,K)=1; +0966 %Also delete KOs for which no genes were found. If no genes at +0967 %all were matched to the reaction it will be deleted later +0968 L=sum(koGeneMat(J,:),2)==0; +0969 model.rxnMiriams{i}.value(I(L))=[]; +0970 model.rxnMiriams{i}.name(I(L))=[]; +0971 end +0972 end +0973 end +0974 fprintf('COMPLETE\n'); +0975 +0976 %Find and delete all reactions without genes. This also removes genes that +0977 %are not used (which could happen because minScoreRatioG and +0978 %minScoreRatioKO). If keepSpontaneous==true, the spontaneous reactions +0979 %without genes are kept in the model. Spontaneous reactions with original +0980 %gene associations are treated in the same way, like the rest of the +0981 %reactions - if gene associations were removed during HMM search, such +0982 %reactions are deleted from the model +0983 if keepSpontaneous==true +0984 %Not the most comprise way to delete reactions without genes, but this +0985 %makes the code easier to understand. Firstly the non-spontaneous +0986 %reactions without genes are removed. After that, the second deletion +0987 %step removes spontaneous reactions, which had gene associations before +0988 %HMM search, but no longer have after it +0989 fprintf('Removing non-spontaneous reactions which after HMM search no longer have GPR rules... '); +0990 I=~any(model.rxnGeneMat,2)&~ismember(model.rxns,isSpontaneous); +0991 model=removeReactions(model,I,true,true); +0992 I=~any(model.rxnGeneMat,2)&ismember(model.rxns,spontRxnsWithGenes); +0993 model=removeReactions(model,I,true,true); +0994 else +0995 %Just simply check for any new reactions without genes and remove +0996 %it +0997 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); +0998 I=~any(model.rxnGeneMat,2); 0999 model=removeReactions(model,I,true,true); -1000 else -1001 %Just simply check for any new reactions without genes and remove -1002 %it -1003 fprintf('Removing reactions which after HMM search no longer have GPR rules... '); -1004 I=~any(model.rxnGeneMat,2); -1005 model=removeReactions(model,I,true,true); -1006 end -1007 fprintf('COMPLETE\n'); -1008 -1009 fprintf('Constructing GPR rules and finalizing the model... '); -1010 %Add the gene associations as 'or' -1011 for i=1:numel(model.rxns) -1012 %Find the involved genes -1013 I=find(model.rxnGeneMat(i,:)); -1014 if any(I) -1015 model.grRules{i}=['(' model.genes{I(1)}]; -1016 for j=2:numel(I) -1017 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; -1018 end -1019 model.grRules{i}=[model.grRules{i} ')']; -1020 end -1021 end -1022 -1023 %Fix grRules and reconstruct rxnGeneMat -1024 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output -1025 model.grRules = grRules; -1026 model.rxnGeneMat = rxnGeneMat; -1027 -1028 %Add the description to the reactions -1029 for i=1:numel(model.rxns) -1030 if ~isempty(model.rxnNotes{i}) -1031 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); -1032 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); -1033 else -1034 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; -1035 end -1036 end -1037 %Remove the temp fasta file -1038 delete(fastaFile) -1039 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); -1040 end -1041 -1042 function files=listFiles(directory) -1043 %Supporter function to list the files in a directory and return them as a -1044 %cell array -1045 temp=dir(directory); -1046 files=cell(numel(temp),1); -1047 for i=1:numel(temp) -1048 files{i}=temp(i,1).name; -1049 end -1050 files=strrep(files,'.fa',''); -1051 files=strrep(files,'.hmm',''); -1052 files=strrep(files,'.out',''); -1053 files=strrep(files,'.faw',''); -1054 end +1000 end +1001 fprintf('COMPLETE\n'); +1002 +1003 fprintf('Constructing GPR rules and finalizing the model... '); +1004 %Add the gene associations as 'or' +1005 for i=1:numel(model.rxns) +1006 %Find the involved genes +1007 I=find(model.rxnGeneMat(i,:)); +1008 if any(I) +1009 model.grRules{i}=['(' model.genes{I(1)}]; +1010 for j=2:numel(I) +1011 model.grRules{i}=[model.grRules{i} ' or ' model.genes{I(j)}]; +1012 end +1013 model.grRules{i}=[model.grRules{i} ')']; +1014 end +1015 end +1016 +1017 %Fix grRules and reconstruct rxnGeneMat +1018 [grRules,rxnGeneMat] = standardizeGrRules(model,false); %Give detailed output +1019 model.grRules = grRules; +1020 model.rxnGeneMat = rxnGeneMat; +1021 +1022 %Add the description to the reactions +1023 for i=1:numel(model.rxns) +1024 if ~isempty(model.rxnNotes{i}) +1025 model.rxnNotes(i)=strcat('Included by getKEGGModelForOrganism (using HMMs).',model.rxnNotes(i)); +1026 model.rxnNotes(i)=strrep(model.rxnNotes(i),'.','. '); +1027 else +1028 model.rxnNotes(i)={'Included by getKEGGModelForOrganism (using HMMs)'}; +1029 end +1030 end +1031 %Remove the temp fasta file +1032 delete(fastaFile) +1033 fprintf('COMPLETE\n\n*** Model reconstruction complete ***\n'); +1034 end +1035 +1036 function files=listFiles(directory) +1037 %Supporter function to list the files in a directory and return them as a +1038 %cell array +1039 temp=dir(directory); +1040 files=cell(numel(temp),1); +1041 for i=1:numel(temp) +1042 files{i}=temp(i,1).name; +1043 end +1044 files=strrep(files,'.fa',''); +1045 files=strrep(files,'.hmm',''); +1046 files=strrep(files,'.out',''); +1047 files=strrep(files,'.faw',''); +1048 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/kegg/getWSLpath.html b/doc/external/kegg/getWSLpath.html new file mode 100644 index 00000000..b5f665d1 --- /dev/null +++ b/doc/external/kegg/getWSLpath.html @@ -0,0 +1,80 @@ + + + + Description of getWSLpath + + + + + + + + + +
    Home > external > kegg > getWSLpath.m
    + + + +

    getWSLpath +

    + +

    PURPOSE ^

    +
    getWSLpath
    + +

    SYNOPSIS ^

    +
    function path=getWSLpath(path)
    + +

    DESCRIPTION ^

    +
     getWSLpath
    +   Translate Windows-style path to its Unix WSL (Windows Subsystem for
    +   Linux) equivalent.
    +
    +   Input:
    +   path        string with directory of file path, in Windows-style (e.g.
    +               'C:\Directory\')
    +
    +   Output:
    +   path        string with directory of file path, in Unix style (e.g.
    +               '/mnt/c/Directory/')
    +
    +   Uses the WSL function 'wslpath' to translate the path.
    +
    +   Usage: path=getWSLpath(path)
    + + +

    CROSS-REFERENCE INFORMATION ^

    +This function calls: + +This function is called by: + + + + + +

    SOURCE CODE ^

    +
    0001 function path=getWSLpath(path)
    +0002 % getWSLpath
    +0003 %   Translate Windows-style path to its Unix WSL (Windows Subsystem for
    +0004 %   Linux) equivalent.
    +0005 %
    +0006 %   Input:
    +0007 %   path        string with directory of file path, in Windows-style (e.g.
    +0008 %               'C:\Directory\')
    +0009 %
    +0010 %   Output:
    +0011 %   path        string with directory of file path, in Unix style (e.g.
    +0012 %               '/mnt/c/Directory/')
    +0013 %
    +0014 %   Uses the WSL function 'wslpath' to translate the path.
    +0015 %
    +0016 %   Usage: path=getWSLpath(path)
    +0017 [~,path]=system(['wsl wslpath ''' path '''']);
    +0018 path=path(1:end-1);% Remove final character (line-break)
    +0019 end
    +
    Generated by m2html © 2005
    + + \ No newline at end of file diff --git a/doc/external/kegg/index.html b/doc/external/kegg/index.html index a6620e9d..6bb246fa 100644 --- a/doc/external/kegg/index.html +++ b/doc/external/kegg/index.html @@ -19,7 +19,7 @@

    Index for external\kegg

    Matlab files in this directory:

    -
     constructMultiFastaconstructMultiFasta
     getGenesFromKEGGgetGenesFromKEGG
     getKEGGModelForOrganismgetKEGGModelForOrganism
     getMetsFromKEGGgetMetsFromKEGG
     getModelFromKEGGgetModelFromKEGG
     getPhylDistgetPhylDist
     getRxnsFromKEGGgetRxnsFromKEGG
    + constructMultiFastaconstructMultiFasta  getGenesFromKEGGgetGenesFromKEGG  getKEGGModelForOrganismgetKEGGModelForOrganism  getMetsFromKEGGgetMetsFromKEGG  getModelFromKEGGgetModelFromKEGG  getPhylDistgetPhylDist  getRxnsFromKEGGgetRxnsFromKEGG  getWSLpathgetWSLpath

    Other Matlab-specific files in this directory: