Skip to content

Commit

Permalink
Merge pull request bjtully#47 from bjtully/underdevelopment
Browse files Browse the repository at this point in the history
Updating coding for V1.2
  • Loading branch information
bjtully authored Aug 11, 2020
2 parents 1a41b7a + ce421ee commit ac8debd
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 6 deletions.
Binary file modified .DS_Store
Binary file not shown.
89 changes: 87 additions & 2 deletions KEGGDecoder/KEGG_decoder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#!/usr/bin/python

'''
KEGG-decoder.py V.1.1
KEGG-decoder.py V.1.2
V.1.2
Added several new pathways including PET degradation, carbon storage,
related to starch/glycogen & polyhydroxybutyrate, and phosphate storage,
related to the reversible polyphosphate reaction. Part of summer research
with Sheyla Aviles.
V.1.1
Correcting typos identified by Chris Neely. Adding more complete
pathways components for amino acid biosynthesis identified by
Expand Down Expand Up @@ -1357,6 +1362,78 @@ def amino_acids(ko_match):

return out_data

def plastic(ko_match):
    """Score completeness of the PET (polyethylene terephthalate) degradation pathway.

    The pathway is scored as four steps worth 0.25 each, so the result
    ranges from 0 (nothing detected) to 1.0 (all four steps detected).

    Args:
        ko_match: Collection of KEGG Orthology identifiers found in a
            genome; only membership (``in``) tests are performed on it.

    Returns:
        dict: ``{"PET degradation": score}``.
    """
    # Steps satisfied by a single KO each:
    #   K21104 poly(ethylene terephthalate) hydrolase
    #   K21105 mono(ethylene terephthalate) hydrolase
    #   K18076 1,2-dihydroxy-3,5-cyclohexadiene-1,4-dicarboxylate dehydrogenase
    single_ko_steps = ("K21104", "K21105", "K18076")
    score = sum(0.25 for ko in single_ko_steps if ko in ko_match)

    # Terephthalate 1,2-dioxygenase step: satisfied either by K18077 alone
    # or by K18074 and K18075 together.
    has_k18077 = "K18077" in ko_match
    has_k18074_k18075 = "K18074" in ko_match and "K18075" in ko_match
    if has_k18077 or has_k18074_k18075:
        score += 0.25

    return {"PET degradation": score}

def carbon_storage(ko_match):
    """Score carbon-storage pathways (starch/glycogen and polyhydroxybutyrate).

    Args:
        ko_match: Collection of KEGG Orthology identifiers found in a
            genome; only membership (``in``) tests are performed on it.

    Returns:
        dict: Scores keyed by

        * ``'starch/glycogen synthesis'`` -- three steps worth 0.33 each,
          so a complete pathway scores 0.99.
        * ``'starch/glycogen degradation'`` -- 1 if any single degradation
          route is detected, otherwise 0 (only one instance is recorded).
        * ``'polyhydroxybutyrate synthesis'`` -- 0.5 for K00023 plus 0.167
          for each of K00626, K03821 and K22881, capped at 1.0.
    """
    synthesis = 'starch/glycogen synthesis'
    degradation = 'starch/glycogen degradation'
    phb_synthesis = 'polyhydroxybutyrate synthesis'
    out_data = {synthesis: 0, degradation: 0, phb_synthesis: 0}

    # --- starch/glycogen synthesis ---
    # K00703 glgA; starch synthase
    # K00975 glgC; glucose-1-phosphate adenylyltransferase
    for ko in ("K00703", "K00975"):
        if ko in ko_match:
            out_data[synthesis] += 0.33
    # 1,4-alpha-glucan branching enzyme: either K00700 (glgB) or K16149
    if 'K00700' in ko_match or 'K16149' in ko_match:
        out_data[synthesis] += 0.33

    # --- starch/glycogen degradation ---
    # Any one of these KOs is enough to flag degradation as present.
    degradation_kos = (
        # starch > D-glucose
        'K21574',  # susB; glucan 1,4-alpha-glucosidase
        # starch > cyclodextrin
        'K00701',  # cgt; cyclomaltodextrin glucanotransferase
        # starch > maltodextrin
        'K01214',  # treX; isoamylase
        # starch > glucose-6P
        'K00688',  # glgP; glycogen phosphorylase
        'K16153',  # glycogen phosphorylase/synthase
        'K00705',  # malQ; 4-alpha-glucanotransferase
        'K22451',  # jgt; 4-alpha-glucanotransferase
        'K02438',  # glgX; glycogen debranching enzyme
        'K01200',  # pulA; pullulanase
        # starch > dextrin
        'K01176',  # amyA; alpha-amylase
        'K07405',  # alpha-amylase (in the definitions list; previously unchecked)
        'K05343',  # treS; maltose alpha-D-glucosyltransferase / alpha-amylase
        'K01177',  # beta-amylase
        'K05992',  # amyM; maltogenic alpha-amylase
        'K01208',  # nplT; cyclomaltodextrinase / maltogenic alpha-amylase
    )
    if any(ko in ko_match for ko in degradation_kos):
        out_data[degradation] = 1

    # --- polyhydroxybutyrate synthesis ---
    # K00023 phbB; acetoacetyl-CoA reductase
    if 'K00023' in ko_match:
        out_data[phb_synthesis] += 0.5
    # K00626 phaA, K03821 phaC, K22881 phaE
    for ko in ('K00626', 'K03821', 'K22881'):
        if ko in ko_match:
            out_data[phb_synthesis] += 0.167
    # 0.5 + 3 * 0.167 = 1.001, so cap the score to keep it within 0..1.
    out_data[phb_synthesis] = min(out_data[phb_synthesis], 1.0)

    return out_data


def phosphate_storage(ko_match):
    """Score the bidirectional polyphosphate reaction for one genome.

    Two enzyme groups contribute 0.5 each, giving 0, 0.5 or 1.0.

    Args:
        ko_match: Collection of KEGG Orthology identifiers found in a
            genome; only membership (``in``) tests are performed on it.

    Returns:
        dict: ``{'bidirectional polyphosphate': score}``.
    """
    # Polyphosphate kinase: K00937 (ppk) or K22468 (ppk2)
    kinase_kos = ('K00937', 'K22468')
    # Pyrophosphatase: K01507 (ppa), K15986 (ppaC) or K06019 (ppaX)
    pyrophosphatase_kos = ('K01507', 'K15986', 'K06019')

    score = 0
    for enzyme_group in (kinase_kos, pyrophosphatase_kos):
        if any(ko in ko_match for ko in enzyme_group):
            score += 0.5

    return {'bidirectional polyphosphate': score}


def default_viz(genome_df, outfile_name):
import seaborn as sns
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -1485,7 +1562,9 @@ def main():
'threonine', 'asparagine', 'glutamine', 'cysteine',
'glycine', 'proline', 'alanine', 'valine',
'methionine', 'phenylalanine', 'isoleucine', 'leucine',
'tryptophan', 'tyrosine', 'aspartate', 'glutamate']
'tryptophan', 'tyrosine', 'aspartate', 'glutamate', 'PET degradation',
'starch/glycogen synthesis', 'starch/glycogen degradation', 'polyhydroxybutyrate synthesis',
'bidirectional polyphosphate']


filehandle = str(arg_dict['output'])
Expand Down Expand Up @@ -1530,6 +1609,9 @@ def main():
pathway_data.update(arsenic(genome_data[k]))
pathway_data.update(metal_transport(genome_data[k]))
pathway_data.update(amino_acids(genome_data[k]))
pathway_data.update(plastic(genome_data[k]))
pathway_data.update(carbon_storage(genome_data[k]))
pathway_data.update(phosphate_storage(genome_data[k]))
# print k, pathway_data

out_string = str(k)+"\t"
Expand Down Expand Up @@ -1562,14 +1644,17 @@ def main():

if arg_dict['vizoption'] == 'static':
from .KEGG_clustering import hClust_euclidean
#from KEGG_clustering import hClust_euclidean
if len(genome.index) >= 2 and not rearrange:
genome = hClust_euclidean(genome)
default_viz(genome, os.path.splitext(filehandle)[0] + ".svg")
if arg_dict['vizoption'] == 'interactive':
from .Plotly_viz import plotly_viz
#from Plotly_viz import plotly_viz
plotly_viz(genome, os.path.splitext(filehandle)[0] + ".html")
if arg_dict['vizoption'] == 'tanglegram':
from .MakeTanglegram import make_tanglegram
#from MakeTanglegram import make_tanglegram
if len(genome.index) >= 3:
make_tanglegram(genome, str(arg_dict['newick']), os.path.splitext(filehandle)[0] + ".tanglegram.svg", int(arg_dict["tangleopt"]))
else:
Expand Down
38 changes: 36 additions & 2 deletions KEGGDecoder/KOALA_definitions.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@

V.1.2
Added several new pathways including PET degradation, carbon storage,
related to starch/glycogen & polyhydroxybutyrate, and phosphate storage,
related to the reversible polyphosphate reaction. Part of summer research
with Sheyla Aviles.
V.1.1
Correcting typos identified by Chris Neely. Adding more complete
pathways components for amino acid biosynthesis identified by
Expand Down Expand Up @@ -653,6 +658,35 @@ asparagine synthase (glutamine-hydrolysing) asparagine - polar uncharged side ch
K00811 ASP5; OR K00812 aspB; K00813 aspC; K11358 yhdR; K14454 GOT1; K14455 GOT2 aspartate aminotransferase glutamate - negative charge




Polyethylene terephthalate degradation
K21104 poly(ethylene terephthalate) hydrolase
K21105 mono(ethylene terephthalate) hydrolase
K18074 tphA2; terephthalate 1,2-dioxygenase oxygenase component alpha subunit AND K18075 tphA3; terephthalate 1,2-dioxygenase oxygenase component beta subunit OR K18077 tphA1; terephthalate 1,2-dioxygenase reductase component
K18076 tphB; 1,2-dihydroxy-3,5-cyclohexadiene-1,4-dicarboxylate dehydrogenase

Storage carbon (starch/glycogen) and phosphorus (polyphosphate)
STARCH Synthesis
K00703 glgA; starch synthase
K00975 glgC; glucose-1-phosphate adenylyltransferase
K00700 glgB; 1,4-alpha-glucan branching enzyme OR K16149 1,4-alpha-glucan branching enzyme
STARCH Degradation -- any pathway that converts glycogen will be included. Only recording 1 instance of transformation.
starch > glucose-6P
K00688 glgP; glycogen phosphorylase OR K16153 glycogen phosphorylase/synthase OR K00705 malQ; 4-alpha-glucanotransferase OR K22451 jgt; 4-alpha-glucanotransferase OR K02438 glgX; glycogen debranching enzyme OR K01200 pulA; pullulanase
starch > D-glucose
K21574 susB; glucan 1,4-alpha-glucosidase
starch > cyclodextrin
K00701 cgt; cyclomaltodextrin glucanotransferase
starch > maltodextrin
K01214 treX; isoamylase
starch > dextrin
K01176 amyA; alpha-amylase OR K07405 alpha-amylase OR K05343 treS; maltose alpha-D-glucosyltransferase / alpha-amylase
K01177 beta-amylase
K05992 amyM; maltogenic alpha-amylase OR K01208 nplT; cyclomaltodextrinase / maltogenic alpha-amylase / neopullulanase
POLYPHOSPHATE bidirectional
K00937 ppk; polyphosphate kinase OR K22468 ppk2; polyphosphate kinase
K01507 ppa; inorganic pyrophosphatase OR K15986 ppaC; manganese-dependent inorganic pyrophosphatase OR K06019 ppaX; pyrophosphatase PpaX
PHA/PHB synthesis/degradation
K00023 phbB; acetoacetyl-CoA reductase
K00626 phaA; acetyl-CoA C-acetyltransferase AND K03821 phaC; polyhydroxyalkanoate synthase subunit PhaC AND K22881 phaE; polyhydroxyalkanoate synthase subunit PhaE


11 changes: 10 additions & 1 deletion KEGGDecoder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,15 @@ python Decode_and_Expand.py <FUNCTION_OUT.list> <HMM_OUT.list>

Change Log
================================================================
## V1.2
Added several new pathways including:

* PET degradation
* carbon storage, related to starch/glycogen & polyhydroxybutyrate
* phosphate storage, related to the reversible polyphosphate reaction.

Part of summer research with Sheyla Aviles.

## V1.1
Correcting typos identified by Chris Neely. Adding more complete
pathways components for amino acid biosynthesis identified by
Expand All @@ -131,7 +140,7 @@ Dr. Eric Webb
* phenylalanine added K01713 pheC; cyclohexadienyl dehydratase OR K05359 ADT; arogenate/prephenate dehydratase OR K04518 pheA2; prephenate dehydratase
* tyrosine added K00220 tyrC; cyclohexadieny/prephenate dehydrogenase OR K24018; cyclohexadieny/prephenate dehydrogenase OR K15226 tyrAa; arogenate dehydrogenase

## V1.0.10##
## V1.0.10 ##
Added the 20 amino acids. In most instances, only the last step in converting precursor to amino acid is assessed (except for valine, isoleucine, leucine, and tryptophan). The following amino acids share detection pathways:

* serine & glycine
Expand Down
2 changes: 1 addition & 1 deletion KEGGDecoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
"""


__version__ = "1.1"
__version__ = "1.2"

0 comments on commit ac8debd

Please sign in to comment.