Skip to content

Commit

Permalink
Added and updated docstrings.
Browse files Browse the repository at this point in the history
  • Loading branch information
sdhutchins committed May 22, 2019
1 parent f9f6fbc commit dca3e10
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 19 deletions.
25 changes: 18 additions & 7 deletions OrthoEvol/Orthologs/Blast/base_blastn.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,27 @@ def __init__(self, project, method, template=None, save_data=True, quiet=True, *
self.complete_time_file = self.project + '_TIME.csv'
self.complete_time_file_path = self.data / Path(self.complete_time_file)

self.blastn_parameters, self.query_config = self.select_method(method=self.method)
self.blastn_parameters, self.query_config = BaseBlastN.select_method(method=self.method)

def _make_blast_dir(self, gene, path):
"""Create a blast directory for a gene."""
"""Create a blast directory for a gene.
:param gene: A gene that will be run with blastn.
:param path: The path of the gene directory named with the gene name.
"""
try:
Path.mkdir(path, exist_ok=True, parents=True)
self.blastn_log.debug("Directory created for %s" % gene)
except FileExistsError:
self.blastn_log.debug("Directory exists for %s" % gene)

def _create_temp_fasta(self, query, gene, query_config):
"""Create a temporary fasta file using blastdbcmd."""
"""Create a temporary fasta file using blastdbcmd.
:param query: The blast query or reference accession.
:param gene: The gene name of the accession.
:param query_config: A configuration dict for the blastdbcmd string.
"""
try:
blastdbcmd_query = "blastdbcmd -entry {query} -db {db} -outfmt %f -out {temp fasta}".format(**query_config)
blastdbcmd_status = run(blastdbcmd_query, stdout=PIPE,
Expand All @@ -97,10 +106,11 @@ def _create_temp_fasta(self, query, gene, query_config):
with contextlib.suppress(ValueError):
self.current_gene_list.remove(gene)

def select_method(self, method):
@staticmethod
def select_method(method):
"""Select a method for running blastn.
:param method: a blast method - 1, 2, 3, or None
:param method: The blast method to use. Either 1, 2, or 3.
"""
# Local blast using seqidlist
if method == 1:
Expand Down Expand Up @@ -140,7 +150,7 @@ def select_method(self, method):
'temp fasta': ''}
else:
raise ValueError('%s is not a blast method.' % method)
return (blastn_parameters, query_config)
return blastn_parameters, query_config

def configure(self, query_accessions, query_organism, auto_start=False):
"""This method configures everything for our BLAST workflow.
Expand Down Expand Up @@ -227,7 +237,8 @@ def parse_xml(self, xml_path, gene, organism):
for hsp in hit.hsps:
# Find the highest scoring hit for each gene
if hsp.bitscore_raw > maximum:
# If the gene is a predicted non-coding RefSeq gene then go the the next hit
# If the gene is a predicted non-coding RefSeq gene
# then go to the next hit
# https://en.wikipedia.org/wiki/RefSeq
if "xr" in str(hit.id.lower()):
self.blastn_log.info("Encountered a predicted(X*_00000) "
Expand Down
26 changes: 15 additions & 11 deletions OrthoEvol/Orthologs/Blast/comparative_genetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ def __init__(self, project=None, project_path=os.getcwd(), acc_file=None,
downstream processing or for basic observation of the data.
:param project: The name of the project.
:param project_path: The location of the project, which is generally defined by the ProjectManagement configuration.
:param project_path: The location of the project, which is generally
defined by the ProjectManagement configuration.
:param acc_file: The name of the accession file.
:param taxon_file: A file that contains an ordered list of taxonomy ids.
:param pre_blast: A flag that gives the user access to an API that contains extra information about their genes
using the mygene package.
:param pre_blast: A flag that gives the user access to an API that
contains extra information about their genes using the
mygene package.
:param post_blast: A flag that is used to handle a BLAST result file, which returns information about missing
data, duplicates, etc.
:param hgnc: A flag used as a placeholder for future work with HGNC files.
Expand Down Expand Up @@ -121,7 +123,8 @@ def __init__(self, project=None, project_path=os.getcwd(), acc_file=None,
self.taxon_path = self.project_index / Path(self.__taxon_filename)
# Handle the master accession file (could be before or after blast)
if self.copy_from_package:
shutil.copy(pkg_resources.resource_filename(data.__name__, kwargs['MAF']), str(self.project_index))
shutil.copy(pkg_resources.resource_filename(data.__name__, kwargs['MAF']),
str(self.project_index))
self.acc_file = self.MAF = kwargs['MAF']
self.acc_filename = self.acc_file
if self.acc_file is not None:
Expand Down Expand Up @@ -169,12 +172,14 @@ def __init__(self, project=None, project_path=os.getcwd(), acc_file=None,
self.building = pd.read_csv(str(self.acc_path), dtype=str)
del self.building['Tier']
del self.building[self.species]
self.building = self.building.set_index('Gene') # Object for good user output
# Object for good user output
self.building = self.building.set_index('Gene')
self.building_file_path = self.data / Path(self.building_filename)

# Blast time points
# Master time file for the blast
self.building_time_filename = self.building_filename.replace(
'building.csv', 'building_time.csv') # Master time file for the blast
'building.csv', 'building_time.csv')
self.building_time = pd.read_csv(str(self.acc_path), dtype=str)
del self.building_time['Tier']
del self.building_time[self.species]
Expand Down Expand Up @@ -218,8 +223,8 @@ def __init__(self, project=None, project_path=os.getcwd(), acc_file=None,
self.gene_dict = self.df.T.to_dict()
self.get_master_lists(self.__data) # populates our lists
else:
self.building_filename = str(self.project + 'building.csv')
self.building_time_filename = str(self.project + 'building_time.csv')
self.building_filename = str(self.project + '_building.csv')
self.building_time_filename = str(self.project + '_building_time.csv')


# //TODO-ROB Add HGNC python module
Expand All @@ -229,9 +234,8 @@ def get_file_list(file):
:param file: Name of csv file.
"""

data = pd.read_csv(file, header=None)
file_list = list(data[0])
file_data = pd.read_csv(file, header=None)
file_list = list(file_data[0])
return file_list

def get_master_lists(self, df, csv_file=None):
Expand Down
4 changes: 3 additions & 1 deletion OrthoEvol/Orthologs/Blast/orthologs_blastn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class OrthoBlastN(BaseBlastN):
def __init__(self, project="orthology-gpcr", method=3, template=None,
save_data=True, acc_file="gpcr.csv", copy_from_package=True,
**kwargs):
"""This class inherits from the CompGenFiles class.
"""This class inherits from the BaseBlastN class.
This class utilizes its parent classes to search a standalone
Blast database for specific orthologs of a gene using a query organism
Expand All @@ -21,6 +21,7 @@ def __init__(self, project="orthology-gpcr", method=3, template=None,
:param template: The accession file template.
:param save_data: A flag for saving the post_blast data to an excel file.
:param acc_file: The accession file to use. (Default: 'gpcr.csv')
:param copy_from_package: Copy the acc_file from the package. (Default: True)
:param kwargs:
"""
# Set values for methods to prevent using a config.
Expand All @@ -43,4 +44,5 @@ def __init__(self, project="orthology-gpcr", method=3, template=None,
proj_mana=self.proj_mana, **kwargs)

def run(self):
"""Run the blast using a default configuration."""
self.configure(self.blast_human, self.species, auto_start=True)

0 comments on commit dca3e10

Please sign in to comment.