Skip to content

Commit

Permalink
Added missing docstrings.
Browse files Browse the repository at this point in the history
  • Loading branch information
sdhutchins committed Jun 7, 2019
1 parent 1e2cbc4 commit fe83f5b
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 30 deletions.
11 changes: 6 additions & 5 deletions OrthoEvol/Manager/database_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,13 @@ def download_blast_database(self, database_name="refseq_rna", v5=True, set_blast
For other types of blast data, please see the NCBIREADME.md file.
:param database_name: A string that represents a pattern in the files of interest.
:type database_name: string.
:param database_name: A string that represents a pattern in the files of interest, defaults to "refseq_rna"
:type database_name: str, optional
:param v5: A flag that determines which version of blastdb to use, defaults to True
:type v5: bool, optional
:param set_blastdb: A flag that determines whether the BLASTDB environment
variable is automatically set.
:type set_blastdb: bool.
:type set_blastdb: bool, optional
"""
# <path>/<user or basic_project>/databases/NCBI/blast/db/<database_name>
dl_path = Path(self.database_path) / Path("NCBI") / Path("blast") / Path("db")
Expand Down Expand Up @@ -333,8 +335,7 @@ def PROJECTS(self, **kwargs):
print(self)
return {}, {}

def full(self, NCBI, ITIS, Projects=None,
configure_flag=None, archive_flag=None, delete_flag=None, project_flag=None, _path=None):
def full(self, NCBI, ITIS, Projects=None, configure_flag=None, archive_flag=None, delete_flag=None, project_flag=None, _path=None):
"""
The most generalized strategy available. This configures everything. The 3 primary flags (configure, archive,
and delete) will be passed down to the more specific strategies, which will inherit these values unless
Expand Down
23 changes: 20 additions & 3 deletions OrthoEvol/Orthologs/Blast/blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,23 @@ def select_method(method=1):
return blastn_parameters, query_config

def blastn_wrapper(self, gene, organism, parameters, xml_path, gene_path):
"""Run the Ncbiblastncommandline wrapper modified by this package."""
"""Use NCBI's blastn wrapper to run blast.
The function includes a try/except to ensure that errors are caught
and that if a blast stops while blasting, incomplete files will be
removed.
:param gene: The input gene for the blast run.
:type gene: str
:param organism: The organism to retrieve a hit for.
:type organism: str
:param parameters: A dictionary of blastn parameters.
:type parameters: dict
:param xml_path: The path to the xml output file.
:type xml_path: str
:param gene_path: The path to the gene's directory.
:type gene_path: str
"""
try:
self.blastn_log.info('Blast run has started.')
start_time = self.get_time()
Expand Down Expand Up @@ -207,7 +223,8 @@ def blastn_wrapper(self, gene, organism, parameters, xml_path, gene_path):
raise

def configure(self, query_accessions, query_organism, auto_start=False):
"""This method configures everything for our BLAST workflow.
"""Configure the BLAST workflow.
It configures the accession file, which works with interrupted Blasts.
It configures a gene_list for blasting the right genes.
Expand Down Expand Up @@ -402,7 +419,7 @@ def runblast(self, genes=None, query_organism=None, pre_configured=False):
self.blastn_log.error('%s was deleted' % xml)

else:
# Set up blast parameters
# Set up blast parameters
query_seq_path = str(gene_path / Path('temp.fasta'))
# Add blastn parameters for each method to dict
if self.method == 1:
Expand Down
5 changes: 3 additions & 2 deletions OrthoEvol/Orthologs/Blast/blastn_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def __init__(self, cmd="blastn", **kwargs):
"""Discontiguous MegaBLAST template type (string).
Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
Requires: template_length.""",
checker_function=lambda value: value in ['coding', 'coding_and_optimal', 'optimal'],
checker_function=lambda value: value in [
'coding', 'coding_and_optimal', 'optimal'],
equate=False),
_Option(["-template_length", "template_length"],
"""Discontiguous MegaBLAST template length (integer).
Expand Down Expand Up @@ -128,4 +129,4 @@ def __init__(self, cmd="blastn", **kwargs):
""",
equate=False),
]
_NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
_NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
21 changes: 11 additions & 10 deletions OrthoEvol/Tools/ftp/baseftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ class BaseFTPClient(object):
"""

def __init__(self, ftpsite, user, password, keepalive=False, debug_lvl=0):
"""Connect to a ftp site using an email address.
:param ftpsite: Address of the ftp site you want to connect to.
:param email: Input your email address.
:param keepalive: Flag to determine whether to keepalive the connection
(Default value = False)
:type keepalive: bool
:param debug_lvl: Verbosity level for debugging ftp connection
(Default value = 0)
:type debug_lvl: int
"""Connect to a ftp site using a username and password.
:param ftpsite: The hostname or address of the ftp site you want to connect to.
:param user: The name of the user that will log in.
:type user: str
:param password: The password needed to log in to the ftp site.
:type password: str
:param keepalive: Flag to determine whether to keep the connection alive, defaults to False
:type keepalive: bool, optional
:param debug_lvl: Verbosity level for debugging ftp connection, defaults to 0
:type debug_lvl: int, optional
"""
self._ftpsite = ftpsite
self._user = user
Expand Down
41 changes: 31 additions & 10 deletions OrthoEvol/Tools/ftp/ncbiftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@ class NcbiFTPClient(BaseFTPClient):
"""Access NCBI's FTP servers with ease."""

def __init__(self, email):
"""Initialize the NcbiFTPClient.
:param email: A valid email address to use as the password.
:type email: str
"""
_ncbi = 'ftp.ncbi.nlm.nih.gov'
super().__init__(_ncbi, user="anonymous", password=email)
self._datafmt = '%m-%d-%Y@%I:%M:%S-%p'
self._date = str(datetime.now().strftime(self._datafmt))
self._datefmt = '%m-%d-%Y@%I:%M:%S-%p'
self._date = str(datetime.now().strftime(self._datefmt))
self.blastpath = '/blast/'
self.blastdb_path = '/blast/db/'
self.blastdbv5_path = '/blast/db/v5/'
Expand All @@ -38,7 +43,7 @@ def __init__(self, email):
self.windowmasker_path = self.blastpath + 'windowmasker_files/'
self.cpus = 1

# Located in self.blastdb_path
# Located in the blastdb path
self._taxdb = ['taxdb.tar.gz', 'taxdb.tar.gz.md5']

# TODO Turn into a json file, dict, or config
Expand Down Expand Up @@ -117,7 +122,11 @@ def download_file(self, filename):
self.ncbiftp_log.info('%s was downloaded.' % str(filename))

def _download_windowmasker(self, windowmaskerfile):
"""Download the window masker files."""
"""Download the window masker files.
:param windowmaskerfile: A path to a window masker file to download.
:type windowmaskerfile: str
"""
wm = windowmaskerfile.split(sep='.')
taxid = wm[0]
wm_ext = wm[1]
Expand Down Expand Up @@ -226,7 +235,20 @@ def getwindowmaskerfiles(self, taxonomy_ids, download_path):

def getrefseqrelease(self, collection_subset, seqtype, seqformat, download_path,
extract=True):
"""Download the refseq release database."""
"""Download the refseq release database.
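:param collection_subset: The leading component of the release file names to match.
:type collection_subset: str
:param seqtype: The sequence type component of the release file names to match.
:type seqtype: str
:param seqformat: The sequence format component that precedes ".gz" in the release file names.
:type seqformat: str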
:param download_path: [description]
:type download_path: [type]
:param extract: [description], defaults to True
:type extract: bool, optional
:raises FileNotFoundError: [description]
"""
self.ftp = self._login()
self.ftp.cwd(self.refseqrelease_path)
taxon_dirs = self.listdirectories(self.refseqrelease_path)
Expand All @@ -240,8 +262,8 @@ def getrefseqrelease(self, collection_subset, seqtype, seqformat, download_path,
releasefiles = self.listfiles(curpath)

self.files2download = []
pattern = re.compile('^' + collection_subset + '[.](.*?)[.]' + seqtype
+ '[.]' + seqformat + '[.]gz$')
pattern = re.compile('^' + collection_subset +
'[.](.*?)[.]' + seqtype + '[.]' + seqformat + '[.]gz$')
for releasefile in releasefiles:
if re.match(pattern, releasefile):
self.files2download.append(releasefile)
Expand Down Expand Up @@ -276,8 +298,8 @@ def getrefseqrelease(self, collection_subset, seqtype, seqformat, download_path,
def getblastfasta(self, database_name, download_path, extract=True):
"""Download the fasta sequence database (not formatted).
:param database_name:
:param download_path:
:param database_name: The name of the database to download.
:param download_path: The path to download the database to.
:param extract: (Default value = True)
"""

Expand Down Expand Up @@ -370,7 +392,6 @@ def getblastdb(self, database_name, download_path, v5=True, extract=True):

except RuntimeError as err:
self.ncbiftp_log.error(err)
self.cpus = 1
# Try to download again
self.ncbiftp_log.warning('Attempting to download again.')
with ThreadPool(self.cpus) as download_pool:
Expand Down

0 comments on commit fe83f5b

Please sign in to comment.