diff --git a/server/conversion/cgivar_to_gff.py b/server/conversion/cgivar_to_gff.py index c4d4f0d..9781121 100644 --- a/server/conversion/cgivar_to_gff.py +++ b/server/conversion/cgivar_to_gff.py @@ -5,39 +5,21 @@ The files should be interpretable by GET-Evidence's genome processing system. To see command line usage, run with "-h" or "--help". """ +import os import re import sys -import bz2 -import gzip -import zipfile from optparse import OptionParser +GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +if not GETEV_MAIN_PATH in sys.path: + sys.path.insert(1, GETEV_MAIN_PATH) +del GETEV_MAIN_PATH + +from utils import autozip + DEFAULT_BUILD = "b36" DEFAULT_SOFTWARE_VER = "2.0.1.5" -def autozip_file_open(filename, mode='r'): - """Return file obj, with compression if appropriate extension is given""" - if re.search("\.zip", filename): - archive = zipfile.ZipFile(filename, mode) - if mode == 'r': - files = archive.infolist() - if len(files) == 1: - if hasattr(archive, "open"): - return archive.open(files[0]) - else: - sys.exit("zipfile.ZipFile.open not available. Upgrade " \ - + "python to 2.6 to work with zip-compressed files!") - else: - sys.exit("Zip archive " + filename + " has more than one file!") - else: - sys.exit("Zip archive only supported for reading.") - elif re.search("\.gz", filename): - return gzip.GzipFile(filename, mode) - elif re.search("\.bz2", filename): - return bz2.BZ2File(filename, mode) - else: - return open(filename, mode) - def process_full_position(data, software_ver): """Return GFF-formated string for when all alleles called on same line""" chrom, begin, end, feat_type, ref_allele, var_allele = data[3:9] @@ -184,7 +166,7 @@ def convert(cgi_input, options=None): # Set up CGI input. Default is to assume a str generator. cgi_data = cgi_input if isinstance(cgi_input, str): - cgi_data = autozip_file_open(cgi_input, 'r') + cgi_data = autozip.file_open(cgi_input, 'r') build = DEFAULT_BUILD software_ver = DEFAULT_SOFTWARE_VER @@ -255,7 +237,7 @@ def convert_to_file(cgi_input, output_file): """Convert a CGI var file and output GFF-formatted data to file""" output = output_file # default assumes writable file object if isinstance(output_file, str): - output = autozip_file_open(output_file, 'w') + output = autozip.file_open(output_file, 'w') conversion = convert(cgi_input) # set up generator for line in conversion: output.write(line + "\n") diff --git a/server/conversion/gff_from_23andme.py b/server/conversion/gff_from_23andme.py index 4685e65..2a325be 100644 --- a/server/conversion/gff_from_23andme.py +++ b/server/conversion/gff_from_23andme.py @@ -6,6 +6,7 @@ To see command line usage, run with "-h" or "--help". """ +import os import re import sys import bz2 @@ -13,38 +14,20 @@ import zipfile from optparse import OptionParser -DEFAULT_BUILD = "b36" +GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +if not GETEV_MAIN_PATH in sys.path: + sys.path.insert(1, GETEV_MAIN_PATH) +del GETEV_MAIN_PATH -def autozip_file_open(filename, mode='r'): - """Return file obj, with compression if appropriate extension is given""" - if re.search("\.zip", filename): - archive = zipfile.ZipFile(filename, mode) - if mode == 'r': - files = archive.infolist() - if len(files) == 1: - if hasattr(archive, "open"): - return archive.open(files[0]) - else: - sys.exit("zipfile.ZipFile.open not available. " + - "Upgrade python to 2.6 to work with " + - "zip-compressed files!") - else: - sys.exit("Zip archive " + filename + - " has more than one file!") - else: - sys.exit("Zip archive only supported for reading.") - elif re.search("\.gz", filename): - return gzip.GzipFile(filename, mode) - elif re.search("\.bz2", filename): - return bz2.BZ2File(filename, mode) - else: - return open(filename, mode) +from utils import autozip + +DEFAULT_BUILD = "b36" def convert(genotype_input): """Take in 23andme genotype data, yield GFF formatted lines""" genotype_data = genotype_input if isinstance(genotype_input, str): - genotype_data = autozip_file_open(genotype_input, 'r') + genotype_data = autozip.file_open(genotype_input, 'r') build = DEFAULT_BUILD header_done = False for line in genotype_data: @@ -88,7 +71,7 @@ def convert_to_file(genotype_input, output_file): """Convert a 23andme file and output GFF-formatted data to file""" output = output_file # default assumes writable file object if isinstance(output_file, str): - output = autozip_file_open(output_file, 'w') + output = autozip.file_open(output_file, 'w') conversion = convert(genotype_input) for line in conversion: output.write(line + "\n") diff --git a/server/conversion/vcf_to_gff.py b/server/conversion/vcf_to_gff.py index 9678e2b..e5f2bba 100644 --- a/server/conversion/vcf_to_gff.py +++ b/server/conversion/vcf_to_gff.py @@ -5,6 +5,7 @@ The files should be interpretable by GET-Evidence's genome processing system. To see command line usage, run with "-h" or "--help". """ +import os import re import sys import bz2 @@ -12,32 +13,14 @@ import zipfile from optparse import OptionParser +GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +if not GETEV_MAIN_PATH in sys.path: + sys.path.insert(1, GETEV_MAIN_PATH) +del GETEV_MAIN_PATH + +from utils import autozip + DEFAULT_BUILD = "b36" -DEFAULT_SOFTWARE_VER = "2.0.1.5" - - -def autozip_file_open(filename, mode='r'): - """Return file obj, with compression if appropriate extension is given""" - if re.search("\.zip", filename): - archive = zipfile.ZipFile(filename, mode) - if mode == 'r': - files = archive.infolist() - if len(files) == 1: - if hasattr(archive, "open"): - return archive.open(files[0]) - else: - sys.exit("zipfile.ZipFile.open not available. Upgrade " \ - + "python to 2.6 to work with zip-compressed files!") - else: - sys.exit("Zip archive " + filename + " has more than one file!") - else: - sys.exit("Zip archive only supported for reading.") - elif re.search("\.gz", filename): - return gzip.GzipFile(filename, mode) - elif re.search("\.bz2", filename): - return bz2.BZ2File(filename, mode) - else: - return open(filename, mode) def process_header(vcf_line, build): @@ -50,7 +33,6 @@ def process_header(vcf_line, build): build = 'b36' return build - def process_info(info_str): """Process "info" column of VCF and return dict""" data = info_str.split(';') @@ -118,7 +100,7 @@ def convert(vcf_input, options=None): # Set up VCF input. Default is to assume a str generator. vcf_data = vcf_input if isinstance(vcf_input, str): - vcf_data = autozip_file_open(vcf_input, 'r') + vcf_data = autozip.file_open(vcf_input, 'r') build = DEFAULT_BUILD header_done = False @@ -156,7 +138,7 @@ def convert_to_file(vcf_input, output_file): """Convert a VCF file and output GFF-formatted data to file""" output = output_file # default assumes writable file object if isinstance(output_file, str): - output = autozip_file_open(output_file, 'w') + output = autozip.file_open(output_file, 'w') conversion = convert(vcf_input) # set up generator for line in conversion: output.write(line + "\n")