Standardizing all conversion scripts to use server/utils/autozip.py

tomclegg · Aug 26, 2012 · c26785f · c26785f
1 parent f680f76
commit c26785f
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 83 deletions.
diff --git a/server/conversion/cgivar_to_gff.py b/server/conversion/cgivar_to_gff.py
@@ -5,39 +5,21 @@
 The files should be interpretable by GET-Evidence's genome processing system.
 To see command line usage, run with "-h" or "--help".
 """
+import os
 import re
 import sys
-import bz2
-import gzip
-import zipfile
 from optparse import OptionParser
 
+GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if not GETEV_MAIN_PATH in sys.path:
+    sys.path.insert(1, GETEV_MAIN_PATH)
+del GETEV_MAIN_PATH
+
+from utils import autozip
+
 DEFAULT_BUILD = "b36"
 DEFAULT_SOFTWARE_VER = "2.0.1.5"
 
-def autozip_file_open(filename, mode='r'):
-    """Return file obj, with compression if appropriate extension is given"""
-    if re.search("\.zip", filename):
-        archive = zipfile.ZipFile(filename, mode)
-        if mode == 'r':
-            files = archive.infolist()
-            if len(files) == 1:
-                if hasattr(archive, "open"):
-                    return archive.open(files[0])
-                else:
-                    sys.exit("zipfile.ZipFile.open not available. Upgrade " \
-                        + "python to 2.6 to work with zip-compressed files!")
-            else:
-                sys.exit("Zip archive " + filename + " has more than one file!")
-        else:
-            sys.exit("Zip archive only supported for reading.")
-    elif re.search("\.gz", filename):
-        return gzip.GzipFile(filename, mode)
-    elif re.search("\.bz2", filename):
-        return bz2.BZ2File(filename, mode)
-    else:
-        return open(filename, mode)
-
 def process_full_position(data, software_ver):
     """Return GFF-formated string for when all alleles called on same line"""
     chrom, begin, end, feat_type, ref_allele, var_allele = data[3:9]
@@ -184,7 +166,7 @@ def convert(cgi_input, options=None):
     # Set up CGI input. Default is to assume a str generator.
     cgi_data = cgi_input
     if isinstance(cgi_input, str): 
-        cgi_data = autozip_file_open(cgi_input, 'r')
+        cgi_data = autozip.file_open(cgi_input, 'r')
 
     build = DEFAULT_BUILD
     software_ver = DEFAULT_SOFTWARE_VER
@@ -255,7 +237,7 @@ def convert_to_file(cgi_input, output_file):
     """Convert a CGI var file and output GFF-formatted data to file"""
     output = output_file  # default assumes writable file object
     if isinstance(output_file, str): 
-        output = autozip_file_open(output_file, 'w')
+        output = autozip.file_open(output_file, 'w')
     conversion = convert(cgi_input)  # set up generator
     for line in conversion:
         output.write(line + "\n")

diff --git a/server/conversion/gff_from_23andme.py b/server/conversion/gff_from_23andme.py
@@ -6,45 +6,28 @@
 To see command line usage, run with "-h" or "--help".
 """
 
+import os
 import re
 import sys
 import bz2
 import gzip
 import zipfile
 from optparse import OptionParser
 
-DEFAULT_BUILD = "b36"
+GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if not GETEV_MAIN_PATH in sys.path:
+    sys.path.insert(1, GETEV_MAIN_PATH)
+del GETEV_MAIN_PATH
 
-def autozip_file_open(filename, mode='r'):
-    """Return file obj, with compression if appropriate extension is given"""
-    if re.search("\.zip", filename):
-        archive = zipfile.ZipFile(filename, mode)
-        if mode == 'r':
-            files = archive.infolist()
-            if len(files) == 1:
-                if hasattr(archive, "open"):
-                    return archive.open(files[0])
-                else:
-                    sys.exit("zipfile.ZipFile.open not available. " +
-                             "Upgrade python to 2.6 to work with " +
-                             "zip-compressed files!")
-            else:
-                sys.exit("Zip archive " + filename + 
-                         " has more than one file!")
-        else:
-            sys.exit("Zip archive only supported for reading.")
-    elif re.search("\.gz", filename):
-        return gzip.GzipFile(filename, mode)
-    elif re.search("\.bz2", filename):
-        return bz2.BZ2File(filename, mode)
-    else:
-        return open(filename, mode)
+from utils import autozip
+
+DEFAULT_BUILD = "b36"
 
 def convert(genotype_input):
     """Take in 23andme genotype data, yield GFF formatted lines"""
     genotype_data = genotype_input
     if isinstance(genotype_input, str):
-        genotype_data = autozip_file_open(genotype_input, 'r')
+        genotype_data = autozip.file_open(genotype_input, 'r')
     build = DEFAULT_BUILD
     header_done = False
     for line in genotype_data:
@@ -88,7 +71,7 @@ def convert_to_file(genotype_input, output_file):
     """Convert a 23andme file and output GFF-formatted data to file"""
     output = output_file  # default assumes writable file object
     if isinstance(output_file, str):
-        output = autozip_file_open(output_file, 'w')
+        output = autozip.file_open(output_file, 'w')
     conversion = convert(genotype_input)
     for line in conversion:
         output.write(line + "\n")

diff --git a/server/conversion/vcf_to_gff.py b/server/conversion/vcf_to_gff.py
@@ -5,39 +5,22 @@
 The files should be interpretable by GET-Evidence's genome processing system.
 To see command line usage, run with "-h" or "--help".
 """
+import os
 import re
 import sys
 import bz2
 import gzip
 import zipfile
 from optparse import OptionParser
 
+GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if not GETEV_MAIN_PATH in sys.path:
+    sys.path.insert(1, GETEV_MAIN_PATH)
+del GETEV_MAIN_PATH
+
+from utils import autozip
+
 DEFAULT_BUILD = "b36"
-DEFAULT_SOFTWARE_VER = "2.0.1.5"
-
-
-def autozip_file_open(filename, mode='r'):
-    """Return file obj, with compression if appropriate extension is given"""
-    if re.search("\.zip", filename):
-        archive = zipfile.ZipFile(filename, mode)
-        if mode == 'r':
-            files = archive.infolist()
-            if len(files) == 1:
-                if hasattr(archive, "open"):
-                    return archive.open(files[0])
-                else:
-                    sys.exit("zipfile.ZipFile.open not available. Upgrade " \
-                        + "python to 2.6 to work with zip-compressed files!")
-            else:
-                sys.exit("Zip archive " + filename + " has more than one file!")
-        else:
-            sys.exit("Zip archive only supported for reading.")
-    elif re.search("\.gz", filename):
-        return gzip.GzipFile(filename, mode)
-    elif re.search("\.bz2", filename):
-        return bz2.BZ2File(filename, mode)
-    else:
-        return open(filename, mode)
 
 
 def process_header(vcf_line, build):
@@ -50,7 +33,6 @@ def process_header(vcf_line, build):
             build = 'b36'
     return build
 
-
 def process_info(info_str):
     """Process "info" column of VCF and return dict"""
     data = info_str.split(';')
@@ -118,7 +100,7 @@ def convert(vcf_input, options=None):
     # Set up VCF input. Default is to assume a str generator.
     vcf_data = vcf_input
     if isinstance(vcf_input, str): 
-        vcf_data = autozip_file_open(vcf_input, 'r')
+        vcf_data = autozip.file_open(vcf_input, 'r')
 
     build = DEFAULT_BUILD
     header_done = False
@@ -156,7 +138,7 @@ def convert_to_file(vcf_input, output_file):
     """Convert a VCF file and output GFF-formatted data to file"""
     output = output_file  # default assumes writable file object
     if isinstance(output_file, str): 
-        output = autozip_file_open(output_file, 'w')
+        output = autozip.file_open(output_file, 'w')
     conversion = convert(vcf_input)  # set up generator
     for line in conversion:
         output.write(line + "\n")