Skip to content

Commit

Permalink
Standardizing to all use server/utils/autozip.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Madeleine Price Ball committed Aug 26, 2012
1 parent f680f76 commit c26785f
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 83 deletions.
38 changes: 10 additions & 28 deletions server/conversion/cgivar_to_gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,21 @@
The files should be interpretable by GET-Evidence's genome processing system.
To see command line usage, run with "-h" or "--help".
"""
import os
import re
import sys
import bz2
import gzip
import zipfile
from optparse import OptionParser

GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if not GETEV_MAIN_PATH in sys.path:
sys.path.insert(1, GETEV_MAIN_PATH)
del GETEV_MAIN_PATH

from utils import autozip

DEFAULT_BUILD = "b36"
DEFAULT_SOFTWARE_VER = "2.0.1.5"

def autozip_file_open(filename, mode='r'):
    """Return a file object for filename, handling compression by extension.

    The opener is chosen from the end of the filename:
    ".zip" -- read-only; the archive must contain exactly one file, and a
              file object for that member is returned
    ".gz"  -- gzip.GzipFile
    ".bz2" -- bz2.BZ2File
    Anything else is opened with the builtin open().

    Arguments:
    filename -- path of the file to open
    mode -- mode string handed to the underlying opener (default 'r')

    Calls sys.exit() with an explanatory message when a zip archive is
    requested in a mode other than 'r', when the archive does not hold
    exactly one file, or when zipfile.ZipFile.open is not available.
    """
    # Only the actual extension counts: searching for ".gz" anywhere in the
    # name would treat files such as "sample.gz.txt" as compressed.
    if filename.endswith(".zip"):
        # Refuse unsupported modes before touching the file; constructing a
        # ZipFile in write mode would create or truncate the archive.
        if mode != 'r':
            sys.exit("Zip archive only supported for reading.")
        archive = zipfile.ZipFile(filename, mode)
        files = archive.infolist()
        if len(files) != 1:
            sys.exit("Zip archive " + filename + " has more than one file!")
        if not hasattr(archive, "open"):
            sys.exit("zipfile.ZipFile.open not available. Upgrade "
                     + "python to 2.6 to work with zip-compressed files!")
        return archive.open(files[0])
    if filename.endswith(".gz"):
        return gzip.GzipFile(filename, mode)
    if filename.endswith(".bz2"):
        return bz2.BZ2File(filename, mode)
    return open(filename, mode)

def process_full_position(data, software_ver):
"""Return GFF-formated string for when all alleles called on same line"""
chrom, begin, end, feat_type, ref_allele, var_allele = data[3:9]
Expand Down Expand Up @@ -184,7 +166,7 @@ def convert(cgi_input, options=None):
# Set up CGI input. Default is to assume a str generator.
cgi_data = cgi_input
if isinstance(cgi_input, str):
cgi_data = autozip_file_open(cgi_input, 'r')
cgi_data = autozip.file_open(cgi_input, 'r')

build = DEFAULT_BUILD
software_ver = DEFAULT_SOFTWARE_VER
Expand Down Expand Up @@ -255,7 +237,7 @@ def convert_to_file(cgi_input, output_file):
"""Convert a CGI var file and output GFF-formatted data to file"""
output = output_file # default assumes writable file object
if isinstance(output_file, str):
output = autozip_file_open(output_file, 'w')
output = autozip.file_open(output_file, 'w')
conversion = convert(cgi_input) # set up generator
for line in conversion:
output.write(line + "\n")
Expand Down
37 changes: 10 additions & 27 deletions server/conversion/gff_from_23andme.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,28 @@
To see command line usage, run with "-h" or "--help".
"""

import os
import re
import sys
import bz2
import gzip
import zipfile
from optparse import OptionParser

DEFAULT_BUILD = "b36"
GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if not GETEV_MAIN_PATH in sys.path:
sys.path.insert(1, GETEV_MAIN_PATH)
del GETEV_MAIN_PATH

def autozip_file_open(filename, mode='r'):
    """Return a file object for filename, handling compression by extension.

    The opener is chosen from the end of the filename:
    ".zip" -- read-only; the archive must contain exactly one file, and a
              file object for that member is returned
    ".gz"  -- gzip.GzipFile
    ".bz2" -- bz2.BZ2File
    Anything else is opened with the builtin open().

    Arguments:
    filename -- path of the file to open
    mode -- mode string handed to the underlying opener (default 'r')

    Calls sys.exit() with an explanatory message when a zip archive is
    requested in a mode other than 'r', when the archive does not hold
    exactly one file, or when zipfile.ZipFile.open is not available.
    """
    # Test the trailing extension rather than searching the whole name, so
    # that a name like "genome.zip.txt" is not mistaken for an archive.
    if filename.endswith(".zip"):
        # Check the mode first: opening a ZipFile for writing would create
        # (or truncate) the archive on disk before we could bail out.
        if mode != 'r':
            sys.exit("Zip archive only supported for reading.")
        archive = zipfile.ZipFile(filename, mode)
        members = archive.infolist()
        if len(members) != 1:
            sys.exit("Zip archive " + filename +
                     " has more than one file!")
        if not hasattr(archive, "open"):
            sys.exit("zipfile.ZipFile.open not available. " +
                     "Upgrade python to 2.6 to work with " +
                     "zip-compressed files!")
        return archive.open(members[0])
    if filename.endswith(".gz"):
        return gzip.GzipFile(filename, mode)
    if filename.endswith(".bz2"):
        return bz2.BZ2File(filename, mode)
    return open(filename, mode)
from utils import autozip

DEFAULT_BUILD = "b36"

def convert(genotype_input):
"""Take in 23andme genotype data, yield GFF formatted lines"""
genotype_data = genotype_input
if isinstance(genotype_input, str):
genotype_data = autozip_file_open(genotype_input, 'r')
genotype_data = autozip.file_open(genotype_input, 'r')
build = DEFAULT_BUILD
header_done = False
for line in genotype_data:
Expand Down Expand Up @@ -88,7 +71,7 @@ def convert_to_file(genotype_input, output_file):
"""Convert a 23andme file and output GFF-formatted data to file"""
output = output_file # default assumes writable file object
if isinstance(output_file, str):
output = autozip_file_open(output_file, 'w')
output = autozip.file_open(output_file, 'w')
conversion = convert(genotype_input)
for line in conversion:
output.write(line + "\n")
Expand Down
38 changes: 10 additions & 28 deletions server/conversion/vcf_to_gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,22 @@
The files should be interpretable by GET-Evidence's genome processing system.
To see command line usage, run with "-h" or "--help".
"""
import os
import re
import sys
import bz2
import gzip
import zipfile
from optparse import OptionParser

GETEV_MAIN_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if not GETEV_MAIN_PATH in sys.path:
sys.path.insert(1, GETEV_MAIN_PATH)
del GETEV_MAIN_PATH

from utils import autozip

DEFAULT_BUILD = "b36"
DEFAULT_SOFTWARE_VER = "2.0.1.5"


def autozip_file_open(filename, mode='r'):
    """Return a file object for filename, handling compression by extension.

    The opener is chosen from the end of the filename:
    ".zip" -- read-only; the archive must contain exactly one file, and a
              file object for that member is returned
    ".gz"  -- gzip.GzipFile
    ".bz2" -- bz2.BZ2File
    Anything else is opened with the builtin open().

    Arguments:
    filename -- path of the file to open
    mode -- mode string handed to the underlying opener (default 'r')

    Calls sys.exit() with an explanatory message when a zip archive is
    requested in a mode other than 'r', when the archive does not hold
    exactly one file, or when zipfile.ZipFile.open is not available.
    """
    # Decide by the real extension; a substring search would send names
    # such as "calls.bz2.vcf" to the wrong opener.
    if filename.endswith(".zip"):
        # Validate the mode before constructing ZipFile, which in write
        # mode would create or truncate the archive as a side effect.
        if mode != 'r':
            sys.exit("Zip archive only supported for reading.")
        archive = zipfile.ZipFile(filename, mode)
        entries = archive.infolist()
        if len(entries) != 1:
            sys.exit("Zip archive " + filename + " has more than one file!")
        if not hasattr(archive, "open"):
            sys.exit("zipfile.ZipFile.open not available. Upgrade "
                     + "python to 2.6 to work with zip-compressed files!")
        return archive.open(entries[0])
    if filename.endswith(".gz"):
        return gzip.GzipFile(filename, mode)
    if filename.endswith(".bz2"):
        return bz2.BZ2File(filename, mode)
    return open(filename, mode)


def process_header(vcf_line, build):
Expand All @@ -50,7 +33,6 @@ def process_header(vcf_line, build):
build = 'b36'
return build


def process_info(info_str):
"""Process "info" column of VCF and return dict"""
data = info_str.split(';')
Expand Down Expand Up @@ -118,7 +100,7 @@ def convert(vcf_input, options=None):
# Set up VCF input. Default is to assume a str generator.
vcf_data = vcf_input
if isinstance(vcf_input, str):
vcf_data = autozip_file_open(vcf_input, 'r')
vcf_data = autozip.file_open(vcf_input, 'r')

build = DEFAULT_BUILD
header_done = False
Expand Down Expand Up @@ -156,7 +138,7 @@ def convert_to_file(vcf_input, output_file):
"""Convert a VCF file and output GFF-formatted data to file"""
output = output_file # default assumes writable file object
if isinstance(output_file, str):
output = autozip_file_open(output_file, 'w')
output = autozip.file_open(output_file, 'w')
conversion = convert(vcf_input) # set up generator
for line in conversion:
output.write(line + "\n")
Expand Down

0 comments on commit c26785f

Please sign in to comment.