Skip to content
This repository has been archived by the owner on Mar 2, 2021. It is now read-only.

Commit

Permalink
Fix svtk vcf2bed issue failing to report complex SV
Browse files Browse the repository at this point in the history
  • Loading branch information
RCollins13 committed Oct 11, 2018
1 parent d6f2b3b commit be35ee0
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 20 deletions.
8 changes: 7 additions & 1 deletion svtk/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def vcf2bed(argv):
'reported in the order in which they are requested. '
'If ALL INFO fields are requested, they are reported '
'in the order in which they appear in the VCF header.')
parser.add_argument('--include-filters', action='store_true', default=False,
help='Include FILTER status in output, with the same ' +
'behavior an INFO field.')
parser.add_argument('--split-bnd', action='store_true', default=False,
help='Report two entries in bed file for each BND.')
parser.add_argument('--split-cpx', action='store_true', default=False,
Expand Down Expand Up @@ -66,6 +69,8 @@ def vcf2bed(argv):
header = header + vcf.header.info.keys()
else:
header = header + args.info
if args.include_filters:
header = header + ['FILTER']
header = '\t'.join(header)

include_unresolved = not args.no_unresolved
Expand All @@ -80,7 +85,8 @@ def vcf2bed(argv):
report_alt=True,
no_sort_coords=args.no_sort_coords,
simple_sinks=args.simple_sinks,
include_unresolved=include_unresolved)
include_unresolved=include_unresolved,
include_filters=args.include_filters)

if args.bed in 'stdout -'.split():
if args.header:
Expand Down
55 changes: 36 additions & 19 deletions svtk/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def vcf2bedtool(vcf, split_bnd=True, include_samples=False,
include_strands=True, split_cpx=False, include_infos=None,
annotate_ins=True, report_alt=False, svtypes=None,
no_sort_coords=False, simple_sinks=False,
include_unresolved=True):
include_unresolved=True, include_filters=False):
"""
Wrap VCF as a bedtool. Necessary as pybedtools does not support SV in VCF.
Expand Down Expand Up @@ -140,6 +140,8 @@ def vcf2bedtool(vcf, split_bnd=True, include_samples=False,
Treat all insertion sinks as single-bp windows
include_unresolved : bool, optional
Output unresolved variants
include_filters : bool, optional
Output FILTER field after INFO fields
Returns
-------
Expand All @@ -162,6 +164,8 @@ def vcf2bedtool(vcf, split_bnd=True, include_samples=False,
if 'ALL' in include_infos:
include_infos = vcf.header.info.keys()
entry += '\t{infos}'
if include_filters:
entry += '\t{filters}'
entry += '\n'

def _format_info(info):
Expand Down Expand Up @@ -219,6 +223,9 @@ def _converter():
# reformat for tabular output
infos = [_format_info(v) for v in infos]
infos = '\t'.join(infos)
if include_filters:
filters = [f for f in record.filter]
filters = '\t'.join(filters)

if record.info.get('SVTYPE', None) == 'BND':
# First end of breakpoint
Expand Down Expand Up @@ -259,35 +266,45 @@ def _converter():
end = record.stop + 1
yield entry.format(**locals())

# Deconstruct complex intervals, if requested
elif 'CPX_INTERVALS' in record.info and split_cpx:
# If complex, all constituent intervals are in CPX_INTERVALS
for interval in record.info['CPX_INTERVALS']:
svtype, region = interval.split('_')
chrom, coords = region.split(':')
start, end = coords.split('-')
yield entry.format(**locals())

elif (record.info.get('SVTYPE', None) == 'CPX' and
'CPX_TYPE' in record.info.keys()):
if (record.info.get('CPX_TYPE', None) in cpx_ins_classes):
if annotate_ins:
svtype = 'DEL'
yield entry.format(**locals())

if split_cpx:
if 'dDUP' in record.info.get('CPX_TYPE', None):
svtype = 'DUP'
else:
svtype = 'INS'
source = record.info.get('SOURCE')
region = source.split('_')[1]
chrom, coords = region.split(':')
start, end = coords.split('-')
yield entry.format(**locals())
# If complex insertion, return the insertion point as a 1-bp DEL
if record.info.get('CPX_TYPE', None) in cpx_ins_classes:
svtype = 'DEL'
chrom = record.chrom
start = record.pos
end = start + 1
entry.format(**locals())

# elif (record.info.get('SVTYPE', None) == 'CPX' and
# 'CPX_TYPE' in record.info.keys()):
# if (record.info.get('CPX_TYPE', None) in cpx_ins_classes):
# if annotate_ins:
# svtype = 'DEL'
# yield entry.format(**locals())

# if split_cpx:
# if 'dDUP' in record.info.get('CPX_TYPE', None):
# svtype = 'DUP'
# else:
# svtype = 'INS'
# source = record.info.get('SOURCE')
# region = source.split('_')[1]
# chrom, coords = region.split(':')
# start, end = coords.split('-')
# yield entry.format(**locals())

else:
if not no_sort_coords:
start, end = sorted([start, end])
if start == end:
end += 1
yield entry.format(**locals())

return pbt.BedTool(_converter()).saveas()
Expand Down

0 comments on commit be35ee0

Please sign in to comment.