Skip to content

Commit

Permalink
Added support for primer1/primer2 specification in manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
martinaryee committed Apr 5, 2023
1 parent 994e5ae commit 19b98ab
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
10 changes: 9 additions & 1 deletion guideseq/guideseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def parseManifest(self, manifest_path):

with open(manifest_path, 'r') as f:
manifest_data = yaml.safe_load(f)

# Set default tag/primer sequences if not specified
if not "primer1" in manifest_data:
manifest_data['primer1'] = 'TTGAGTTGTCATATGTTAAT'
if not "primer2" in manifest_data:
manifest_data['primer2'] = 'ACATATGACAACTCAATTAA'

try:
# Validate manifest data
Expand All @@ -54,6 +60,8 @@ def parseManifest(self, manifest_path):
self.output_folder = manifest_data['output_folder']
self.undemultiplexed = manifest_data['undemultiplexed']
self.samples = manifest_data['samples']
self.primer1 = manifest_data['primer1']
self.primer2 = manifest_data['primer2']

except Exception as e:
logger.error('Incorrect or malformed manifest file. Please ensure your manifest contains all required fields.')
Expand Down Expand Up @@ -237,7 +245,7 @@ def identifyOfftargetSites(self):
self.identified[sample] = os.path.join(self.output_folder, 'identified', sample + '_identifiedOfftargets.txt')

identifyOfftargetSites.analyze(samfile, self.reference_genome, self.identified[sample], annotations,
self.window_size, self.max_score)
self.window_size, self.max_score, self.primer1, self.primer2)

logger.info('Finished identifying offtarget sites.')

Expand Down
14 changes: 8 additions & 6 deletions guideseq/identifyOfftargetSites.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def alignSequences(targetsite_sequence, window_sequence, max_score=7):
"""
annotation is in the format:
"""
def analyze(sam_filename, reference_genome, outfile, annotations, windowsize, max_score):
def analyze(sam_filename, reference_genome, outfile, annotations, windowsize, max_score, primer1, primer2):

output_folder = os.path.dirname(outfile)
if not os.path.exists(output_folder):
Expand All @@ -307,7 +307,7 @@ def analyze(sam_filename, reference_genome, outfile, annotations, windowsize, ma
if int(mapq) >= 50 and int(sam_flag) & 128 and not int(sam_flag) & 2048:
# Second read in pair
barcode, count = parseReadName(full_read_name)
primer = assignPrimerstoReads(read_sequence, sam_flag)
primer = assignPrimerstoReads(read_sequence, sam_flag, primer1, primer2)
if int(template_length) < 0: # Reverse read
read_position = int(position_of_mate) + abs(int(template_length)) - 1
strand = "-"
Expand Down Expand Up @@ -411,15 +411,15 @@ def py2max(myList):
out = [i for i in myList if i != "" ]
return max(out)

def assignPrimerstoReads(read_sequence, sam_flag,
                         primer1="TTGAGTTGTCATATGTTAAT",
                         primer2="ACATATGACAACTCAATTAA"):
    """Classify a read by which primer sequence it starts with.

    The start of the read is taken from the beginning of ``read_sequence``,
    or -- when bit 16 of ``sam_flag`` is set -- from the reverse complement
    of its end. That start is compared against each primer over the
    primer's own length, so primers that are not exactly 20 nt long can
    still match.

    Args:
        read_sequence: Read sequence as a string.
        sam_flag: SAM flag as an int or a numeric string.
        primer1: Sequence reported as ``"primer1"``. Defaults to the
            sequence that was previously hard-coded here.
        primer2: Sequence reported as ``"primer2"``. Defaults to the
            sequence that was previously hard-coded here.

    Returns:
        ``"primer1"``, ``"primer2"`` or ``"nomatch"``. ``primer1`` is
        checked first, so it wins if both primers match.
    """
    is_reversed = bool(int(sam_flag) & 16)

    # Cache the read start per primer length so the (possibly reverse
    # complemented) slice is computed only once when both primers have the
    # same length, as with the defaults.
    read_starts = {}

    for label, primer in (("primer1", primer1), ("primer2", primer2)):
        if not primer:
            # An empty/missing primer can never identify a read; without
            # this guard a zero-length slice would compare equal to "".
            continue
        length = len(primer)
        if length not in read_starts:
            if is_reversed:
                # reverseComplement is expected to be defined elsewhere in
                # this module.
                read_starts[length] = reverseComplement(read_sequence[-length:])
            else:
                read_starts[length] = read_sequence[:length]
        if read_starts[length] == primer:
            return label

    return "nomatch"
Expand Down Expand Up @@ -468,13 +468,15 @@ def main():
parser.add_argument('--outfile', help='File to output identified sites to.', required=True)
parser.add_argument('--window', help='Window around breakpoint to search for off-target', type=int, default=25)
parser.add_argument('--max_score', help='Score threshold', type=int, default=7)
parser.add_argument('--primer1', help='Primer 1 sequence', default="TTGAGTTGTCATATGTTAAT")
parser.add_argument('--primer2', help='Primer 2 sequence', default="ACATATGACAACTCAATTAA")
# parser.add_argument('--demo')
parser.add_argument('--target', default='')

args = parser.parse_args()

annotations = {'Description': 'test description', 'Targetsite': 'dummy targetsite', 'Sequence': args.target}
analyze(args.samfile[0], args.ref, args.outfile, annotations, args.window, args.max_score)
analyze(args.samfile[0], args.ref, args.outfile, annotations, args.window, args.max_score, primer1, primer2)


if __name__ == "__main__":
Expand Down

0 comments on commit 19b98ab

Please sign in to comment.