|
| 1 | + |
| 2 | + |
#' checkPrimer
#'
#' This tool checks the assumptions about the primers: both primers must be
#' found in the ITR-sequence, they must lie on different ITR-arms, and the
#' reverse primer is expected to lie before the forward primer.
#'
#' @author Robin H. van der Weide, \email{[email protected]}
#' @param fwdPrimer A character-string of the forward-primer used.
#' @param revPrimer A character-string of the reverse-primer used.
#' @param exp The tagMeppr-object of a sample: first run \code{\link{align}}.
#' @param ITR Can take PiggyBac (default), SleepingBeauty, or a path to a
#' 1000xN-padded ITR.fasta. Must be identical to the protocol stored in
#' \code{exp}.
#' @details
#'
#' The expected general layout for the ITR-sequence looks like this:
#'
#' \code{|---ITR---NNN...NNN---ITR---|}
#'
#' The primers are expected to be 5'-end for the reverse and 3' for the forward:
#'
#' \code{<rev}
#' \code{|---ITR---NNN...NNN---ITR---|}
#' \code{                        fwd>}
#'
#' Each primer is searched as given and, if that yields no hit, as its
#' reverse complement. The function stops with an error when the protocol of
#' \code{exp} differs from \code{ITR}, when a custom ITR-file does not exist or
#' is not padded with 1000 N's, when a primer cannot be found, or when both
#' primers are found on the same ITR.
#'
#' This tool checks these assumptions and sets the rev5_fwd3 flag to TRUE if
#' the reverse primer lies before the forward primer, and to FALSE otherwise.
#'
#' @examples
#' \dontrun{
#'
#' C9 = newTagMeppr(F1 = 'clone9_FWD_R1.fq.gz',
#'                  F2 = 'clone9_FWD_R2.fq.gz',
#'                  R1 = 'clone9_REV_R1.fq.gz',
#'                  R2 = 'clone9_REV_R2.fq.gz',
#'                  name = "clone9",
#'                  protocol = 'PiggyBac')
#'
#' checkPrimer(fwdPrimer = "CGTCAATTTTACGCAGACTATC",
#'             revPrimer = "GTACGTCACAATATGATTATCTTTCTAG",
#'             exp = C9,
#'             ITR = 'PiggyBac')
#'
#' }
#' @return Invisibly, the rev5_fwd3-flag (TRUE or FALSE). As a side effect, the
#' experiment-object passed as \code{exp} is updated in the calling
#' environment with the rev5_fwd3-flag, which will tell all downstream analyses
#' if our assumptions are correct.
#'
#' @importFrom Biostrings reverseComplement DNAString readDNAStringSet letterFrequency vmatchPattern
#' @export
checkPrimer <- function(fwdPrimer, revPrimer, exp, ITR = 'PiggyBac'){

  # Remember how the caller spelled the `exp` argument, so the updated object
  # can be written back under that name at the end. Only this one argument is
  # deparsed: deparsing every argument breaks the write-back as soon as any
  # other argument is a long (multi-line) expression.
  expName <- deparse(match.call()[['exp']])

  if(exp$protocol != ITR){
    stop('Protocol given in exp (', exp$protocol,
         ') is not the same as given as ITR (',ITR,').')
  }

  ############################################################# get revComplement
  fwdPrimerCompl <- Biostrings::reverseComplement(Biostrings::DNAString(fwdPrimer))
  revPrimerCompl <- Biostrings::reverseComplement(Biostrings::DNAString(revPrimer))

  ##################################################################### load ITR
  transposonSeq <- NULL
  if(ITR == "PiggyBac"){
    transposonSeq <- tagMeppr::PiggyBacITRs
  } else if(ITR == "SleepingBeauty"){
    transposonSeq <- tagMeppr::SleepingBeautyITRs
  } else if(grepl(".fa", ITR, fixed = TRUE)){
    # fixed = TRUE: match a literal ".fa", not "any character followed by fa"
    if(!file.exists(ITR)){
      stop('The file ', ITR, ' does not exist.')
    }
    transposonSeq <- Biostrings::readDNAStringSet(filepath = ITR, use.names = TRUE)
    # check if N-padded; there is one count per record in the file, so the
    # result has to be reduced to a single TRUE/FALSE before it is used in if()
    N1k <- Biostrings::letterFrequency(transposonSeq, letters = "N") == 1000
    if(length(N1k) == 0 || !all(N1k)){
      stop('The file ', ITR, " has no padding of 1000 N's between the arms.")
    }
  } else {
    stop('Please set ITR to either "PiggyBac", "SleepingBeauty", or as a path to a .fasta-file!')
  }

  ##################################################################### get arms
  # location of the stretch of 1000 N's that separates the two ITR-arms
  NpadRange <- Biostrings::vmatchPattern(transposonSeq,
                                         pattern = paste0(rep('N', 1e3), collapse = ''))

  ######################################################################### find
  # try the primers as given first, fall back on the reverse complement
  hitF <- Biostrings::vmatchPattern(transposonSeq, pattern = fwdPrimer)[1]
  hitR <- Biostrings::vmatchPattern(transposonSeq, pattern = revPrimer)[1]

  if(length(hitF[[1]]) == 0){
    hitF <- Biostrings::vmatchPattern(transposonSeq, pattern = fwdPrimerCompl)[1]
  }

  if(length(hitR[[1]]) == 0){
    hitR <- Biostrings::vmatchPattern(transposonSeq, pattern = revPrimerCompl)[1]
  }

  ############################################################### check if found
  if(length(hitF[[1]]) == 0){
    stop('No match between fwdPrimer (including revComplement) and the sequence.')
  }

  if(length(hitR[[1]]) == 0){
    stop('No match between revPrimer (including revComplement) and the sequence.')
  }

  ############################################# check if they are on unique arms
  # NOTE(review): this relies on the range-comparison of the match objects and
  # presumably assumes exactly one hit per primer and one N-pad -- confirm.
  belowF <- unlist(hitF[[1]] < NpadRange)
  belowR <- unlist(hitR[[1]] < NpadRange)

  if(belowR == belowF){
    if(belowF){
      stop('Primers are both found on the first ITR!')
    } else {
      stop('Primers are both found on the second ITR!')
    }
  }

  ####################### check if start(reverse primer) < start(forward primer)
  if(hitR[[1]] < hitF[[1]]){
    rev5_fwd3 <- TRUE
  } else {
    # reverse is on second ITR, which is not what I expect
    rev5_fwd3 <- FALSE
  }

  exp$rev5_fwd3 <- rev5_fwd3

  ##################################################################### assigner
  # overwrite the object the caller passed as `exp` with the flagged copy
  assign(expName, exp, envir = parent.frame())

  invisible(rev5_fwd3)

}
0 commit comments