Skip to content

Commit

Permalink
adjusted whitespace for easier copy paste
Browse files Browse the repository at this point in the history
  • Loading branch information
BenLangmead committed Aug 23, 2015
1 parent 0f028df commit 78d874b
Showing 1 changed file with 3 additions and 14 deletions.
17 changes: 3 additions & 14 deletions 2.03_ApproximateMatching.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,15 @@
" \"\"\" Use Z algorithm (Gusfield theorem 1.4.1) to preprocess s \"\"\"\n",
" assert len(s) > 1\n",
" z = [len(s)] + [0] * (len(s)-1)\n",
"\n",
" # Initial comparison of s[1:] with prefix\n",
" for i in range(1, len(s)):\n",
" if s[i] == s[i-1]:\n",
" z[1] += 1\n",
" else:\n",
" break\n",
" \n",
" r, l = 0, 0\n",
" if z[1] > 0:\n",
" r, l = z[1], 1\n",
" \n",
" for k in range(2, len(s)):\n",
" assert z[k] == 0\n",
" if k > r:\n",
Expand Down Expand Up @@ -70,7 +67,6 @@
" i = len(p) - n[j]\n",
" if i < len(p):\n",
" lp[i] = j + 1\n",
"\n",
" return lp\n",
"\n",
"\n",
Expand Down Expand Up @@ -142,25 +138,22 @@
" def __init__(self, p, alphabet='ACGT'):\n",
" self.p = p\n",
" self.alphabet = alphabet\n",
"\n",
" # Create map from alphabet characters to integers\n",
" self.amap = {}\n",
" for i in range(len(self.alphabet)):\n",
" self.amap[self.alphabet[i]] = i\n",
"\n",
" # Make bad character rule table\n",
" self.bad_char = dense_bad_char_tab(p, self.amap)\n",
"\n",
" # Create good suffix rule table\n",
" _, self.big_l, self.small_l_prime = good_suffix_table(p)\n",
"\n",
" \n",
" def bad_character_rule(self, i, c):\n",
" \"\"\" Return # skips given by bad character rule at offset i \"\"\"\n",
" assert c in self.amap\n",
" ci = self.amap[c]\n",
" assert i > (self.bad_char[i][ci]-1)\n",
" return i - (self.bad_char[i][ci]-1)\n",
"\n",
" \n",
" def good_suffix_rule(self, i):\n",
" \"\"\" Given a mismatch at offset i, return amount to shift\n",
" as determined by (weak) good suffix rule. \"\"\"\n",
Expand All @@ -172,7 +165,7 @@
" if self.big_l[i] > 0:\n",
" return length - self.big_l[i]\n",
" return length - self.small_l_prime[i]\n",
"\n",
" \n",
" def match_skip(self):\n",
" \"\"\" Return amount to shift in case where P matches T \"\"\"\n",
" return len(self.small_l_prime) - self.small_l_prime[1]"
Expand Down Expand Up @@ -224,14 +217,11 @@
" end = min((i+1)*segment_length, len(p))\n",
" p_bm = BoyerMoore(p[start:end], alphabet='ACGT')\n",
" matches = boyer_moore(p[start:end], p_bm, t)\n",
" \n",
" # Extend matching segments to see if whole p matches\n",
" for m in matches:\n",
" if m < start or m-start+len(p) > len(t):\n",
" continue\n",
" \n",
" mismatches = 0\n",
" \n",
" for j in range(0, start):\n",
" if not p[j] == t[m-start+j]:\n",
" mismatches += 1\n",
Expand All @@ -242,7 +232,6 @@
" mismatches += 1\n",
" if mismatches > n:\n",
" break\n",
" \n",
" if mismatches <= n:\n",
" all_matches.add(m - start)\n",
" return list(all_matches)"
Expand Down

0 comments on commit 78d874b

Please sign in to comment.