-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
scottdet
authored
Jun 11, 2016
1 parent
29ee6ec
commit 4b7306d
Showing
8 changed files
with
137 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import random | ||
|
||
|
||
from Read import Read | ||
|
||
|
||
# generate some random strings consisting of 1's and 0's | ||
class DataGenerator(object):
    """Build a random binary haplotype pair and sample reads from it.

    ``H1`` holds ``size`` random bits and ``H2`` holds the complement of
    every bit in ``H1``.  ``create_string`` fills ``reads`` with ``Read``
    objects cut from one haplotype or the other.
    """

    def __init__(self, size):
        """Generate the two complementary haplotypes.

        size -- number of positions in each haplotype.
        """
        self.size = size
        self.reads = []
        # Draw exactly `size` bits so the haplotype is never shorter than
        # requested.  getrandbits() rejects a zero bit count on older
        # interpreters, so skip the call when there is nothing to generate.
        bits = random.getrandbits(size) if size > 0 else 0
        self.H1 = [(bits >> position) & 1 for position in range(size)]
        self.H2 = [bit ^ 1 for bit in self.H1]  # H2 is complementary to H1

    def create_string(self, min_size, max_size, min_distance, max_distance, error, overlap_chance):
        """Walk the haplotype left to right, appending reads to ``self.reads``.

        min_size, max_size         -- inclusive bounds for the length of a read.
        min_distance, max_distance -- inclusive bounds for the step between
                                      the start positions of successive reads.
        error                      -- passed through unchanged to ``Read``.
        overlap_chance             -- probability that the next read starts at
                                      the same position as the current one.

        Raises ValueError when the parameters make it impossible for the
        start position to ever move forward.
        """
        if self.size > 0 and (max_size < 2 or max_distance < 1 or overlap_chance >= 1):
            raise ValueError(
                "create_string cannot advance: need max_size >= 2, "
                "max_distance >= 1 and overlap_chance < 1"
            )

        index = 0
        while index < self.size:
            read_size = random.randint(min_size, max_size)
            # Each read is taken from one of the two haplotypes with equal odds.
            source = self.H1 if random.random() < 0.5 else self.H2
            data = source[index:index + read_size]
            if data:
                self.reads.append(Read(index, data, error))
            if random.random() > overlap_chance:
                # The step is capped at read_size - 1 so the next read never
                # starts beyond the last position spanned by this one.
                index += min(random.randint(min_distance, max_distance), read_size - 1)

    def __repr__(self):
        """Return both haplotypes as strings of 0/1 digits, one per line."""
        first = "".join(str(bit) for bit in self.H1)
        second = "".join(str(bit) for bit in self.H2)
        return first + "\n" + second
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
class EasyAssembly:
    """Greedy left-to-right assembler for reads of a binary haplotype."""

    @classmethod
    def assemble(cls, reads, size):
        """Reconstruct one haplotype of length ``size`` from ``reads``.

        reads -- sequence of objects exposing ``start``, ``size``, ``data``
                 and ``flipped``; the scan assumes they are ordered by
                 ``start``.
        size  -- number of positions to assemble.

        Returns a list of exactly ``size`` bits.  The phase is fixed by the
        first read; each later read is used as-is when its first bit agrees
        with what has already been assembled at that position, and flipped
        otherwise.

        Raises ValueError when there are no reads to assemble from, or when
        the selected read does not both cover and extend past the current
        position.
        """
        hap = [0] * size
        if size > 0 and not reads:
            raise ValueError("cannot assemble %d positions from an empty read list" % size)

        hap_index = 0
        read_index = 0
        last = len(reads) - 1
        while hap_index < size:
            # Move to the last read that starts before the assembled frontier.
            while read_index < last and reads[read_index + 1].start < hap_index:
                read_index += 1
            read = reads[read_index]
            read_end = read.start + read.size

            # Without this guard a read that fails to extend the frontier
            # would leave hap_index unchanged and the loop would never end.
            if read.start > hap_index or read_end <= hap_index:
                raise ValueError(
                    "no usable read at position %d: selected read spans [%d, %d)"
                    % (hap_index, read.start, read_end)
                )

            offset = hap_index - read.start
            if hap_index > 0 and read.data[0] != hap[read.start]:
                # The read comes from the other haplotype: use its complement.
                segment = read.flipped[offset:]
            else:
                segment = read.data[offset:]

            # Clip to `size` so a read running past the end cannot grow hap.
            stop = min(read_end, size)
            hap[hap_index:stop] = list(segment[:stop - hap_index])
            hap_index = stop

        return hap
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import random | ||
|
||
|
||
# create readable string | ||
class Read(object):
    """One read: a run of bits anchored at a start position.

    Attributes:
        start   -- position of the first bit of the read.
        data    -- the bits of the read, after optional random corruption.
        size    -- number of bits in ``data``.
        flipped -- the complement of every bit in ``data``.
    """

    def __init__(self, start, data, error):
        """Store the read, replacing bits at random when ``error`` is non-zero.

        start -- position of the first bit.
        data  -- iterable of 0/1 values.
        error -- 0 keeps ``data`` untouched; otherwise each bit is replaced
                 by a random 0/1 unless a fresh ``random.random()`` draw
                 exceeds ``error``.
        """
        self.start = start
        if error == 0:
            # No random numbers are drawn on the error-free path.
            self.data = list(data)
        else:
            self.data = [
                bit if random.random() > error else random.choice([0, 1])
                for bit in data
            ]
        self.size = len(self.data)
        # Complement the stored bits (not the pristine input) so that flipped
        # stays the exact inverse of data even after corruption.
        self.flipped = [~bit & 1 for bit in self.data]

    # repr will return a string containing a printable representation of an object
    def __repr__(self):
        """Return the bits as digits, indented by ``start`` spaces."""
        return " " * self.start + "".join(str(bit) for bit in self.data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# necessary for importing files to main # |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import timeit | ||
|
||
from DataGen import DataGenerator | ||
from EasyAssembly import EasyAssembly | ||
|
||
# Started at module load so the runtime report below also covers main().
begin = timeit.default_timer()


def main():
    """Generate a random haplotype pair, sample reads, assemble and report.

    Everything is written to standard output: the original haplotypes, the
    assembled haplotype with its complement, and the measured accuracy.
    """
    haplotypes = True  # set to False to suppress the haplotype dumps
    size = 100
    info = DataGenerator(size)

    if haplotypes:
        print("\nOriginal Haplotypes:")
        print(info)

    min_size = 10
    max_size = 20
    min_distance = 0
    max_distance = 5
    error = 0  # currently working on easy algorithm
    overlap_chance = 0.5  # 50/50 chance
    info.create_string(min_size=min_size, max_size=max_size, min_distance=min_distance,
                       max_distance=max_distance, error=error, overlap_chance=overlap_chance)

    def print_hap(haplotype, flipped):
        """Print the haplotype as digits, or its complement when flipped."""
        if flipped:
            print("".join("1" if bit == 0 else "0" for bit in haplotype))
        else:
            print("".join(str(bit) for bit in haplotype))

    print("\n- Easy Algorithm -\n")
    assembled = EasyAssembly.assemble(info.reads, size)
    if haplotypes:
        print("Assembled haplotypes:")
        print_hap(assembled, flipped=False)
        print_hap(assembled, flipped=True)

    # Measure the accuracy instead of asserting it.  The assembly may come
    # out as either haplotype, and H2 is the complement of H1, so score the
    # better of "agrees with H1" and "disagrees with H1".
    pairs = list(zip(assembled, info.H1))
    agree = sum(1 for got, want in pairs if got == want)
    best = max(agree, len(pairs) - agree)
    accuracy = 100.0 * best / max(len(pairs), 1)
    print("\n- Accuracy: %g%% -" % accuracy)


if __name__ == "__main__":
    main()

end = timeit.default_timer()

time = end - begin
# Single-argument print with %-formatting prints the same on Python 2 and 3.
print("- Runtime: %s -" % time)