Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
scottdet authored Jun 11, 2016
1 parent 29ee6ec commit 4b7306d
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 0 deletions.
43 changes: 43 additions & 0 deletions DataGen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import random


from Read import Read


# generate some random strings consisting of 1's and 0's
class DataGenerator(object):
    """Random pair of complementary binary haplotypes plus reads sampled from them.

    Attributes:
        size: requested haplotype length, in positions.
        H1: list of ``size`` random 0/1 integers.
        H2: position-wise complement of ``H1``.
        reads: ``Read`` objects appended by ``create_string``.
    """

    # Random bits are drawn from the generator in words of this many bits.
    _WORD_BITS = 20

    def __init__(self, size):
        """Build two complementary haplotypes holding exactly ``size`` values.

        Bits are drawn ``_WORD_BITS`` at a time; a shorter final word is drawn
        when ``size`` is not a multiple of the word width, so ``H1`` and ``H2``
        always have ``size`` entries. A non-positive ``size`` yields empty
        haplotypes.
        """
        self.size = size
        self.H1 = []
        self.H2 = []
        self.reads = []
        remaining = size
        while remaining > 0:
            width = min(self._WORD_BITS, remaining)
            word = random.getrandbits(width)  # `width` random bits packed in one integer
            for shift in range(width):
                bit = (word >> shift) & 1  # unpack bit number `shift` as 0 or 1
                self.H1.append(bit)
                self.H2.append(bit ^ 1)  # H2 is complementary to H1
            remaining -= width

    def create_string(self, min_size, max_size, min_distance, max_distance, error,
                      overlap_chance):
        """Sample reads along the haplotypes and append them to ``self.reads``.

        Each read copies ``random.randint(min_size, max_size)`` positions
        (fewer at the tail, where the slice is truncated) from H1 or H2, chosen
        with equal probability, and is built as ``Read(index, data, error)``.
        After every read the start index advances only when
        ``random.random() > overlap_chance``; the step is
        ``random.randint(min_distance, max_distance)`` capped at
        ``read_size - 1`` so the following read still overlaps this one.

        Note: the loop ends only once the index reaches ``self.size``, so the
        arguments must allow a positive step (a cap of ``read_size - 1`` means
        reads of size 1 never advance, and ``overlap_chance >= 1`` never
        advances at all).
        """
        index = 0
        while index < self.size:
            read_size = random.randint(min_size, max_size)
            source = self.H1 if random.random() < 0.5 else self.H2
            data = source[index:index + read_size]
            if data:
                self.reads.append(Read(index, data, error))
            if random.random() > overlap_chance:
                index += min(random.randint(min_distance, max_distance), read_size - 1)

    def __repr__(self):
        """Return H1 and H2 as two lines of 0/1 digits separated by a newline."""
        first = "".join(str(bit) for bit in self.H1)
        second = "".join(str(bit) for bit in self.H2)
        return first + "\n" + second
Binary file added DataGen.pyc
Binary file not shown.
20 changes: 20 additions & 0 deletions EasyAssembly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
class EasyAssembly(object):
    """Greedy haplotype assembler for overlapping reads ordered by start position."""

    @classmethod
    def assemble(cls, reads, size):
        """Stitch overlapping reads into one haplotype of ``size`` positions.

        The assembly grows left to right. For the current frontier the last
        read that starts strictly before it is selected; the read's first
        value is compared with the value already assembled at the same
        position, and on a mismatch the read's ``flipped`` values are used
        instead of its ``data``. The part of the read at and beyond the
        frontier is then copied into the result.

        Args:
            reads: sequence of objects exposing ``start``, ``size``, ``data``
                and ``flipped``, ordered by ``start``.
            size: number of positions to assemble.

        Returns:
            A list of assembled values. The first read is always copied
            unflipped, so the result follows whichever haplotype that read
            was taken from.

        Raises:
            ValueError: if the selected read does not cover the frontier
                (a gap between reads, or neighbouring reads that do not
                overlap), in which case no progress would be possible.
            IndexError: if ``reads`` is empty while ``size`` is positive.
        """
        hap = [0] * size
        hap_index = 0
        read_index = 0
        last_read = len(reads) - 1
        while hap_index < size:
            # Move to the last read starting strictly before the frontier so it
            # shares at least one already-assembled position with the result.
            while read_index < last_read and reads[read_index + 1].start < hap_index:
                read_index += 1
            read = reads[read_index]
            read_end = read.start + read.size

            # Without this check a read that stops short of the frontier would
            # leave hap_index unchanged forever, and a read starting beyond it
            # would be sliced with a negative offset.
            if not read.start <= hap_index < read_end:
                raise ValueError(
                    "reads do not cover position %d (selected read spans %d..%d)"
                    % (hap_index, read.start, read_end)
                )

            # A mismatch on the overlapping position means the read carries the
            # opposite phase, so its complement is copied instead.
            if hap_index > 0 and read.data[0] != hap[read.start]:
                source = read.flipped
            else:
                source = read.data

            hap[hap_index:read_end] = source[hap_index - read.start:]
            hap_index = read_end

        return hap
Binary file added EasyAssembly.pyc
Binary file not shown.
19 changes: 19 additions & 0 deletions Read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import random


# create readable string
class Read(object):
    """A read: a run of 0/1 values anchored at a start position.

    Attributes:
        start: index of the first position covered by the read.
        data: the values of the read, after optional noise.
        size: number of values in ``data``.
        flipped: position-wise complement of ``data``.
    """

    def __init__(self, start, data, error):
        """Create a read from ``data``, optionally corrupting it.

        Args:
            start: index of the first position covered by the read.
            data: iterable of 0/1 integers.
            error: when 0 the values are copied unchanged and no random
                numbers are drawn; otherwise each value is kept when
                ``random.random() > error`` and replaced by a random 0/1
                (which may equal the original) when it is not.
        """
        self.start = start
        if error == 0:
            self.data = list(data)
        else:
            self.data = [
                bit if random.random() > error else random.choice([0, 1])
                for bit in data
            ]
        self.size = len(self.data)
        # Complement the stored values, not the pristine input, so that
        # ``flipped`` stays the exact opposite of ``data`` when noise is applied.
        self.flipped = [~bit & 1 for bit in self.data]

    def __repr__(self):
        """Return the digits of the read, left-padded with ``start`` spaces."""
        return " " * self.start + "".join(str(bit) for bit in self.data)
Binary file added Read.pyc
Binary file not shown.
1 change: 1 addition & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# necessary for importing files to main #
54 changes: 54 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import timeit

from DataGen import DataGenerator
from EasyAssembly import EasyAssembly

# Timestamp taken when the module is loaded; the end-of-file runtime report
# subtracts it from a second timestamp.
begin = timeit.default_timer()


def main():
    """Generate random haplotypes, sample reads, assemble them and print a report.

    The report shows the original haplotypes, the assembled haplotype with its
    complement, and the measured accuracy of the assembly.
    """
    haplotypes = True  # toggles printing of the original and assembled haplotypes
    size = 100
    info = DataGenerator(size)

    if haplotypes:
        print("\nOriginal Haplotypes:")
        print(info)

    min_size = 10
    max_size = 20
    min_distance = 0
    max_distance = 5
    error = 0  # currently working on easy algorithm
    overlap_chance = 0.5  # 50/50 chance
    info.create_string(min_size=min_size, max_size=max_size, min_distance=min_distance,
                       max_distance=max_distance, error=error,
                       overlap_chance=overlap_chance)

    def print_hap(haplotype, flipped):
        """Print a haplotype as digits, complemented when ``flipped`` is true."""
        if flipped:
            digits = ("1" if H == 0 else "0" for H in haplotype)
        else:
            digits = (str(H) for H in haplotype)
        print("".join(digits))

    print("\n- Easy Algorithm -\n")
    assembled = EasyAssembly.assemble(info.reads, size)
    if haplotypes:
        print("Assembled haplotypes:")
        print_hap(assembled, flipped=False)
        print_hap(assembled, flipped=True)

    # Measure the accuracy instead of assuming it. The assembly may come out as
    # either haplotype, and H2 is the complement of H1, so mismatches against
    # H1 are exactly the matches against H2; the better of the two is reported.
    pairs = list(zip(assembled, info.H1))
    matches = sum(1 for got, expected in pairs if got == expected)
    best = max(matches, len(pairs) - matches)
    accuracy = 100.0 * best / size if size else 100.0
    print("\n- Accuracy: %g%% -" % accuracy)


if __name__ == "__main__":
    main()

    # Report the elapsed time only when run as a script; on import no work has
    # been done, so a runtime figure would be meaningless.
    end = timeit.default_timer()
    elapsed = end - begin
    print("- Runtime: %s -" % elapsed)

0 comments on commit 4b7306d

Please sign in to comment.