Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
vipul committed Apr 8, 2020
0 parents commit b8066c0
Show file tree
Hide file tree
Showing 24 changed files with 1,006 additions and 0 deletions.
9 changes: 9 additions & 0 deletions actual_submit/ReadMe.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Dependencies
1. cython
2. keras
3. tensorflow
4. sklearn

Troubleshooting:
1. If the .so file doesn't work on your system, run the following command to build a compatible .so file:
$ bash script.sh
Binary file added actual_submit/__pycache__/predict.cpython-36.pyc
Binary file not shown.
Binary file added actual_submit/assn3.zip
Binary file not shown.
25 changes: 25 additions & 0 deletions actual_submit/assn3_mlcs771.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"folders": [
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/RotatedData"
},
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/reference"
},
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/sample_submit"
},
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/sample_submit 2"
},
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/train"
},
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/trainSplit"
},
{
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/train_with_edges"
}
]
}
56 changes: 56 additions & 0 deletions actual_submit/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import predict
import time as tm
import numpy as np
import os
# Perform longest common subsequence search on the (truncated) code and gold code
def lcs( str1, str2, p, q ):
    """Return the longest-common-subsequence length of two string prefixes.

    Only the first ``p`` characters of ``str1`` and the first ``q``
    characters of ``str2`` take part in the comparison.

    The table is filled iteratively, one row at a time, so the cost is
    O(p * q) time and O(q) memory.  The plain recursive formulation is
    exponential on mismatching inputs and limited by the recursion depth.
    """
    if p <= 0 or q <= 0:
        return 0

    # prevRow[j] holds the LCS length of str1[:i-1] and str2[:j].
    prevRow = [0] * ( q + 1 )
    for i in range( 1, p + 1 ):
        currRow = [0] * ( q + 1 )
        ch = str1[i - 1]
        for j in range( 1, q + 1 ):
            if ch == str2[j - 1]:
                currRow[j] = prevRow[j - 1] + 1
            else:
                currRow[j] = max( prevRow[j], currRow[j - 1] )
        prevRow = currRow
    return prevRow[q]

def getLCS( code, goldCode ):
    """Score a predicted code against the gold code using LCS length.

    The prediction is truncated to its first four characters before the
    comparison; the gold code is used in full.
    """
    truncatedLen = min( len( code ), 4 )
    goldLen = len( goldCode )
    return lcs( code, goldCode, truncatedLen, goldLen )

# If there are 100 test points and the prediction code returns 110 predictions
# then we only consider the first 100 and discard the last 10 predictions. On
# the other hand, if the code returns only 90 predictions, then we assume that
# these were predictions on the first 90 test points and evaluate accordingly

def getCodeLengthMatch( numChars, goldNumChars ):
    """Count the positions where predicted and gold code lengths agree.

    Args:
        numChars: predicted code lengths, one per test point.
        goldNumChars: gold code lengths, one per test point.

    Returns:
        The number of matching entries among the first
        ``min(len(numChars), len(goldNumChars))`` positions; surplus
        entries on either side are ignored.
    """
    # Accept plain sequences as well as arrays so that `==` is elementwise.
    numChars = np.asarray( numChars )
    goldNumChars = np.asarray( goldNumChars )
    # The overlap must come from the arguments themselves, not from the
    # module-level `codes` / `goldCodes` lists.
    minLen = min( len( numChars ), len( goldNumChars ) )
    return np.count_nonzero( numChars[0:minLen] == goldNumChars[0:minLen] )

def getCodeMatchScore( codes, goldCodes ):
    """Sum the per-code match scores over the paired predictions.

    Each pair contributes its ``getLCS`` value divided by the length of
    the gold code.  Pairing stops at the shorter of the two lists.
    """
    return sum(
        getLCS( predicted, gold ) / len( gold )
        for predicted, gold in zip( codes, goldCodes )
    )

# Number of test points used to normalise both scores below.
numTest = 2000
path = '../FinalTrain/'

# Collect every PNG under `path`.  Each file is joined with the directory
# os.walk is currently visiting, so images in sub-directories get a valid
# path too, and only names that actually end in ".png" are accepted.
filepaths = []
for root, _dirs, names in os.walk( path ):
    for name in names:
        if name.endswith( '.png' ):
            filepaths.append( os.path.join( root, name ) )
filepaths.sort()
# NOTE: an alternative layout is a fixed list of numbered files:
# filepaths = [ "test/image%d.png" % i for i in range( numTest ) ]

# Gold codes are stored one per line in the file "f1" inside `path`.
with open( os.path.join( path, "f1" ), "r" ) as goldFile:
    goldCodes = goldFile.read().splitlines()
goldNumChars = np.array( [ len( goldCode ) for goldCode in goldCodes ] )

# Get recommendations from predict.py and time the thing
tic = tm.perf_counter()
(numChars, codes) = predict.decaptcha( filepaths )
toc = tm.perf_counter()

print( "Total time taken is %.6f seconds " % (toc - tic) )
print( "Fraction of code lengths that match is %.6f" % (getCodeLengthMatch( numChars, goldNumChars ) / numTest) )
print( "Code match score is %.6f" % (getCodeMatchScore( codes, goldCodes ) / numTest) )
Binary file added actual_submit/loop.cpython-36m-darwin.so
Binary file not shown.
65 changes: 65 additions & 0 deletions actual_submit/loop.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from PIL import Image
from PIL import ImageFilter
import numpy as np
import cv2
from keras.models import load_model
# Load the trained Keras model once, at import time; loop() below uses this
# module-level object for every prediction.
# NOTE(review): "model.h5" is a relative path, presumably resolved against the
# current working directory - confirm callers run from this folder.
loaded_model=load_model("model.h5")


def _is_nested(inner, outer):
    """Return True when box ``inner`` lies strictly inside box ``outer``.

    Both boxes are ``(x, y, w, h)`` tuples as returned by cv2.boundingRect.
    """
    ix, iy, iw, ih = inner
    ox, oy, ow, oh = outer
    return ix > ox and ix + iw < ox + ow and iy > oy and iy + ih < oy + oh


def loop(filenames):
    """Predict the character code shown in each image file.

    For every file the image is converted to a grey image, thresholded,
    split into candidate character boxes via contours, and each box is
    classified with the module-level ``loaded_model``.

    Args:
        filenames: iterable of image file paths.

    Returns:
        A tuple ``(numChars, codes)``: a NumPy array holding the number of
        characters predicted for each file and a list with the predicted
        code strings, both in the order of ``filenames``.  If classifying
        a file fails, that file is reported as 4 characters / "AAAA".
    """
    numCharsList = []
    codes = []
    for file in filenames:
        # Close the underlying file handle as soon as the pixel data has
        # been consumed; `grey` is an independent image and stays valid.
        with Image.open(file) as im:
            # `white` is a blurred, 15x15 max-filtered copy of the image
            # (presumably a local background estimate - TODO confirm).
            white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
            grey = im.convert('L')
            # Each pixel becomes 255 plus the largest per-channel difference
            # between the image and `white`, capped at 255.
            grey.putdata([min(255, max(255 + x[0] - y[0], 255 + x[1] - y[1], 255 + x[2] - y[2])) for (x, y) in zip(im.getdata(), white.getdata())])
        # NOTE(review): `grey` is a single-channel image but is passed through
        # an RGB->BGR conversion before going back to grey - confirm that the
        # installed OpenCV accepts this input.
        img = cv2.cvtColor(np.array(grey), cv2.COLOR_RGB2BGR)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        ret, thresh = cv2.threshold(gray_img, 200, 255, cv2.THRESH_BINARY)
        img_dilation = thresh
        # findContours returns (contours, hierarchy) or
        # (image, contours, hierarchy) depending on the OpenCV version;
        # the contours are always the second-to-last element.
        contours = cv2.findContours(img_dilation,
                                    cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Keep only boxes whose size falls inside the accepted range.
        boxes = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            if 20 < w < 120 and 60 < h < 120:
                boxes.append((x, y, w, h))

        # Drop boxes that sit strictly inside another accepted box, then
        # order the survivors left to right (tuples sort by x first).
        fl = []
        for box in boxes:
            if not any(_is_nested(box, other) for other in boxes):
                fl.append(box)
        fl.sort()

        try:
            chars = []
            for (x, y, w, h) in fl:
                # Pad the crop by 2 px, clamping at 0 so a box touching the
                # top/left edge does not wrap around to a negative index.
                top = max(y - 2, 0)
                left = max(x - 2, 0)
                glyph = img_dilation[top:y + h + 2, left:x + w + 2]
                glyph = cv2.resize(glyph, (100, 100))
                # Invert and scale to [0, 1] before feeding the model.
                flatten = (255 - glyph).flatten() / 255.0
                pred = loaded_model.predict(flatten.reshape(1, 100, 100, 1))
                # Class index 0 maps to 'A' (ASCII 65).
                chars.append(chr(pred.argmax() + 65))
            i = len(chars)
            code = ''.join(chars)
        except Exception:
            # Best-effort fallback: one output entry per input file is still
            # produced.  KeyboardInterrupt/SystemExit are allowed to propagate.
            i = 4
            code = "AAAA"
        numCharsList.append(i)
        codes.append(code)
    numChars = np.array(numCharsList)
    return (numChars, codes)
Binary file added actual_submit/model.h5
Binary file not shown.
25 changes: 25 additions & 0 deletions actual_submit/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np
import cv2
from PIL import Image
from PIL import ImageFilter

import pyximport
pyximport.install()
import loop
from keras.models import load_model

# DO NOT CHANGE THE NAME OF THIS METHOD OR ITS INPUT OUTPUT BEHAVIOR

# INPUT CONVENTION
# filenames: a list of strings containing filenames of images

# OUTPUT CONVENTION
# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings.
# Make sure that the length of the array and the list is the same as the number of filenames that
# were given. The evaluation code may give unexpected results if this convention is not followed.


def decaptcha(filenames):
    """Announce progress, then delegate the whole batch to ``loop.loop``.

    Returns the ``(numChars, codes)`` pair produced by ``loop.loop``.
    """
    print("In Progress...")
    predictedLengths, predictedCodes = loop.loop(filenames)
    return (predictedLengths, predictedCodes)
return (numChars, codes)
6 changes: 6 additions & 0 deletions actual_submit/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from distutils.core import setup
from Cython.Build import cythonize

# Build the extension module generated by Cython from loop.pyx.
setup(ext_modules=cythonize("loop.pyx"))
10 changes: 10 additions & 0 deletions actual_submit/toSubmit/ReadMe.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Dependencies
1. cython
2. keras
3. tensorflow
4. sklearn

Troubleshooting:
1. If the .so file doesn't work on your system, run the following command to build a compatible .so file.
script.sh uses python3; if your interpreter is named python instead, change "python3 setup.py build_ext --inplace" to "python setup.py build_ext --inplace" in script.sh.
$ bash script.sh
Binary file added actual_submit/toSubmit/loop.cpython-36m-darwin.so
Binary file not shown.
65 changes: 65 additions & 0 deletions actual_submit/toSubmit/loop.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from PIL import Image
from PIL import ImageFilter
import numpy as np
import cv2
from keras.models import load_model
# Load the trained Keras model once, at import time; loop() below uses this
# module-level object for every prediction.
# NOTE(review): "model.h5" is a relative path, presumably resolved against the
# current working directory - confirm callers run from this folder.
loaded_model=load_model("model.h5")


def _is_nested(inner, outer):
    """Return True when box ``inner`` lies strictly inside box ``outer``.

    Both boxes are ``(x, y, w, h)`` tuples as returned by cv2.boundingRect.
    """
    ix, iy, iw, ih = inner
    ox, oy, ow, oh = outer
    return ix > ox and ix + iw < ox + ow and iy > oy and iy + ih < oy + oh


def loop(filenames):
    """Predict the character code shown in each image file.

    For every file the image is converted to a grey image, thresholded,
    split into candidate character boxes via contours, and each box is
    classified with the module-level ``loaded_model``.

    Args:
        filenames: iterable of image file paths.

    Returns:
        A tuple ``(numChars, codes)``: a NumPy array holding the number of
        characters predicted for each file and a list with the predicted
        code strings, both in the order of ``filenames``.  If classifying
        a file fails, that file is reported as 4 characters / "AAAA".
    """
    numCharsList = []
    codes = []
    for file in filenames:
        # Close the underlying file handle as soon as the pixel data has
        # been consumed; `grey` is an independent image and stays valid.
        with Image.open(file) as im:
            # `white` is a blurred, 15x15 max-filtered copy of the image
            # (presumably a local background estimate - TODO confirm).
            white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
            grey = im.convert('L')
            # Each pixel becomes 255 plus the largest per-channel difference
            # between the image and `white`, capped at 255.
            grey.putdata([min(255, max(255 + x[0] - y[0], 255 + x[1] - y[1], 255 + x[2] - y[2])) for (x, y) in zip(im.getdata(), white.getdata())])
        # NOTE(review): `grey` is a single-channel image but is passed through
        # an RGB->BGR conversion before going back to grey - confirm that the
        # installed OpenCV accepts this input.
        img = cv2.cvtColor(np.array(grey), cv2.COLOR_RGB2BGR)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        ret, thresh = cv2.threshold(gray_img, 200, 255, cv2.THRESH_BINARY)
        img_dilation = thresh
        # findContours returns (contours, hierarchy) or
        # (image, contours, hierarchy) depending on the OpenCV version;
        # the contours are always the second-to-last element.
        contours = cv2.findContours(img_dilation,
                                    cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Keep only boxes whose size falls inside the accepted range.
        boxes = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            if 20 < w < 120 and 60 < h < 120:
                boxes.append((x, y, w, h))

        # Drop boxes that sit strictly inside another accepted box, then
        # order the survivors left to right (tuples sort by x first).
        fl = []
        for box in boxes:
            if not any(_is_nested(box, other) for other in boxes):
                fl.append(box)
        fl.sort()

        try:
            chars = []
            for (x, y, w, h) in fl:
                # Pad the crop by 2 px, clamping at 0 so a box touching the
                # top/left edge does not wrap around to a negative index.
                top = max(y - 2, 0)
                left = max(x - 2, 0)
                glyph = img_dilation[top:y + h + 2, left:x + w + 2]
                glyph = cv2.resize(glyph, (100, 100))
                # Invert and scale to [0, 1] before feeding the model.
                flatten = (255 - glyph).flatten() / 255.0
                pred = loaded_model.predict(flatten.reshape(1, 100, 100, 1))
                # Class index 0 maps to 'A' (ASCII 65).
                chars.append(chr(pred.argmax() + 65))
            i = len(chars)
            code = ''.join(chars)
        except Exception:
            # Best-effort fallback: one output entry per input file is still
            # produced.  KeyboardInterrupt/SystemExit are allowed to propagate.
            i = 4
            code = "AAAA"
        numCharsList.append(i)
        codes.append(code)
    numChars = np.array(numCharsList)
    return (numChars, codes)
Binary file added actual_submit/toSubmit/model.h5
Binary file not shown.
25 changes: 25 additions & 0 deletions actual_submit/toSubmit/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np
import cv2
from PIL import Image
from PIL import ImageFilter

import pyximport
pyximport.install()
import loop
from keras.models import load_model

# DO NOT CHANGE THE NAME OF THIS METHOD OR ITS INPUT OUTPUT BEHAVIOR

# INPUT CONVENTION
# filenames: a list of strings containing filenames of images

# OUTPUT CONVENTION
# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings.
# Make sure that the length of the array and the list is the same as the number of filenames that
# were given. The evaluation code may give unexpected results if this convention is not followed.


def decaptcha(filenames):
    """Announce progress, then delegate the whole batch to ``loop.loop``.

    Returns the ``(numChars, codes)`` pair produced by ``loop.loop``.
    """
    print("In Progress...")
    predictedLengths, predictedCodes = loop.loop(filenames)
    return (predictedLengths, predictedCodes)
return (numChars, codes)
6 changes: 6 additions & 0 deletions actual_submit/toSubmit/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Rebuild the compiled `loop` extension in place from loop.pyx, then remove
# the intermediate build artefacts.

# Stop at the first failing command so that a failed build is reported
# instead of being masked by the cleanup steps below.
set -e

python3 setup.py build_ext --inplace

# -f: do not fail if the generated C file is already gone.
rm -rf build
rm -f loop.c
6 changes: 6 additions & 0 deletions actual_submit/toSubmit/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from distutils.core import setup
from Cython.Build import cythonize

# Build the extension module generated by Cython from loop.pyx.
setup(ext_modules=cythonize("loop.pyx"))
65 changes: 65 additions & 0 deletions loop.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from PIL import Image
from PIL import ImageFilter
import numpy as np
import cv2
from keras.models import load_model
# Load the trained Keras model once, at import time; loop() below uses this
# module-level object for every prediction.
# NOTE(review): "model.h5" is a relative path, presumably resolved against the
# current working directory - confirm callers run from this folder.
loaded_model=load_model("model.h5")


def _is_nested(inner, outer):
    """Return True when box ``inner`` lies strictly inside box ``outer``.

    Both boxes are ``(x, y, w, h)`` tuples as returned by cv2.boundingRect.
    """
    ix, iy, iw, ih = inner
    ox, oy, ow, oh = outer
    return ix > ox and ix + iw < ox + ow and iy > oy and iy + ih < oy + oh


def loop(filenames):
    """Predict the character code shown in each image file.

    For every file the image is converted to a grey image, thresholded,
    split into candidate character boxes via contours, and each box is
    classified with the module-level ``loaded_model``.

    Args:
        filenames: iterable of image file paths.

    Returns:
        A tuple ``(numChars, codes)``: a NumPy array holding the number of
        characters predicted for each file and a list with the predicted
        code strings, both in the order of ``filenames``.  If classifying
        a file fails, that file is reported as 4 characters / "AAAA".
    """
    numCharsList = []
    codes = []
    for file in filenames:
        # Close the underlying file handle as soon as the pixel data has
        # been consumed; `grey` is an independent image and stays valid.
        with Image.open(file) as im:
            # `white` is a blurred, 15x15 max-filtered copy of the image
            # (presumably a local background estimate - TODO confirm).
            white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
            grey = im.convert('L')
            # Each pixel becomes 255 plus the largest per-channel difference
            # between the image and `white`, capped at 255.
            grey.putdata([min(255, max(255 + x[0] - y[0], 255 + x[1] - y[1], 255 + x[2] - y[2])) for (x, y) in zip(im.getdata(), white.getdata())])
        # NOTE(review): `grey` is a single-channel image but is passed through
        # an RGB->BGR conversion before going back to grey - confirm that the
        # installed OpenCV accepts this input.
        img = cv2.cvtColor(np.array(grey), cv2.COLOR_RGB2BGR)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        ret, thresh = cv2.threshold(gray_img, 200, 255, cv2.THRESH_BINARY)
        img_dilation = thresh
        # findContours returns (contours, hierarchy) or
        # (image, contours, hierarchy) depending on the OpenCV version;
        # the contours are always the second-to-last element.
        contours = cv2.findContours(img_dilation,
                                    cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Keep only boxes whose size falls inside the accepted range.
        boxes = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            if 20 < w < 120 and 60 < h < 120:
                boxes.append((x, y, w, h))

        # Drop boxes that sit strictly inside another accepted box, then
        # order the survivors left to right (tuples sort by x first).
        fl = []
        for box in boxes:
            if not any(_is_nested(box, other) for other in boxes):
                fl.append(box)
        fl.sort()

        try:
            chars = []
            for (x, y, w, h) in fl:
                # Pad the crop by 2 px, clamping at 0 so a box touching the
                # top/left edge does not wrap around to a negative index.
                top = max(y - 2, 0)
                left = max(x - 2, 0)
                glyph = img_dilation[top:y + h + 2, left:x + w + 2]
                glyph = cv2.resize(glyph, (100, 100))
                # Invert and scale to [0, 1] before feeding the model.
                flatten = (255 - glyph).flatten() / 255.0
                pred = loaded_model.predict(flatten.reshape(1, 100, 100, 1))
                # Class index 0 maps to 'A' (ASCII 65).
                chars.append(chr(pred.argmax() + 65))
            i = len(chars)
            code = ''.join(chars)
        except Exception:
            # Best-effort fallback: one output entry per input file is still
            # produced.  KeyboardInterrupt/SystemExit are allowed to propagate.
            i = 4
            code = "AAAA"
        numCharsList.append(i)
        codes.append(code)
    numChars = np.array(numCharsList)
    return (numChars, codes)
Binary file added model.h5
Binary file not shown.
Loading

0 comments on commit b8066c0

Please sign in to comment.