forked from vipulsnk/captcha_breaker
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
vipul
committed
Apr 8, 2020
0 parents
commit b8066c0
Showing
24 changed files
with
1,006 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
Dependencies | ||
1. cython | ||
2. keras | ||
3. tensorflow | ||
4. scikit-learn (imported as sklearn) | ||
|
||
Troubleshooting: | ||
1. If the bundled .so file does not work on your system, run the following command to build a compatible one: | ||
$ bash script.sh |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"folders": [ | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/RotatedData" | ||
}, | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/reference" | ||
}, | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/sample_submit" | ||
}, | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/sample_submit 2" | ||
}, | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/train" | ||
}, | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/trainSplit" | ||
}, | ||
{ | ||
"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/train_with_edges" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import predict | ||
import time as tm | ||
import numpy as np | ||
import os | ||
# Perform longest common subsequence search on the (truncated) code and gold code | ||
def lcs( str1, str2, p, q ):
    """Return the length of the longest common subsequence of two prefixes.

    Only the first ``p`` items of ``str1`` and the first ``q`` items of
    ``str2`` take part in the comparison.

    The table is filled iteratively, one row at a time, which costs
    O(p * q) time and O(q) memory. A plain recursive formulation re-solves
    the same sub-problems repeatedly and becomes exponential on inputs that
    mostly mismatch, besides being bounded by the recursion limit.

    Args:
        str1: first sequence (anything indexable, e.g. a string).
        str2: second sequence.
        p: number of leading items of ``str1`` to consider.
        q: number of leading items of ``str2`` to consider.

    Returns:
        The LCS length as an int; 0 when either prefix is empty.

    Raises:
        IndexError: if ``p`` or ``q`` is larger than the corresponding
            sequence (and neither of them is 0).
    """
    if p == 0 or q == 0:
        return 0

    # prev[j] is the LCS length of str1[:i-1] and str2[:j]; curr is row i.
    prev = [0] * ( q + 1 )
    for i in range( 1, p + 1 ):
        curr = [0] * ( q + 1 )
        for j in range( 1, q + 1 ):
            if str1[i-1] == str2[j-1]:
                curr[j] = prev[j-1] + 1
            else:
                curr[j] = max( prev[j], curr[j-1] )
        prev = curr
    return prev[q]
|
||
def getLCS( code, goldCode ):
    """Return the LCS length between a predicted code and the gold code.

    At most the first four characters of ``code`` are compared; the gold
    code is used in full.
    """
    usedLen = min( len( code ), 4 )
    goldLen = len( goldCode )
    return lcs( code, goldCode, usedLen, goldLen )
|
||
# If there are 100 test points and the prediction code returns 110 predictions | ||
# then we only consider the first 100 and discard the last 10 predictions. On | ||
# the other hand, if the code returns only 90 predictions, then we assume that | ||
# these were predictions on the first 90 test points and evaluate accordingly | ||
|
||
def getCodeLengthMatch( numChars, goldNumChars ):
    """Count the positions where predicted and gold code lengths agree.

    Only the first ``min(len(numChars), len(goldNumChars))`` entries are
    compared, so surplus predictions are ignored and missing predictions
    simply do not score.

    Args:
        numChars: predicted code lengths (numpy array or plain sequence).
        goldNumChars: true code lengths (numpy array or plain sequence).

    Returns:
        The number of compared positions whose lengths are equal, as an int.
    """
    # Coerce to arrays so that plain lists are compared element-wise too.
    numChars = np.asarray( numChars )
    goldNumChars = np.asarray( goldNumChars )
    # The common length comes from the arguments themselves, not from any
    # module-level variable, so the function gives the same answer wherever
    # it is called from.
    minLen = min( len( numChars ), len( goldNumChars ) )
    return np.count_nonzero( numChars[0:minLen] == goldNumChars[0:minLen] )
|
||
def getCodeMatchScore( codes, goldCodes ):
    """Sum the per-code match scores of the predictions.

    Predictions and gold codes are paired position by position, stopping at
    the shorter of the two lists. Each pair contributes the value of
    ``getLCS`` for that pair divided by the length of the gold code.
    """
    return sum(
        getLCS( guess, gold ) / len( gold )
        for guess, gold in zip( codes, goldCodes )
    )
|
||
numTest = 2000
path = '../FinalTrain/'

# Gather every PNG below `path`. Each name is joined with the directory that
# os.walk is currently visiting (not with `path`), so the resulting file
# names stay valid when images sit in sub-directories.
filepaths = []
for root, _dirs, names in os.walk( path ):
    for name in names:
        # Match the extension only, not '.png' appearing anywhere in the name.
        if name.endswith( '.png' ):
            filepaths.append( os.path.join( root, name ) )
# Sorted so that the i-th path pairs with the i-th gold code
# (assumes f1 lists the codes in sorted-filename order -- TODO confirm).
filepaths.sort()

# One gold code per line; the context manager closes the file even if
# reading fails.
with open( os.path.join( path, "f1" ), "r" ) as goldFile:
    goldCodes = goldFile.read().splitlines()
goldNumChars = np.array( [ len( gold ) for gold in goldCodes ] )

# Get recommendations from predict.py and time the thing
tic = tm.perf_counter()
(numChars, codes) = predict.decaptcha( filepaths )
toc = tm.perf_counter()

# Both scores are normalised by the fixed numTest, not by the number of
# image files actually found.
print( "Total time taken is %.6f seconds " % (toc - tic) )
print( "Fraction of code lengths that match is %.6f" % (getCodeLengthMatch( numChars, goldNumChars ) / numTest) )
print( "Code match score is %.6f" % (getCodeMatchScore( codes, goldCodes ) / numTest) )
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from PIL import Image | ||
from PIL import ImageFilter | ||
import numpy as np | ||
import cv2 | ||
from keras.models import load_model | ||
loaded_model=load_model("model.h5") | ||
|
||
|
||
def _remove_background(im):
    """Return an 'L'-mode image of ``im`` with its estimated background whitened.

    The background estimate is ``im`` blurred and then passed through a
    size-15 max filter. Every output pixel is
    ``min(255, max over the first three bands of (255 + pixel - background))``.
    """
    background = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
    grey = im.convert('L')
    grey.putdata([
        min(255, max(255 + px[0] - bg[0], 255 + px[1] - bg[1], 255 + px[2] - bg[2]))
        for (px, bg) in zip(im.getdata(), background.getdata())
    ])
    return grey


def _is_nested(inner, boxes):
    """Tell whether box ``inner`` lies strictly inside some box of ``boxes``."""
    left, top, width, height = inner
    for (ox, oy, ow, oh) in boxes:
        if left > ox and left + width < ox + ow and top > oy and top + height < oy + oh:
            return True
    return False


def _character_boxes(binary):
    """Return the sorted (x, y, w, h) boxes of character-sized contours.

    A contour's bounding box is kept when 20 < w < 120 and 60 < h < 120 and
    it is not strictly inside another kept-size box. Sorting the tuples
    orders the boxes by x first, i.e. left to right.
    """
    # [-2] picks the contour list under both return conventions:
    # (image, contours, hierarchy) and (contours, hierarchy).
    contours = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    boxes = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if 20 < w < 120 and 60 < h < 120:
            boxes.append((x, y, w, h))
    return sorted(box for box in boxes if not _is_nested(box, boxes))


def _read_code(binary, boxes):
    """Classify the crop of every box with ``loaded_model`` and join the letters."""
    letters = []
    for (x, y, w, h) in boxes:
        # Pad the crop by 2 px on every side. The start is clamped at 0
        # because a negative start index would wrap around to the far edge
        # of the image and yield an empty or wrong crop.
        crop = binary[max(y - 2, 0):y + h + 2, max(x - 2, 0):x + w + 2]
        glyph = 255 - cv2.resize(crop, (100, 100))
        pred = loaded_model.predict((glyph.flatten() / 255.0).reshape(1, 100, 100, 1))
        # Class index 0 maps to 'A' (ASCII 65), 1 to 'B', and so on.
        letters.append(chr(pred.argmax() + 65))
    return ''.join(letters)


def loop(filenames):
    """Predict the code shown in each image file.

    For every file the background is whitened, the result is thresholded at
    200, character-sized contours are located and each one is classified by
    the module-level ``loaded_model``. If classifying an image's characters
    fails, that image falls back to the code "AAAA" so that every file still
    gets exactly one prediction.

    Args:
        filenames: iterable of image file names.

    Returns:
        A tuple ``(numChars, codes)``: a numpy array holding the length of
        each predicted code and the list of predicted code strings, both
        with one entry per file name.

    Raises:
        Errors from opening or pre-processing an image are not caught and
        propagate to the caller.
    """
    numCharsList = []
    codes = []
    for file in filenames:
        # convert() returns a fully loaded copy, so the file handle can be
        # released immediately; it also guarantees three colour bands.
        with Image.open(file) as src:
            im = src.convert('RGB')
        grey = _remove_background(im)

        # NOTE(review): `grey` is single-channel at this point -- confirm that
        # the installed OpenCV build accepts this RGB->BGR->GRAY round trip.
        img = cv2.cvtColor(np.array(grey), cv2.COLOR_RGB2BGR)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        thresh = cv2.threshold(gray_img, 200, 255, cv2.THRESH_BINARY)[1]
        boxes = _character_boxes(thresh)
        try:
            code = _read_code(thresh, boxes)
        except Exception:
            # Best-effort fallback: keep one prediction per file.
            code = "AAAA"
        numCharsList.append(len(code))
        codes.append(code)
    return (np.array(numCharsList), codes)
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import numpy as np | ||
import cv2 | ||
from PIL import Image | ||
from PIL import ImageFilter | ||
|
||
import pyximport | ||
pyximport.install() | ||
import loop | ||
from keras.models import load_model | ||
|
||
# DO NOT CHANGE THE NAME OF THIS METHOD OR ITS INPUT OUTPUT BEHAVIOR | ||
|
||
# INPUT CONVENTION | ||
# filenames: a list of strings containing filenames of images | ||
|
||
# OUTPUT CONVENTION | ||
# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings. | ||
# Make sure that the length of the array and the list is the same as the number of filenames that | ||
# were given. The evaluation code may give unexpected results if this convention is not followed. | ||
|
||
|
||
def decaptcha(filenames):
    """Predict the code in every image named in ``filenames``.

    Prints a progress banner, hands the whole list to ``loop.loop`` and
    returns the ``(numChars, codes)`` pair it produces.
    """
    print("In Progress...")
    result = loop.loop(filenames)
    numChars, codes = result
    return numChars, codes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from distutils.core import setup | ||
from Cython.Build import cythonize | ||
|
||
# Run loop.pyx through Cython and hand the resulting extension module(s)
# to setup() for building.
extensions = cythonize("loop.pyx")
setup(ext_modules=extensions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Dependencies | ||
1. cython | ||
2. keras | ||
3. tensorflow | ||
4. scikit-learn (imported as sklearn) | ||
|
||
Troubleshooting: | ||
1. If the bundled .so file does not work on your system, run the following command to build a compatible one. | ||
script.sh invokes python3; if your interpreter is named python instead, first change "python3 setup.py build_ext --inplace" to "python setup.py build_ext --inplace" in script.sh. | ||
$ bash script.sh |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from PIL import Image | ||
from PIL import ImageFilter | ||
import numpy as np | ||
import cv2 | ||
from keras.models import load_model | ||
loaded_model=load_model("model.h5") | ||
|
||
|
||
def _remove_background(im):
    """Return an 'L'-mode image of ``im`` with its estimated background whitened.

    The background estimate is ``im`` blurred and then passed through a
    size-15 max filter. Every output pixel is
    ``min(255, max over the first three bands of (255 + pixel - background))``.
    """
    background = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
    grey = im.convert('L')
    grey.putdata([
        min(255, max(255 + px[0] - bg[0], 255 + px[1] - bg[1], 255 + px[2] - bg[2]))
        for (px, bg) in zip(im.getdata(), background.getdata())
    ])
    return grey


def _is_nested(inner, boxes):
    """Tell whether box ``inner`` lies strictly inside some box of ``boxes``."""
    left, top, width, height = inner
    for (ox, oy, ow, oh) in boxes:
        if left > ox and left + width < ox + ow and top > oy and top + height < oy + oh:
            return True
    return False


def _character_boxes(binary):
    """Return the sorted (x, y, w, h) boxes of character-sized contours.

    A contour's bounding box is kept when 20 < w < 120 and 60 < h < 120 and
    it is not strictly inside another kept-size box. Sorting the tuples
    orders the boxes by x first, i.e. left to right.
    """
    # [-2] picks the contour list under both return conventions:
    # (image, contours, hierarchy) and (contours, hierarchy).
    contours = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    boxes = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if 20 < w < 120 and 60 < h < 120:
            boxes.append((x, y, w, h))
    return sorted(box for box in boxes if not _is_nested(box, boxes))


def _read_code(binary, boxes):
    """Classify the crop of every box with ``loaded_model`` and join the letters."""
    letters = []
    for (x, y, w, h) in boxes:
        # Pad the crop by 2 px on every side. The start is clamped at 0
        # because a negative start index would wrap around to the far edge
        # of the image and yield an empty or wrong crop.
        crop = binary[max(y - 2, 0):y + h + 2, max(x - 2, 0):x + w + 2]
        glyph = 255 - cv2.resize(crop, (100, 100))
        pred = loaded_model.predict((glyph.flatten() / 255.0).reshape(1, 100, 100, 1))
        # Class index 0 maps to 'A' (ASCII 65), 1 to 'B', and so on.
        letters.append(chr(pred.argmax() + 65))
    return ''.join(letters)


def loop(filenames):
    """Predict the code shown in each image file.

    For every file the background is whitened, the result is thresholded at
    200, character-sized contours are located and each one is classified by
    the module-level ``loaded_model``. If classifying an image's characters
    fails, that image falls back to the code "AAAA" so that every file still
    gets exactly one prediction.

    Args:
        filenames: iterable of image file names.

    Returns:
        A tuple ``(numChars, codes)``: a numpy array holding the length of
        each predicted code and the list of predicted code strings, both
        with one entry per file name.

    Raises:
        Errors from opening or pre-processing an image are not caught and
        propagate to the caller.
    """
    numCharsList = []
    codes = []
    for file in filenames:
        # convert() returns a fully loaded copy, so the file handle can be
        # released immediately; it also guarantees three colour bands.
        with Image.open(file) as src:
            im = src.convert('RGB')
        grey = _remove_background(im)

        # NOTE(review): `grey` is single-channel at this point -- confirm that
        # the installed OpenCV build accepts this RGB->BGR->GRAY round trip.
        img = cv2.cvtColor(np.array(grey), cv2.COLOR_RGB2BGR)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        thresh = cv2.threshold(gray_img, 200, 255, cv2.THRESH_BINARY)[1]
        boxes = _character_boxes(thresh)
        try:
            code = _read_code(thresh, boxes)
        except Exception:
            # Best-effort fallback: keep one prediction per file.
            code = "AAAA"
        numCharsList.append(len(code))
        codes.append(code)
    return (np.array(numCharsList), codes)
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import numpy as np | ||
import cv2 | ||
from PIL import Image | ||
from PIL import ImageFilter | ||
|
||
import pyximport | ||
pyximport.install() | ||
import loop | ||
from keras.models import load_model | ||
|
||
# DO NOT CHANGE THE NAME OF THIS METHOD OR ITS INPUT OUTPUT BEHAVIOR | ||
|
||
# INPUT CONVENTION | ||
# filenames: a list of strings containing filenames of images | ||
|
||
# OUTPUT CONVENTION | ||
# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings. | ||
# Make sure that the length of the array and the list is the same as the number of filenames that | ||
# were given. The evaluation code may give unexpected results if this convention is not followed. | ||
|
||
|
||
def decaptcha(filenames):
    """Predict the code in every image named in ``filenames``.

    Prints a progress banner, hands the whole list to ``loop.loop`` and
    returns the ``(numChars, codes)`` pair it produces.
    """
    print("In Progress...")
    result = loop.loop(filenames)
    numChars, codes = result
    return numChars, codes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash
# Rebuild the Cython extension in place, then remove the build by-products.

# Stop at the first failing command so that a failed build is reported
# instead of being followed by the clean-up steps.
set -euo pipefail

python3 setup.py build_ext --inplace

# Clean-up, reached only when the build succeeded.
rm -rf build
# -f: do not fail when loop.c is absent.
rm -f loop.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from distutils.core import setup | ||
from Cython.Build import cythonize | ||
|
||
# Run loop.pyx through Cython and hand the resulting extension module(s)
# to setup() for building.
extensions = cythonize("loop.pyx")
setup(ext_modules=extensions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from PIL import Image | ||
from PIL import ImageFilter | ||
import numpy as np | ||
import cv2 | ||
from keras.models import load_model | ||
loaded_model=load_model("model.h5") | ||
|
||
|
||
def _remove_background(im):
    """Return an 'L'-mode image of ``im`` with its estimated background whitened.

    The background estimate is ``im`` blurred and then passed through a
    size-15 max filter. Every output pixel is
    ``min(255, max over the first three bands of (255 + pixel - background))``.
    """
    background = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
    grey = im.convert('L')
    grey.putdata([
        min(255, max(255 + px[0] - bg[0], 255 + px[1] - bg[1], 255 + px[2] - bg[2]))
        for (px, bg) in zip(im.getdata(), background.getdata())
    ])
    return grey


def _is_nested(inner, boxes):
    """Tell whether box ``inner`` lies strictly inside some box of ``boxes``."""
    left, top, width, height = inner
    for (ox, oy, ow, oh) in boxes:
        if left > ox and left + width < ox + ow and top > oy and top + height < oy + oh:
            return True
    return False


def _character_boxes(binary):
    """Return the sorted (x, y, w, h) boxes of character-sized contours.

    A contour's bounding box is kept when 20 < w < 120 and 60 < h < 120 and
    it is not strictly inside another kept-size box. Sorting the tuples
    orders the boxes by x first, i.e. left to right.
    """
    # [-2] picks the contour list under both return conventions:
    # (image, contours, hierarchy) and (contours, hierarchy).
    contours = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    boxes = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if 20 < w < 120 and 60 < h < 120:
            boxes.append((x, y, w, h))
    return sorted(box for box in boxes if not _is_nested(box, boxes))


def _read_code(binary, boxes):
    """Classify the crop of every box with ``loaded_model`` and join the letters."""
    letters = []
    for (x, y, w, h) in boxes:
        # Pad the crop by 2 px on every side. The start is clamped at 0
        # because a negative start index would wrap around to the far edge
        # of the image and yield an empty or wrong crop.
        crop = binary[max(y - 2, 0):y + h + 2, max(x - 2, 0):x + w + 2]
        glyph = 255 - cv2.resize(crop, (100, 100))
        pred = loaded_model.predict((glyph.flatten() / 255.0).reshape(1, 100, 100, 1))
        # Class index 0 maps to 'A' (ASCII 65), 1 to 'B', and so on.
        letters.append(chr(pred.argmax() + 65))
    return ''.join(letters)


def loop(filenames):
    """Predict the code shown in each image file.

    For every file the background is whitened, the result is thresholded at
    200, character-sized contours are located and each one is classified by
    the module-level ``loaded_model``. If classifying an image's characters
    fails, that image falls back to the code "AAAA" so that every file still
    gets exactly one prediction.

    Args:
        filenames: iterable of image file names.

    Returns:
        A tuple ``(numChars, codes)``: a numpy array holding the length of
        each predicted code and the list of predicted code strings, both
        with one entry per file name.

    Raises:
        Errors from opening or pre-processing an image are not caught and
        propagate to the caller.
    """
    numCharsList = []
    codes = []
    for file in filenames:
        # convert() returns a fully loaded copy, so the file handle can be
        # released immediately; it also guarantees three colour bands.
        with Image.open(file) as src:
            im = src.convert('RGB')
        grey = _remove_background(im)

        # NOTE(review): `grey` is single-channel at this point -- confirm that
        # the installed OpenCV build accepts this RGB->BGR->GRAY round trip.
        img = cv2.cvtColor(np.array(grey), cv2.COLOR_RGB2BGR)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        thresh = cv2.threshold(gray_img, 200, 255, cv2.THRESH_BINARY)[1]
        boxes = _character_boxes(thresh)
        try:
            code = _read_code(thresh, boxes)
        except Exception:
            # Best-effort fallback: keep one prediction per file.
            code = "AAAA"
        numCharsList.append(len(code))
        codes.append(code)
    return (np.array(numCharsList), codes)
Binary file not shown.
Oops, something went wrong.