initial commit

harshit987 · Apr 8, 2020 · b8066c0 · b8066c0
commit b8066c0
Show file tree

Hide file tree

Showing 24 changed files with 1,006 additions and 0 deletions.
diff --git a/actual_submit/ReadMe.txt b/actual_submit/ReadMe.txt
@@ -0,0 +1,9 @@
+Dependencies
+1. cython
+2. keras
+3. tensorflow
+4. sklearn
+
+Troubleshooting:
+1. If the bundled .so file does not work on your system, run the following command to build a compatible one:
+$ bash script.sh
diff --git a/actual_submit/__pycache__/predict.cpython-36.pyc b/actual_submit/__pycache__/predict.cpython-36.pyc
diff --git a/actual_submit/assn3.zip b/actual_submit/assn3.zip
diff --git a/actual_submit/assn3_mlcs771.code-workspace b/actual_submit/assn3_mlcs771.code-workspace
@@ -0,0 +1,25 @@
+{
+	"folders": [
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/RotatedData"
+		},
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/reference"
+		},
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/sample_submit"
+		},
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/sample_submit 2"
+		},
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/train"
+		},
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/trainSplit"
+		},
+		{
+			"path": "/Users/vipul/Desktop/.test/studies/IITK_Acads/5th_sem/cs771/assn3/train_with_edges"
+		}
+	]
+}
diff --git a/actual_submit/eval.py b/actual_submit/eval.py
@@ -0,0 +1,56 @@
+import predict
+import time as tm
+import numpy as np
+import os
+# Perform longest common subsequence search on the (truncated) code and gold code
+def lcs( str1, str2, p, q ):
+    if p == 0 or q == 0:
+        return 0
+    elif str1[p-1] == str2[q-1]:
+        return 1 + lcs( str1, str2, p - 1, q - 1 )
+    else:
+        return max( lcs( str1, str2, p - 1, q ), lcs( str1, str2, p, q - 1 ) )
+
+def getLCS( code, goldCode ):
+    return lcs( code, goldCode, min( len( code ), 4 ), len( goldCode ) )
+
+# If there are 100 test points and the prediction code returns 110 predictions
+# then we only consider the first 100 and discard the last 10 predictions. On
+# the other hand, if the code returns only 90 predictions, then we assume that
+# these were predictions on the first 90 test points and evaluate accordingly
+
+def getCodeLengthMatch( numChars, goldNumChars ):
+    minLen = min( len( codes ), len( goldCodes ) )
+    return np.count_nonzero( numChars[0:minLen] == goldNumChars[0:minLen] )
+
+def getCodeMatchScore( codes, goldCodes ):
+    totScore = 0
+    for i in range( min( len( codes ), len( goldCodes ) ) ):
+        totScore += getLCS( codes[i], goldCodes[i] ) / len( goldCodes[i] )
+    return totScore
+
+numTest = 2000
+path = '../FinalTrain/'
+
+filepaths = []
+# r=root, d=directories, f = files
+for r, d, f in os.walk(path):
+    for file in f:
+        if '.png' in file:
+            filepaths.append(path + file)
+filepaths.sort()
+# print(filepaths)
+# filepaths = [ "test/image%d.png" % i for i in range( numTest ) ]
+file = open( "../FinalTrain/f1", "r" )
+goldCodes = file.read().splitlines()
+file.close()
+goldNumChars = np.array( [ len( goldCodes[i] ) for i in range( len( goldCodes ) ) ] )
+
+# Get recommendations from predict.py and time the thing
+tic = tm.perf_counter()
+(numChars, codes) = predict.decaptcha( filepaths )
+toc = tm.perf_counter()
+
+print( "Total time taken is %.6f seconds " % (toc - tic) )
+print( "Fraction of code lengths that match is %.6f" % (getCodeLengthMatch( numChars, goldNumChars ) / numTest)  )
+print( "Code match score is %.6f" % (getCodeMatchScore( codes, goldCodes ) / numTest) )
diff --git a/actual_submit/loop.cpython-36m-darwin.so b/actual_submit/loop.cpython-36m-darwin.so
diff --git a/actual_submit/loop.pyx b/actual_submit/loop.pyx
@@ -0,0 +1,65 @@
+from PIL import Image
+from PIL import ImageFilter
+import numpy as np
+import cv2
+from keras.models import load_model
+# Load the trained Keras model once, when this module is imported; loop() uses it for every prediction.
+# NOTE(review): "model.h5" is a relative path, presumably resolved against the current working directory - confirm.
+loaded_model=load_model("model.h5")
+
+
+def loop(filenames):
+  errs=0
+  numCharsList=[]
+  codes = []
+  for file in filenames:
+
+      im = Image.open(file)
+      white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
+      grey = im.convert('L')
+      width,height = im.size
+      grey.putdata([min(255, max(255 + x[0] - y[0], 255 + x[1] - y[1], 255 + x[2] - y[2])) for (x, y) in zip(im.getdata(), white.getdata())])
+      img=cv2.cvtColor(np.array(grey),cv2.COLOR_RGB2BGR)
+      gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+      ret, thresh = cv2.threshold(gray_img, 200, 255, 0)
+      img_dilation=thresh
+      contours, hierarchy = cv2.findContours(img_dilation,
+                                          cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+      l = []
+      for cnt in contours:
+          x, y, w, h = cv2.boundingRect(cnt)
+          if ((w < 120 and h > 60 and w > 20 and h < 120)):
+              l.append((x, y, w, h))
+      fl = []
+      for item1 in l:
+          flag = 0
+          for item2 in l:
+              if ((item1[0] > item2[0]) and ((item1[0] + item1[2]) < (item2[0] + item2[2])) and (item1[1] > item2[1]) and ((item1[1] + item1[3]) < (item2[1] + item2[3]))):
+                  flag = 1
+                  break
+          if flag == 0:
+              fl.append(item1)
+              (x, y, w, h) = item1
+      fl.sort()
+      i = 0
+      code = ''
+      try:
+          for cnt in fl:
+              (x, y, w, h) = cnt
+              new_img = img_dilation[y-2:y+h+2, x-2: x+w+2]
+              resized_image = cv2.resize(new_img, (int(100), int(100)))
+              gray = resized_image
+
+              gray = cv2.resize(255-gray, (100, 100))
+              flatten = gray.flatten() / 255.0
+
+              pred = loaded_model.predict(flatten.reshape(1, 100, 100, 1))
+              code=code+chr(pred.argmax()+65)
+              i = i+1
+      except:
+          errs+=1
+          i=4
+          code = "AAAA"
+      numCharsList.append(i)
+      codes.append(code)
+  numChars = np.array(numCharsList) 
+  return (numChars, codes)
diff --git a/actual_submit/model.h5 b/actual_submit/model.h5
diff --git a/actual_submit/predict.py b/actual_submit/predict.py
@@ -0,0 +1,25 @@
+import numpy as np
+import cv2
+from PIL import Image
+from PIL import ImageFilter
+
+import pyximport
+pyximport.install()
+import loop
+from keras.models import load_model
+
+# DO NOT CHANGE THE NAME OF THIS METHOD OR ITS INPUT OUTPUT BEHAVIOR
+
+# INPUT CONVENTION
+# filenames: a list of strings containing filenames of images
+
+# OUTPUT CONVENTION
+# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings.
+# Make sure that the length of the array and the list is the same as the number of filenames that
+# were given. The evaluation code may give unexpected results if this convention is not followed.
+
+
+def decaptcha(filenames):
+    print("In Progress...")
+    numChars,codes=loop.loop(filenames)
+    return (numChars, codes)
diff --git a/actual_submit/setup.py b/actual_submit/setup.py
@@ -0,0 +1,6 @@
+from distutils.core import setup
+from Cython.Build import cythonize
+
+setup(
+  ext_modules = cythonize("loop.pyx")
+)
diff --git a/actual_submit/toSubmit/ReadMe.txt b/actual_submit/toSubmit/ReadMe.txt
@@ -0,0 +1,10 @@
+Dependencies
+1. cython
+2. keras
+3. tensorflow
+4. sklearn
+
+Troubleshooting:
+1. If the bundled .so file does not work on your system, run the command below to build a compatible one.
+   script.sh invokes python3; if your interpreter is named python, first change "python3 setup.py build_ext --inplace" to "python setup.py build_ext --inplace" in script.sh.
+$ bash script.sh
diff --git a/actual_submit/toSubmit/loop.cpython-36m-darwin.so b/actual_submit/toSubmit/loop.cpython-36m-darwin.so
diff --git a/actual_submit/toSubmit/loop.pyx b/actual_submit/toSubmit/loop.pyx
@@ -0,0 +1,65 @@
+from PIL import Image
+from PIL import ImageFilter
+import numpy as np
+import cv2
+from keras.models import load_model
+# Load the trained Keras model once, when this module is imported; loop() uses it for every prediction.
+# NOTE(review): "model.h5" is a relative path, presumably resolved against the current working directory - confirm.
+loaded_model=load_model("model.h5")
+
+
+def loop(filenames):
+  errs=0
+  numCharsList=[]
+  codes = []
+  for file in filenames:
+
+      im = Image.open(file)
+      white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
+      grey = im.convert('L')
+      width,height = im.size
+      grey.putdata([min(255, max(255 + x[0] - y[0], 255 + x[1] - y[1], 255 + x[2] - y[2])) for (x, y) in zip(im.getdata(), white.getdata())])
+      img=cv2.cvtColor(np.array(grey),cv2.COLOR_RGB2BGR)
+      gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+      ret, thresh = cv2.threshold(gray_img, 200, 255, 0)
+      img_dilation=thresh
+      contours, hierarchy = cv2.findContours(img_dilation,
+                                          cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+      l = []
+      for cnt in contours:
+          x, y, w, h = cv2.boundingRect(cnt)
+          if ((w < 120 and h > 60 and w > 20 and h < 120)):
+              l.append((x, y, w, h))
+      fl = []
+      for item1 in l:
+          flag = 0
+          for item2 in l:
+              if ((item1[0] > item2[0]) and ((item1[0] + item1[2]) < (item2[0] + item2[2])) and (item1[1] > item2[1]) and ((item1[1] + item1[3]) < (item2[1] + item2[3]))):
+                  flag = 1
+                  break
+          if flag == 0:
+              fl.append(item1)
+              (x, y, w, h) = item1
+      fl.sort()
+      i = 0
+      code = ''
+      try:
+          for cnt in fl:
+              (x, y, w, h) = cnt
+              new_img = img_dilation[y-2:y+h+2, x-2: x+w+2]
+              resized_image = cv2.resize(new_img, (int(100), int(100)))
+              gray = resized_image
+
+              gray = cv2.resize(255-gray, (100, 100))
+              flatten = gray.flatten() / 255.0
+
+              pred = loaded_model.predict(flatten.reshape(1, 100, 100, 1))
+              code=code+chr(pred.argmax()+65)
+              i = i+1
+      except:
+          errs+=1
+          i=4
+          code = "AAAA"
+      numCharsList.append(i)
+      codes.append(code)
+  numChars = np.array(numCharsList) 
+  return (numChars, codes)
diff --git a/actual_submit/toSubmit/model.h5 b/actual_submit/toSubmit/model.h5
diff --git a/actual_submit/toSubmit/predict.py b/actual_submit/toSubmit/predict.py
@@ -0,0 +1,25 @@
+import numpy as np
+import cv2
+from PIL import Image
+from PIL import ImageFilter
+
+import pyximport
+pyximport.install()
+import loop
+from keras.models import load_model
+
+# DO NOT CHANGE THE NAME OF THIS METHOD OR ITS INPUT OUTPUT BEHAVIOR
+
+# INPUT CONVENTION
+# filenames: a list of strings containing filenames of images
+
+# OUTPUT CONVENTION
+# The method must return a numpy array (not numpy matrix or scipy matrix) and a list of strings.
+# Make sure that the length of the array and the list is the same as the number of filenames that
+# were given. The evaluation code may give unexpected results if this convention is not followed.
+
+
+def decaptcha(filenames):
+    print("In Progress...")
+    numChars,codes=loop.loop(filenames)
+    return (numChars, codes)
diff --git a/actual_submit/toSubmit/script.sh b/actual_submit/toSubmit/script.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Rebuild the Cython extension from loop.pyx in place, then remove the build leftovers.
+
+# Stop at the first failing command so that a failed build is not masked by the cleanup below.
+set -e
+
+python3 setup.py build_ext --inplace
+rm -rf build
+# -f: do not fail when loop.c is absent
+rm -f loop.c
diff --git a/actual_submit/toSubmit/setup.py b/actual_submit/toSubmit/setup.py
@@ -0,0 +1,6 @@
+from distutils.core import setup
+from Cython.Build import cythonize
+
+setup(
+  ext_modules = cythonize("loop.pyx")
+)
diff --git a/loop.pyx b/loop.pyx
@@ -0,0 +1,65 @@
+from PIL import Image
+from PIL import ImageFilter
+import numpy as np
+import cv2
+from keras.models import load_model
+# Load the trained Keras model once, when this module is imported; loop() uses it for every prediction.
+# NOTE(review): "model.h5" is a relative path, presumably resolved against the current working directory - confirm.
+loaded_model=load_model("model.h5")
+
+
+def loop(filenames):
+  errs=0
+  numCharsList=[]
+  codes = []
+  for file in filenames:
+
+      im = Image.open(file)
+      white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(15))
+      grey = im.convert('L')
+      width,height = im.size
+      grey.putdata([min(255, max(255 + x[0] - y[0], 255 + x[1] - y[1], 255 + x[2] - y[2])) for (x, y) in zip(im.getdata(), white.getdata())])
+      img=cv2.cvtColor(np.array(grey),cv2.COLOR_RGB2BGR)
+      gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+      ret, thresh = cv2.threshold(gray_img, 200, 255, 0)
+      img_dilation=thresh
+      contours, hierarchy = cv2.findContours(img_dilation,
+                                          cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+      l = []
+      for cnt in contours:
+          x, y, w, h = cv2.boundingRect(cnt)
+          if ((w < 120 and h > 60 and w > 20 and h < 120)):
+              l.append((x, y, w, h))
+      fl = []
+      for item1 in l:
+          flag = 0
+          for item2 in l:
+              if ((item1[0] > item2[0]) and ((item1[0] + item1[2]) < (item2[0] + item2[2])) and (item1[1] > item2[1]) and ((item1[1] + item1[3]) < (item2[1] + item2[3]))):
+                  flag = 1
+                  break
+          if flag == 0:
+              fl.append(item1)
+              (x, y, w, h) = item1
+      fl.sort()
+      i = 0
+      code = ''
+      try:
+          for cnt in fl:
+              (x, y, w, h) = cnt
+              new_img = img_dilation[y-2:y+h+2, x-2: x+w+2]
+              resized_image = cv2.resize(new_img, (int(100), int(100)))
+              gray = resized_image
+
+              gray = cv2.resize(255-gray, (100, 100))
+              flatten = gray.flatten() / 255.0
+
+              pred = loaded_model.predict(flatten.reshape(1, 100, 100, 1))
+              code=code+chr(pred.argmax()+65)
+              i = i+1
+      except:
+          errs+=1
+          i=4
+          code = "AAAA"
+      numCharsList.append(i)
+      codes.append(code)
+  numChars = np.array(numCharsList) 
+  return (numChars, codes)
diff --git a/model.h5 b/model.h5