OCR Python src

devhmac · Mar 24, 2024 · 80bbac0 · 80bbac0
1 parent 739dacf
commit 80bbac0
Show file tree

Hide file tree

Showing 24 changed files with 178 additions and 68 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,9 @@
 # Ignore files generated by the IDE
 .idea/
 .env
+node_modules/
+__pycache__/
+*/__pycache__/
 
 # Ignore compiled binaries
 *.exe

diff --git a/Procfile b/Procfile
@@ -0,0 +1 @@
+web: uvicorn main:app --host 0.0.0.0 --port $PORT
diff --git a/README.md b/README.md
@@ -16,3 +16,14 @@ date_of_birth: string (ISO 8601)
 license_number: NNNNNN-NNN
 sex: "M" | "F" | "X"
 ```
+
+
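+## OCR API
+
+Upload an ID image to `POST /upload-image/` as multipart form data; the JSON response contains the extracted details under `data`. Replace the path after `file=@` with the path to your own image:
+
+```
+curl -X 'POST' \
+  'http://127.0.0.1:8000/upload-image/' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: multipart/form-data' \
+  -F 'file=@pysrc/img/high_contrast.jpeg;type=image/jpeg'
+```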
diff --git a/ocr/index.js b/ocr/index.js
@@ -0,0 +1,34 @@
+import { createWorker } from 'tesseract.js';
+
+(async () => {
+  const worker = await createWorker('eng');
+  const rectangle = [{
+    top: 44,
+    left: 180,
+    width: 150,
+    height: 100,
+  },
+  {
+    top: 185,
+    left: 176,
+    width: 175,
+    height: 75,
+  },{
+    top: 222,
+    left: 176,
+    width: 42,
+    height: 14,
+  }
+];
+  worker.setParameters({
+    tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 -',
+  });
+  for(let i = 0; i < rectangle.length; i++) {
+    const ret = await worker.recognize('img/high_contrast.jpeg', {
+      rectangle: rectangle[i]
+    }); 
+    console.log(ret.data.text);
+  }
+//   console.log(ret.data.text);
+  await worker.terminate();
+})();
diff --git a/pysrc/__pycache__/UserDetails.cpython-311.pyc b/pysrc/__pycache__/UserDetails.cpython-311.pyc
diff --git a/pysrc/hugging_face_ocr.py b/pysrc/hugging_face_ocr.py
@@ -0,0 +1,15 @@
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from PIL import Image
+
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
+
+image = Image.open("/Users/habib/Documents/idify/pysrc/img/hubert_lin_id.jpeg").convert("RGB")
+
+pixel_values = processor(image, return_tensors="pt").pixel_values
+generated_ids = model.generate(pixel_values)
+
+generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
+
+
+print(generated_text)
diff --git a/pysrc/img/high_contrast.jpeg b/pysrc/img/high_contrast.jpeg
diff --git a/pysrc/img/hubert_lin_id copy.jpeg b/pysrc/img/hubert_lin_id copy.jpeg
diff --git a/pysrc/img/hubert_lin_id.jpeg b/pysrc/img/hubert_lin_id.jpeg
diff --git a/pysrc/img/image.png b/pysrc/img/image.png
diff --git a/pysrc/img/image_info.png b/pysrc/img/image_info.png
diff --git a/pysrc/img/info_high_contrast.png b/pysrc/img/info_high_contrast.png
diff --git a/pysrc/img/info_part_2_high_contrast.png b/pysrc/img/info_part_2_high_contrast.png
diff --git a/pysrc/img/sample_health_card copy.jpeg b/pysrc/img/sample_health_card copy.jpeg
diff --git a/pysrc/img/sample_id_1 copy 2.png b/pysrc/img/sample_id_1 copy 2.png
diff --git a/pysrc/img/sample_id_1 copy.jpeg b/pysrc/img/sample_id_1 copy.jpeg
diff --git a/pysrc/img/sample_id_2 copy.webp b/pysrc/img/sample_id_2 copy.webp
diff --git a/pysrc/img/sample_id_card copy.jpeg b/pysrc/img/sample_id_card copy.jpeg
diff --git a/pysrc/img/sample_lin_id.jpeg b/pysrc/img/sample_lin_id.jpeg
diff --git a/pysrc/main.py b/pysrc/main.py
@@ -0,0 +1,34 @@
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import JSONResponse
+import io
+from PIL import Image
+import base64
+from openai_helper import extract_user_details
+
+app = FastAPI()  # application object; the Procfile's `uvicorn main:app` command refers to this name
+
+@app.post("/upload-image/")
+async def create_upload_file(file: UploadFile = File(...)):
+    try:
+        # Read the file content
+        file_content = await file.read()
+
+        # Optionally, you might want to verify if the uploaded file is a valid image.
+        # You can do this by trying to open it with PIL.
+        # Resetting the file pointer is necessary here because 'file.read()' above would have exhausted the stream.
+        try:
+            Image.open(io.BytesIO(file_content)).verify()
+        except Exception:
+            return JSONResponse(content={"message": "Invalid image format!"}, status_code=400)
+
+        # Encode the file content to base64
+        base64image = base64.b64encode(file_content).decode('utf-8')
+
+        # Pass the base64 string to your function
+        response = extract_user_details(base64image)
+
+        # Return a response including the data from 'extract_user_details'
+        return JSONResponse(content={"message": "Image received and processed!", "data": response}, status_code=200)
+
+    except Exception as e:
+        return JSONResponse(content={"message": str(e)}, status_code=500)
diff --git a/pysrc/main_openai.py b/pysrc/main_openai.py
diff --git a/pysrc/openai_helper.py b/pysrc/openai_helper.py
@@ -0,0 +1,67 @@
+import base64
+from openai import OpenAI
+import instructor
+import requests
+from pydantic.main import BaseModel
+from load_env import openai_api_key
+from UserDetails import UserDetail
+
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+
+def extract_user_details(base64_image):
+  # image_path = "/Users/habib/Documents/idify/pysrc/img/sample_lin_id.jpeg"
+  # image_path = "/Users/habib/Documents/idify/pysrc/img/sample_health_card.jpeg"
+  # image_path = "/Users/habib/Documents/idify/pysrc/img/hubert_lin_id.jpeg"
+  # base64_image = encode_image(image_path)
+
+  headers = {
+      "Content-Type": "application/json",
+      "Authorization": f"Bearer {openai_api_key}"
+  }
+  payload = {
+      "model": "gpt-4-vision-preview",
+      "messages": [
+        {
+          "role": "user",
+          "content": [
+            {
+              "type": "text",
+              "text": "What are the text on this photo?"
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_image}"
+              }
+            }
+          ]
+        }
+      ],
+      "max_tokens": 300
+  }
+  response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+
+
+  client = instructor.patch(OpenAI())
+
+  response_json = response.json()
+
+  # print("Response JSON:", response_json)
+
+  content = response_json['choices'][0]['message']['content']
+
+  user_detail = client.chat.completions.create(
+      model="gpt-4",
+      response_model=UserDetail,
+      messages=[
+          {"role": "user", "content": "Extract driver\'s first name, last name, street address, post code, city, province, country, license number, birthdate, sex, eye color, hair color, height, and weight the following ID description json:" + content},
+      ]
+  )
+
+  json_user_detail = user_detail.dict()
+  # print(json_user_detail)
+  return json_user_detail
+
+
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,13 @@
+fastapi==0.110.0
+uvicorn==0.29.0
+Pillow
+requests
+openai==1.1.1
+opencv-python==4.9.0.80
+pandas==2.2.1
+pydantic==2.6.4
+pydantic_core==2.16.3
+Pygments==2.17.2
+instructor==0.6.7
+python-dotenv==1.0.1
+python-multipart==0.0.9
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		web: uvicorn main:app --host 0.0.0.0 --port $PORT