-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_detection.py
executable file
·131 lines (110 loc) · 4.05 KB
/
image_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import google.generativeai as genai
import sys
import os
from PIL import Image, ImageDraw
import json
from dotenv import load_dotenv
from typing import List, Dict, Any, Tuple, Optional
import logging
# Upper bound of the coordinate range the model is asked to use (the prompt
# requests a 0-1000 scale); pixel positions are obtained by multiplying a
# coordinate by image_size / IMAGE_SCALE.
IMAGE_SCALE = 1000

# Pull settings from a local .env file into the process environment before
# any os.getenv() lookup below runs.
load_dotenv()

# Timestamped, level-tagged log lines at INFO and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def _scaled_corners(
    box: Any, scale_x: float, scale_y: float
) -> Optional[Tuple[int, int, int, int]]:
    """Convert one model-reported box into pixel corners ``(x1, y1, x2, y2)``.

    Accepts either the dict layout requested by the prompt
    (``{"xmin", "ymin", "xmax", "ymax"}``) or a 4-item sequence in Gemini's
    native ``[ymin, xmin, ymax, xmax]`` order. Returns ``None`` when the box
    is missing a coordinate or a coordinate is not a finite number.
    """
    if isinstance(box, dict):
        raw = (box.get("xmin"), box.get("ymin"), box.get("xmax"), box.get("ymax"))
    elif isinstance(box, (list, tuple)) and len(box) == 4:
        # Native Gemini ordering puts y before x; reorder to x-first.
        ymin, xmin, ymax, xmax = box
        raw = (xmin, ymin, xmax, ymax)
    else:
        return None

    if any(value is None for value in raw):
        return None

    try:
        xmin, ymin, xmax, ymax = (float(value) for value in raw)
        # Sort each axis so a box reported with swapped corners still yields
        # x1 <= x2 and y1 <= y2, which ImageDraw.rectangle requires.
        left, right = sorted((int(xmin * scale_x), int(xmax * scale_x)))
        top, bottom = sorted((int(ymin * scale_y), int(ymax * scale_y)))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric strings, nested containers, NaN or infinity.
        return None
    return left, top, right, bottom


def draw_boxes(
    image: Image.Image, detections: List[Dict[str, Any]], scale_x: float, scale_y: float
) -> None:
    """Draw a red rectangle and label on ``image`` for every usable detection.

    The image is modified in place. Malformed entries (non-dict items, boxes
    with missing or non-numeric coordinates) are skipped with a warning
    instead of aborting the whole pass.

    Args:
        image: Pillow image to annotate.
        detections: Parsed model output; each entry is expected to carry a
            ``"box_2d"`` and a ``"label"`` key.
        scale_x: Factor converting model x coordinates to pixels.
        scale_y: Factor converting model y coordinates to pixels.
    """
    if not isinstance(detections, (list, tuple)):
        logging.warning(
            "Expected a list of detections, got %s; nothing drawn.",
            type(detections).__name__,
        )
        return

    # Validate everything first so drawing only ever sees clean data.
    drawable = []
    for detection in detections:
        if not isinstance(detection, dict):
            logging.warning("Skipping malformed detection: %r", detection)
            continue
        box = detection.get("box_2d")
        label = detection.get("label")
        if not (box and label):
            continue
        corners = _scaled_corners(box, scale_x, scale_y)
        if corners is None:
            logging.warning("Skipping detection with unusable box: %r", detection)
            continue
        drawable.append((corners, str(label)))

    if not drawable:
        return

    draw = ImageDraw.Draw(image)
    for (x1, y1, x2, y2), text in drawable:
        logging.info(f"draw box: ({x1}, {y1}), ({x2}, {y2})")
        draw.rectangle([(x1, y1), (x2, y2)], outline="red", width=5)
        # Keep the label on the canvas when the box touches the top edge.
        draw.text((x1, max(y1 - 10, 0)), text, fill="red")
def _get_gemini_response(
    model: genai.GenerativeModel, prompt: str, image: Image.Image
) -> Optional[str]:
    """Send the prompt and image to the model and return the reply text.

    Any exception raised while calling the model or reading the reply is
    logged and reported as ``None``; an empty reply is likewise logged and
    reported as ``None``.

    Args:
        model: Configured generative model to query.
        prompt: Instruction text sent alongside the image.
        image: Image passed to the model as the second content part.

    Returns:
        The non-empty reply text, or ``None`` on failure or empty reply.
    """
    try:
        reply = model.generate_content([prompt, image], stream=False)
        # Reading .text stays inside the try: it is part of the API surface
        # and its failures are treated like call failures.
        text = reply.text if reply else None
    except Exception as e:
        logging.error(f"Error during API call: {e}")
        return None

    if text:
        return text

    logging.error("Error: Gemini API returned an empty response.")
    return None
def detect_objects(image_path: str, api_key: str) -> None:
    """Detect furniture in an image with Gemini and save an annotated copy.

    The image at ``image_path`` is sent to the model named by the
    ``GEMINI_MODEL_NAME`` environment variable together with a prompt asking
    for up to five bounding boxes on a 0-1000 scale. The boxes are drawn on
    an RGB copy of the image, which is written next to the input as
    ``<name>_detected.jpg``.

    Recoverable problems (unreadable image, empty or invalid model reply,
    failure to save) are logged and the function returns without writing a
    file.

    Args:
        image_path: Path of the image to analyse.
        api_key: Gemini API key.

    Raises:
        SystemExit: If ``GEMINI_MODEL_NAME`` is not set.
    """
    genai.configure(api_key=api_key)
    generation_config = {
        "temperature": 1,
        "response_mime_type": "application/json",
    }
    model_name = os.getenv("GEMINI_MODEL_NAME")
    if not model_name:
        logging.error(
            "Error: GEMINI_MODEL_NAME not found in environment variables or .env file."
        )
        sys.exit(1)
    model = genai.GenerativeModel(
        model_name=model_name,
        generation_config=generation_config,
    )

    try:
        source = Image.open(image_path)
    except FileNotFoundError:
        logging.error(f"Error: Image file not found at {image_path}")
        return
    except Exception as e:
        logging.error(f"Error opening image: {e}")
        return

    prompt = 'Detect up to 5 furnitures in the image. Provide the bounding box coordinates. The answer should follow the json format: [{"box_2d": {"xmax": <xmax>, "ymax": <ymax>,"xmin": <xmin>,"ymin": <ymin>}, "label": <label> }, ...]. Considere 0-1000 scale.'

    # The context manager guarantees the underlying file handle is released
    # on every exit path below.
    with source:
        response_text = _get_gemini_response(model, prompt, source)
        if not response_text:
            return

        try:
            detections = json.loads(response_text)
        except json.JSONDecodeError as e:
            logging.error(f"Error decoding JSON response: {e}")
            return
        logging.info(detections)

        # A lone detection object is accepted as a one-element list; any
        # other non-list payload cannot be drawn.
        if isinstance(detections, dict):
            detections = [detections]
        if not isinstance(detections, list):
            logging.error(
                f"Error: expected a JSON list of detections, got {type(detections).__name__}."
            )
            return

        # Draw on an RGB copy: JPEG cannot store alpha or palette modes, and
        # RGB keeps the "red" outline red for grayscale inputs.
        try:
            annotated = source.convert("RGB")
        except OSError as e:
            logging.error(f"Error reading image data: {e}")
            return

    image_width, image_height = annotated.size
    scale_x = image_width / IMAGE_SCALE
    scale_y = image_height / IMAGE_SCALE
    draw_boxes(annotated, detections, scale_x, scale_y)

    output_path = os.path.splitext(image_path)[0] + "_detected.jpg"
    try:
        annotated.save(output_path)
    except OSError as e:
        logging.error(f"Error saving image to {output_path}: {e}")
        return
    logging.info(f"Image with bounding boxes saved to {output_path}")
if __name__ == "__main__":
    # Command-line entry point: exactly one argument, the image path.
    if len(sys.argv) == 2:
        image_path = sys.argv[1]
    else:
        logging.error("Usage: python image_detection.py <image_path>")
        sys.exit(1)

    # The API key comes from the environment (populated from .env at import).
    api_key = os.environ.get("GEMINI_API_KEY")
    if api_key:
        detect_objects(image_path, api_key)
    else:
        logging.error(
            "Error: GEMINI_API_KEY not found in environment variables or .env file."
        )
        sys.exit(1)