Skip to content

Commit

Permalink
feat: recording and sending images to psi
Browse files: browse the repository at this point in the history
  • Loading branch information
sohamtiwari3120 committed Oct 1, 2023
1 parent 550e901 commit a57180a
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 21 deletions.
1 change: 1 addition & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
audio_port = 60001
doa_port = 60002
vad_port = 60003
images_port = 60004

confusion_classifier_res_port = 61001

Expand Down
10 changes: 6 additions & 4 deletions confusion_model/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(
data_type: str = "window",
label_dict: dict = EMOTION_NO,
device: str = "cpu",
verbose: bool = "False"
verbose: bool = False
):
"""
Initialize trained model for inference
Expand Down Expand Up @@ -74,6 +74,7 @@ def __init__(
data_type: str = "window",
label_dict: dict = EMOTION_NO,
device: str = "cpu",
cv2_device: str = "cpu",
multiclass: bool = False,
haar_path: str = None,
):
Expand All @@ -82,14 +83,15 @@ def __init__(
If needed load CNN featurizer models for embedding
"""
self.feat_type = load_model_path.split("/")[-1].split(".")[0].split("_")[-3]
self.cv2_device = cv2_device
if self.feat_type == "CNN":
# Default extraction, only works on newer cv2 releases
if haar_path is None:
haar_path = cv2.data.haarcascades + "haarcascade_frontalface_alt.xml"

# If running Haar Cascades on Cuda, will need to use cuda optimized classifier
# Currently hard-coding Haar cascade Hyperparams
if self.device == "cuda":
if self.cv2_device == "cuda":
self.face_extractor = cv2.cuda_CascadeClassifier.create(haar_path)
self.face_extractor.setMinNeighbors(5)
self.face_extractor.setMinObjectSize((10, 10))
Expand Down Expand Up @@ -127,7 +129,7 @@ def _face_extraction_harr(self, image: Image):
# Take PIL image and turn it into CV2 image
col_img, gray_img = convert_from_image_to_cv2(image, new_area=None)
# If GPU, need to turn from numpy array to GPU Matrix and back
if self.device == "cuda":
if self.cv2_device == "cuda":
cuFrame = cv2.cuda_GpuMat(gray_img)
boxes = self.face_extractor.detectMultiScale(cuFrame).download()
# Given we return anything, then unpack the value
Expand Down Expand Up @@ -250,7 +252,7 @@ def run_inference(
multiclass=False,
label_dict=EMOTION_NO,
device="cuda",
haar_path="/home/teledia/Desktop/nvaikunt/ConfusionDataset/data/haarcascade_frontalface_alt_cuda.xml",
haar_path="/home/teledia/Desktop/nvaikunt/ConfusionDataset/data/haarcascade_frontalface_alt.xml",
# device="cpu",
# haar_path=None
)
Expand Down
2 changes: 1 addition & 1 deletion send_nano_ip_to_psi.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def main():
"doa": f"tcp://{jetson_ip}:{doa_port}",
"vad": f"tcp://{jetson_ip}:{vad_port}",
"cvPreds": f"tcp://{jetson_ip}:{confusion_classifier_res_port}",

"images": f"tcp://{jetson_ip}:{images_port}",
}
) # erebor"
# request = json.dumps({"sensorVideoText":"tcp://128.2.212.138:40000", "sensorAudio": "tcp://128.2.212.138:40001", "sensorDOA": "tcp://128.2.212.138:40002", "sensorVAD": "tcp://128.2.212.138:40003"}) # erebor"
Expand Down
33 changes: 17 additions & 16 deletions video_scripts/send_video_dict_with_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
from zmq_utils import *
from confusion_model.inference import ConfusionInference
from video_scripts.camera import RealSenseCamera

from confusion_model.constants import *
from PIL import Image
import base64

camera = RealSenseCamera(res=(640, 480))

context = zmq.Context()
socket = context.socket(zmq.PUB)
socket.bind(f"tcp://*:{confusion_classifier_res_port}")
socket.bind(f"tcp://*:{images_port}")

# Initialize global buffer
BUF_MAX_LEN = 6
Expand All @@ -27,11 +29,11 @@ def confusion_cnn_embed():
multiclass=False,
label_dict=EMOTION_NO,
device="cuda",
haar_path="/home/recrafting5/Desktop/DANCEcollaborative/nvaikunt/ConfusionDataset/data/haarcascade_frontalface_alt_cuda.xml"
haar_path="/home/recrafting5/Desktop/DANCEcollaborative/nvaikunt/ConfusionDataset/data/haarcascade_frontalface_alt.xml"
)
window_len = inference_model.window_len
num_preds = 0
start = time()
start = time.time()

while buffer:
if len(buffer) > window_len:
Expand All @@ -40,13 +42,14 @@ def confusion_cnn_embed():

current_images = []
for bo in buffer_outputs:
h, w = bo[0].size
img = Image.fromarray(bo[0])
h, w = img.size
print("hello", h, w)
curr_image = bo[0].resize((3 * h // 4, 3 * w // 4))
curr_image = img.resize((3 * h // 4, 3 * w // 4))
print(curr_image.size)
curr_images.append(curr_image)
current_images.append(curr_image)

preds = inference_model.run_inference()
preds = inference_model.run_inference(current_images)
payload = preds #ToDo: Convert "preds" type to something that send_payload expects
print(preds)
num_preds += 1
Expand All @@ -59,10 +62,10 @@ def confusion_cnn_embed():
# send_payload(socket, "Remote_PSI_Text", payload)
# print(preds)
# inference_model.feats.pop(0)
send_payload(socket, "cvpreds", payload, originatingTime=buffer_outputs[0][1]) # sending the time when the first image of input window was captured as the originatingTime
# send_payload(socket, "cvpreds", payload, originatingTime=buffer_outputs[0][1]) # sending the time when the first image of input window was captured as the originatingTime
time.sleep(0.01)
print(f"Total number of predictions {num_preds}")
print(f"Total inference time: {time() - start}")
print(f"Total inference time: {time.time() - start}")

def capture_frames():
try:
Expand All @@ -71,19 +74,20 @@ def capture_frames():
depth, img = camera.get_frame_stream()
height = img.shape[0]
width = img.shape[1]
print(height, width)

# if frame_count % 10 == 0: # Add every 10th frame to buffer
with buffer_lock:
buffer.append((img, generate_current_dotnet_datetime_ticks())) # Appending image and current time as tuple to buffer

time.sleep(0.01)

cv.imshow("demo", img)
# cv.imshow("demo", img)

# print('msg:', msg)
# print('msg length', len(msg))

_, img_buffer = cv.imencode('.jpg', img)
payload = base64.b64encode(img_buffer)
send_payload(socket, "images", payload)
key = cv.waitKey(1)
if key == 27:
break
Expand All @@ -97,13 +101,10 @@ def capture_frames():

def main():
    """Run frame capture on a background thread and wait for it to end.

    ``capture_frames`` is started on a daemon thread; this function then
    blocks on ``join()`` until that thread returns.
    """
    # NOTE: only the capture thread is launched here; no inference thread
    # is started from this entry point.
    worker = threading.Thread(target=capture_frames, daemon=True)
    worker.start()
    worker.join()


if __name__ == "__main__":
Expand Down

0 comments on commit a57180a

Please sign in to comment.