
Commit

kalman_camshift
techfort committed Jun 14, 2015
1 parent 4e09e8a commit ffe4f9a
Showing 7 changed files with 180 additions and 47 deletions.
2 changes: 2 additions & 0 deletions NOTES.md
@@ -0,0 +1,2 @@
AKAZE (3.0)
FAST/ORB with OCL and CUDA
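
For context, AKAZE ships as a keypoint detector/descriptor in OpenCV 3.0, and ORB builds on FAST corners. The sketch below is only an illustration of trying both on a still image; it assumes OpenCV 3.0 factory functions, uses images/test.jpg (added in this commit) as a convenient input, and does not show the OCL/CUDA variants the note mentions.

import cv2

img = cv2.imread("images/test.jpg", cv2.IMREAD_GRAYSCALE)

# AKAZE (new in OpenCV 3.0): detect keypoints and compute binary descriptors
akaze = cv2.AKAZE_create()
kp_akaze, desc_akaze = akaze.detectAndCompute(img, None)

# ORB: FAST keypoints with rotated BRIEF descriptors
orb = cv2.ORB_create()
kp_orb, desc_orb = orb.detectAndCompute(img, None)

print(len(kp_akaze), len(kp_orb))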
Binary file added chapter8/kalman.jpg
59 changes: 59 additions & 0 deletions chapter8/kalman_camshift.py
@@ -0,0 +1,59 @@
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# take the first frame of the video
ret, frame = cap.read()

# set up the initial location of the tracking window
r, h, c, w = 300, 200, 400, 300  # simply hardcoded values
track_window = (c, r, w, h)

# build a hue histogram of the region of interest, used for back projection
roi = frame[r:r+h, c:c+w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((160., 30., 32.)), np.array((180., 120., 255.)))
roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

# constant-velocity Kalman filter: state (x, y, vx, vy), measurement (x, y)
kalman = cv2.KalmanFilter(4, 2)
kalman.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]], np.float32)
kalman.transitionMatrix = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]], np.float32)
kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], np.float32) * 0.03

measurement = np.zeros((2, 1), np.float32)
prediction = np.zeros((2, 1), np.float32)

def center(points):
    """calculates the centroid of the four box points"""
    x = (points[0][0] + points[1][0] + points[2][0] + points[3][0]) / 4.0
    y = (points[0][1] + points[1][1] + points[2][1] + points[3][1]) / 4.0
    return np.array([np.float32(x), np.float32(y)], np.float32)

while True:
    ret, frame = cap.read()

    if ret == True:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # track with CamShift, then correct the Kalman filter with the tracked centre
        ret, track_window = cv2.CamShift(dst, track_window, term_crit)

        pts = cv2.boxPoints(ret)
        pts = np.int0(pts)
        (cx, cy), radius = cv2.minEnclosingCircle(pts)
        kalman.correct(center(pts))
        img2 = cv2.polylines(frame, [pts], True, 255, 2)
        prediction = kalman.predict()
        cv2.circle(frame, (int(prediction[0]), int(prediction[1])), int(radius), (0, 255, 0))
        cv2.imshow('img2', img2)
        k = cv2.waitKey(60) & 0xff
        if k == 27:
            break

    else:
        break

cv2.destroyAllWindows()
cap.release()
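
The filter above uses a constant-velocity model: the state is (x, y, vx, vy) and the measurement is the (x, y) centre returned by center(). As a minimal sketch of the correct/predict cycle in isolation, using the same matrices, the diagonal motion below is made up purely for illustration.

import numpy as np
import cv2

kf = cv2.KalmanFilter(4, 2)
kf.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]], np.float32)
kf.transitionMatrix = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]], np.float32)
kf.processNoiseCov = np.eye(4, dtype=np.float32) * 0.03

# feed a few synthetic measurements moving diagonally and watch the prediction follow
for i in range(5):
    measured = np.array([[np.float32(10 + 5 * i)], [np.float32(20 + 5 * i)]])
    kf.correct(measured)          # update the filter with the observed centre
    predicted = kf.predict()      # predicted (x, y, vx, vy) for the next step
    print(predicted[0][0], predicted[1][0])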
Binary file removed chapter8/q.jpg
Binary file not shown.
166 changes: 119 additions & 47 deletions chapter8/surveillance_demo/main.py
@@ -1,83 +1,155 @@
"""
Surveillance Demo: Tracking Pedestrians in Camera Feed
#! /usr/bin/python

"""Surveillance Demo: Tracking Pedestrians in Camera Feed
The application opens a video (could be a camera or a video file)
and tracks pedestrians in the video.
The application opens a video (could be a camera or a video file)
and tracks pedestrians in the video.
"""
__author__ = "joe minichino"
__copyright__ = "property of mankind."
__license__ = "MIT"
__version__ = "0.0.1"
__maintainer__ = "Joe Minichino"
__email__ = "[email protected]"
__status__ = "Development"

import cv2
import numpy as np
import os.path as path
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-a", "--algorithm",
                    help = "m (or nothing) for meanShift and c for camshift")
args = vars(parser.parse_args())

def center(points):
    """calculates centroid of a given matrix"""
    x = (points[0][0] + points[1][0] + points[2][0] + points[3][0]) / 4
    y = (points[0][1] + points[1][1] + points[2][1] + points[3][1]) / 4
    return np.array([np.float32(x), np.float32(y)], np.float32)

colors = [[0,0,0],[0,0,255],[0,255,0],[255,0,0],[255,255,0],[255,0,255]]
font = cv2.FONT_HERSHEY_SIMPLEX

"""
each pedestrian is composed of a ROI, an ID and a Kalman filter
so we create a Pedestrian class to hold the object state
"""
class Pedestrian():
"""Pedestrian class
each pedestrian is composed of a ROI, an ID and a Kalman filter
so we create a Pedestrian class to hold the object state
"""
def __init__(self, id, frame, track_window):
"""init the pedestrian object with track window coordinates"""
# set up the roi
self.id = int(id)
x,y,w,h = track_window
self.track_window = track_window
self.roi = cv2.cvtColor(frame[y:y+h, x:x+w], cv2.COLOR_BGR2HSV)

roi_hist = cv2.calcHist([self.roi], [0], None, [16], [0, 180])
self.roi_hist = cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

# set up the kalman
self.kalman = cv2.KalmanFilter(4,2)
self.kalman.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]],np.float32)
self.kalman.transitionMatrix = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]],np.float32)
self.kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],np.float32) * 0.03
self.measurement = np.array((2,1), np.float32)
self.prediction = np.zeros((2,1), np.float32)
self.term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
self.center = None
self.update(frame)

    def __del__(self):
        print "Pedestrian %d destroyed" % self.id

    def update(self, frame):
        # print "updating %d " % self.id
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        back_project = cv2.calcBackProject([hsv], [0], self.roi_hist, [0, 180], 1)

        print "updating %d" % self.id

        if args.get("algorithm") == "c":
            ret, self.track_window = cv2.CamShift(back_project, self.track_window, self.term_crit)
            pts = cv2.boxPoints(ret)
            pts = np.int0(pts)
            self.center = center(pts)
            cv2.polylines(frame, [pts], True, 255, 1)

        if not args.get("algorithm") or args.get("algorithm") == "m":
            ret, self.track_window = cv2.meanShift(back_project, self.track_window, self.term_crit)
            x, y, w, h = self.track_window
            self.center = center([[x, y], [x+w, y], [x, y+h], [x+w, y+h]])
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 1)

        self.kalman.correct(self.center)
        prediction = self.kalman.predict()
        cv2.circle(frame, (int(prediction[0]), int(prediction[1])), 4, (0, 255, 0), -1)

        # fake shadow
        cv2.putText(frame, "ID: %d -> %s" % (self.id, self.center), (11, (self.id + 1) * 25 + 1),
                    font, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
        # actual info
        cv2.putText(frame, "ID: %d -> %s" % (self.id, self.center), (10, (self.id + 1) * 25),
                    font, 0.6, (0, 255, 0), 1, cv2.LINE_AA)

def main():
    # camera = cv2.VideoCapture(path.join(path.dirname(__file__), "traffic.flv"))
    camera = cv2.VideoCapture(path.join(path.dirname(__file__), "768x576.avi"))
    # camera = cv2.VideoCapture(path.join(path.dirname(__file__), "..", "movie.mpg"))

    bs = cv2.createBackgroundSubtractorKNN(detectShadows = True)
    cv2.namedWindow("surveillance")
    pedestrians = {}
    firstFrame = True
    frames = 0

    while True:
        print " -------------------- FRAME %d --------------------" % frames
        grabbed, frame = camera.read()
        if (grabbed is False):
            print "failed to grab frame."
            break

        fgmask = bs.apply(frame)

        # this is just to let the background subtractor build a bit of history
        if frames < 30:
            frames += 1
            continue

        th = cv2.threshold(fgmask.copy(), 127, 255, cv2.THRESH_BINARY)[1]
        th = cv2.erode(th, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3)), iterations = 2)
        dilated = cv2.dilate(th, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8,3)), iterations = 2)
        image, contours, hier = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        counter = 0
        for c in contours:
            if cv2.contourArea(c) > 500:
                (x, y, w, h) = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h), colors[counter % 6], 1)
                # only create pedestrians in the first frame, then just follow the ones you have
                if firstFrame is True:
                    pedestrians[counter] = Pedestrian(counter, frame, (x, y, w, h))
                counter += 1

        for i, p in pedestrians.iteritems():
            p.update(frame)

        firstFrame = False
        frames += 1

        cv2.imshow("surveillance", frame)
        cv2.imshow("diff", image)
        if cv2.waitKey(90) & 0xff == 27:
            break

if __name__ == "__main__":
    main()
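
One portability note, offered as an assumption rather than as part of this commit: the three-value unpacking image, contours, hier = cv2.findContours(...) matches OpenCV 3.x, while OpenCV 4.x returns only contours and hierarchy. A small wrapper like the sketch below (the helper name find_contours is made up for illustration) would keep the script running on either version.

import cv2

def find_contours(binary_img):
    # OpenCV 3.x returns (image, contours, hierarchy); OpenCV 4.x returns (contours, hierarchy)
    results = cv2.findContours(binary_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(results) == 3:
        _, contours, hier = results
    else:
        contours, hier = results
    return contours, hier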
Binary file added chapter8/surveillance_demo/traffic.flv
Binary file not shown.
Binary file added images/test.jpg
