allow rectangular images, RGB images
jchen490 committed Apr 14, 2021
1 parent cc5c60f commit bcd4190
Showing 5 changed files with 289 additions and 277 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -53,7 +53,7 @@ Result images will be displayed if the "--viz" flag is used, otherwise only the
Use the "--save_frame" flag to save results from a specific frame to the "results" folder.

```
python baselines.py --dataset university --method threshold --viz --save_frame 100
python baselines.py --dataset university --method threshold --viz --save_frame 100 --min_cluster 200
```

## Convolutional Neural Network
@@ -67,7 +67,7 @@ Before doing this, make sure to run "baselines.py" with "--method backSub" to sa
Otherwise, only the first component is used.

```
python process_record.py --dataset university
python process_record.py --dataset university --imsize 385
```

Train the network using "train.py" (150 epochs). The trained model will be saved in "dataset/myfolder/model.ckpt".
@@ -81,7 +81,7 @@ the confidence level above which a pixel will be considered a positive detection
Result images will be displayed if the "--viz" flag is used, otherwise only the accuracy is computed.

```
python test.py --dataset university --detection_threshold 0.99 --viz
python test.py --dataset university --imsize 385 --detection_threshold 0.99 --viz --min_cluster 200
```

## Dependencies
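The README hunks above add `--min_cluster` and `--imsize` to the example commands, but the excerpt shows no matching `train.py` invocation and does not yet document the new `--use_rgb` option. Plausible commands, assuming these scripts all follow the same `--dataset` flag convention (the exact flags for `train.py` are not confirmed by this diff):

```
# Hypothetical training run; flag names assumed from the other scripts in this repo.
python train.py --dataset university

# Keep the original RGB channels instead of stacked grayscale (option added in this commit).
python process_record.py --dataset university --imsize 385 --use_rgb
```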
29 changes: 18 additions & 11 deletions baselines.py
@@ -15,19 +15,26 @@
#method = 'kmeans'
dataset = 'beach'
save_frame = -1 #98, 130
min_cluster = 10
for i in range(len(sys.argv)-1):
if sys.argv[i]=='--method':
method = sys.argv[i+1]
elif sys.argv[i]=='--dataset':
dataset = sys.argv[i+1]
elif sys.argv[i]=='--save_frame':
save_frame = int(sys.argv[i+1])
elif sys.argv[i]=='--min_cluster':
min_cluster = int(sys.argv[i+1])

backSub = cv2.createBackgroundSubtractorMOG2()
#backSub = cv2.createBackgroundSubtractorKNN()
image_id = 1
fig = plt.figure(figsize=(20,30))
xbound, ybound, imscale = [int(t) for t in open('dataset/%s/params.txt'%dataset).readline().split()]
try:
xbound, ybound, imwidth, imheight = [int(t) for t in open('dataset/%s/params.txt'%dataset).readline().split()]
except ValueError:
xbound, ybound, imscale = [int(t) for t in open('dataset/%s/params.txt'%dataset).readline().split()]
imwidth = imheight = imscale
num_samples = len(glob.glob('dataset/%s/label*.png'%dataset))
num_test = num_samples - int(num_samples*0.8)
test_idx = num_samples - num_test + 1
@@ -46,16 +53,17 @@
image_id += 1
continue
image_filename = 'dataset/%s/%d.png' % (dataset,image_id)
if os.path.exists(image_filename):
label_filename = 'dataset/%s/label%d.png'%(dataset,image_id)
if os.path.exists(image_filename) and os.path.exists(label_filename):
I = numpy.array(Image.open(image_filename))
if len(I.shape)>2:
I = numpy.mean(I, axis=2)
else:
break
gt = numpy.array(Image.open('dataset/%s/label%d.png'%(dataset,image_id)))
gt = numpy.array(Image.open(label_filename))
gt = gt > 0
dt = numpy.zeros(I.shape, dtype=bool)
image_np = I[ybound:ybound+imscale, xbound:xbound+imscale]
image_np = I[ybound:ybound+imheight, xbound:xbound+imwidth]
t1 = time.time()
if method=='threshold':
Isub = image_np.astype(numpy.uint8)
@@ -137,14 +145,14 @@
xm += xl
mask[ym,xm] = True
t2 = time.time()
dt[ybound:ybound+imscale,xbound:xbound+imscale] = mask
dt[ybound:ybound+imheight,xbound:xbound+imwidth] = mask
err_viz = numpy.zeros((image_np.shape[0], image_np.shape[1], 3), dtype=numpy.uint8)
if image_id < test_idx:
image_id += 1
continue

gt_sub = gt[ybound:ybound+imscale, xbound:xbound+imscale] > 0
dt_sub = dt[ybound:ybound+imscale, xbound:xbound+imscale]
gt_sub = gt[ybound:ybound+imheight, xbound:xbound+imwidth] > 0
dt_sub = dt[ybound:ybound+imheight, xbound:xbound+imwidth]
current_tp = numpy.logical_and(gt_sub,dt_sub)
current_fp = numpy.logical_and(numpy.logical_not(gt_sub),dt_sub)
current_fn = numpy.logical_and(gt_sub,numpy.logical_not(dt_sub))
@@ -164,15 +172,14 @@
ret, dt_com = cv2.connectedComponents(dt_sub.astype(numpy.uint8))
num_gt = 0
num_dt = 0
min_cluster_size = 10 if dataset=='beach' or dataset=='shore' else 200
for i in range(1, gt_com.max()+1):
if numpy.sum(gt_com==i) > min_cluster_size:
if numpy.sum(gt_com==i) > min_cluster:
num_gt += 1
gt_com[gt_com==i] = num_gt
else:
gt_com[gt_com==i] = 0
for i in range(1, dt_com.max()+1):
if numpy.sum(dt_com==i) > min_cluster_size:
if numpy.sum(dt_com==i) > min_cluster:
num_dt += 1
dt_com[dt_com==i] = num_dt
else:
@@ -226,7 +233,7 @@
dt_viz[y1:y2, x2, :] = [255,255,0]

comp_time.append(t2 - t1)
# print('Image #%d Precision:%.2f/%.2f Recall:%.2f/%.2f (%.2fs)'%(image_id, prc,obj_prc,rcl,obj_rcl, t2-t1))
print('Image #%d Precision:%.2f/%.2f Recall:%.2f/%.2f (%.2fs)'%(image_id, prc,obj_prc,rcl,obj_rcl, t2-t1))

if image_id == save_frame:
Image.fromarray(image_np.astype(numpy.uint8), mode='L').save('results/original_%d.png'%save_frame)
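Two behavioral notes on the baselines.py hunks above: `params.txt` may now begin with either four values, `xbound ybound imwidth imheight`, for a rectangular crop, or the legacy three, `xbound ybound imscale`, for a square crop (handled by the ValueError fallback), and the previously hard-coded cluster-size threshold (10 for beach/shore, 200 otherwise) is now the `--min_cluster` argument with a default of 10. Reproducing the old behavior for a non-beach dataset would look something like this (sketch only; dataset name taken from the README example):

```
python baselines.py --dataset university --method threshold --min_cluster 200
```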
106 changes: 58 additions & 48 deletions process_record.py
@@ -6,76 +6,86 @@

dataset = 'beach'
use_history = False
use_rgb = False
imsize = None
for i in range(len(sys.argv)):
if sys.argv[i]=='--dataset':
dataset = sys.argv[i+1]
if sys.argv[i]=='--use_history':
use_history = True
if sys.argv[i]=='--dataset':
dataset = sys.argv[i+1]
if sys.argv[i]=='--use_history':
use_history = True
if sys.argv[i]=='--use_rgb':
use_rgb = True
if sys.argv[i]=='--imsize':
imsize = int(sys.argv[i+1])

num_samples = len(glob.glob('dataset/%s/label*.png' % dataset))
train_samples = set(range(int(0.8*num_samples)))
test_samples = set(range(num_samples)) - train_samples
if dataset == 'combined':
train_samples -= set([0,96])
test_samples -= set([224,248])
train_samples -= set([0,96])
test_samples -= set([224,248])
else:
train_samples -= set([0])
train_samples -= set([0])
print('train',train_samples)
print('test',test_samples)
xbound, ybound, imscale = [int(t) for t in open('dataset/%s/params.txt'%dataset).readline().split()]
try:
xbound, ybound, imwidth, imheight = [int(t) for t in open('dataset/%s/params.txt'%dataset).readline().split()]
except ValueError:
xbound, ybound, imscale = [int(t) for t in open('dataset/%s/params.txt'%dataset).readline().split()]
imwidth = imheight = imscale
pos_idx = set()
imsize = 385
train_img = numpy.zeros((len(train_samples), imsize, imsize, 3), dtype=numpy.uint8)
train_labels = numpy.zeros((len(train_samples), imsize, imsize), dtype=numpy.uint8)
test_img = numpy.zeros((len(test_samples), imsize, imsize, 3), dtype=numpy.uint8)
test_labels = numpy.zeros((len(test_samples), imsize, imsize), dtype=numpy.uint8)
train_img = numpy.zeros((len(train_samples), imheight, imwidth, 3), dtype=numpy.uint8)
train_labels = numpy.zeros((len(train_samples), imheight, imwidth), dtype=numpy.uint8)
test_img = numpy.zeros((len(test_samples), imheight, imwidth, 3), dtype=numpy.uint8)
test_labels = numpy.zeros((len(test_samples), imheight, imwidth), dtype=numpy.uint8)
train_count = 0
test_count = 0
previous_img = None

for i in range(1,num_samples+1):
image_np = numpy.array(Image.open('dataset/%s/%d.png'%(dataset,i)))
image_np = image_np[ybound:ybound+imscale,xbound:xbound+imscale].mean(axis=2)
if previous_img is None:
diff_img = numpy.zeros(image_np.shape, dtype=numpy.uint8)
backSub_img = numpy.zeros(image_np.shape, dtype=numpy.uint8)
else:
diff_img = ((image_np - previous_img)/2 + 128).astype(numpy.uint8)
if use_history:
backSub_img = numpy.array(Image.open('dataset/%s/backSub/%d.png'%(dataset,i))).mean(axis=2)
previous_img = image_np
image_h = image_np.shape[0]
image_w = image_np.shape[1]
if use_history:
image_np = numpy.dstack((image_np, diff_img, backSub_img)).astype(numpy.uint8)
else:
image_np = numpy.dstack((image_np, image_np, image_np)).astype(numpy.uint8)
annotation = numpy.array(Image.open('dataset/%s/label%d.png'%(dataset,i)))
for p in numpy.array(numpy.nonzero(annotation)).T:
pos_idx.add(tuple(p))
annotation = annotation[ybound:ybound+imscale,xbound:xbound+imscale]
if imscale!=imsize:
image_np = numpy.array(Image.fromarray(image_np).resize((imsize, imsize), resample=Image.BILINEAR))
annotation = numpy.array(Image.fromarray(annotation).resize((imsize, imsize), resample=Image.BILINEAR))
annotation = numpy.array(annotation > 0, dtype=numpy.uint8)
print(i,image_np.shape,image_np.dtype)
if i-1 in train_samples:
train_img[train_count] = image_np
train_labels[train_count] = annotation
train_count += 1
elif i-1 in test_samples:
test_img[test_count] = image_np
test_labels[test_count] = annotation
test_count += 1
image_np = numpy.array(Image.open('dataset/%s/%d.png'%(dataset,i)))
image_np = image_np[ybound:ybound+imheight,xbound:xbound+imwidth]
image_gray = image_np.mean(axis=2)
if previous_img is None:
diff_img = numpy.zeros(image_gray.shape, dtype=numpy.uint8)
backSub_img = numpy.zeros(image_gray.shape, dtype=numpy.uint8)
else:
diff_img = ((image_gray - previous_img)/2 + 128).astype(numpy.uint8)
if use_history:
backSub_img = numpy.array(Image.open('dataset/%s/backSub/%d.png'%(dataset,i))).mean(axis=2)
previous_img = image_gray
image_h = image_np.shape[0]
image_w = image_np.shape[1]
if use_history:
image_np = numpy.dstack((image_gray, diff_img, backSub_img)).astype(numpy.uint8)
elif not use_rgb:
image_np = numpy.dstack((image_gray, image_gray, image_gray)).astype(numpy.uint8)
annotation = numpy.array(Image.open('dataset/%s/label%d.png'%(dataset,i)))
for p in numpy.array(numpy.nonzero(annotation)).T:
pos_idx.add(tuple(p))
annotation = annotation[ybound:ybound+imheight,xbound:xbound+imwidth]
if imsize is not None and (imwidth!=imsize or imheight!=imsize):
image_np = numpy.array(Image.fromarray(image_np).resize((imsize, imsize), resample=Image.BILINEAR))
annotation = numpy.array(Image.fromarray(annotation).resize((imsize, imsize), resample=Image.BILINEAR))
annotation = numpy.array(annotation > 0, dtype=numpy.uint8)
print(i,image_np.shape,image_np.dtype)
if i-1 in train_samples:
train_img[train_count] = image_np
train_labels[train_count] = annotation
train_count += 1
elif i-1 in test_samples:
test_img[test_count] = image_np
test_labels[test_count] = annotation
test_count += 1

pos_idx = numpy.array(list(pos_idx))
print('pos_idx(%d) x:%d->%d y:%d->%d'%(len(pos_idx),pos_idx[:,1].min(),pos_idx[:,1].max(),pos_idx[:,0].min(),pos_idx[:,0].max()))
print('train_count: %d test_count: %d'%(train_count, test_count))

if use_history:
f = h5py.File('dataset/%s/data_history.h5'%dataset,'w')
f = h5py.File('dataset/%s/data_history.h5'%dataset,'w')
else:
f = h5py.File('dataset/%s/data.h5'%dataset,'w')
f = h5py.File('dataset/%s/data.h5'%dataset,'w')
f.create_dataset('train_img',data=train_img, compression='gzip', compression_opts=4, dtype=numpy.uint8)
f.create_dataset('train_labels',data=train_labels, compression='gzip', compression_opts=4, dtype=numpy.uint8)
f.create_dataset('test_img',data=test_img, compression='gzip', compression_opts=4, dtype=numpy.uint8)
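For context on the process_record.py changes: with `--use_history` off, the grayscale crop is stacked into three identical channels unless the new `--use_rgb` flag keeps the original RGB crop, and `--imsize` now optionally resizes the crop instead of the old fixed 385. A minimal sketch of reading the resulting HDF5 archive; the dataset names come from the `create_dataset` calls above, while the path and shapes are assumptions:

```
import h5py

# data_history.h5 is written instead when --use_history was set.
with h5py.File('dataset/university/data.h5', 'r') as f:
    train_img = f['train_img'][:]        # uint8, (N, H, W, 3); H = W = imsize when --imsize is given
    train_labels = f['train_labels'][:]  # uint8 binary masks, (N, H, W)
    test_img = f['test_img'][:]
print(train_img.shape, train_labels.shape, test_img.shape)
```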
