Option to NOT recenter and resize during preprocess_image.py (ashawkey#265)

- Updates preprocess_image.py to add --recenter and --resize options; these will be turned off for multiview_zero123 (a short usage note follows the change summary below)
- Saves a copy of the image_config CSV file in the experiment workspace (since we occasionally modify it across experiments)
voletiv authored May 12, 2023
1 parent 04062c9 commit 5573b5e
Showing 3 changed files with 49 additions and 32 deletions.
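
Usage note for the new flags: --recenter and --resize are declared with argparse's type=bool, and Python's bool() returns True for any non-empty string, so passing --recenter False on the command line does NOT disable recentering; only an empty string parses as False. A minimal standard-library check (nothing below is specific to this repository):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--recenter', type=bool, default=True)
parser.add_argument('--resize', type=bool, default=True)

print(parser.parse_args([]).recenter)                       # True  (default)
print(parser.parse_args(['--recenter', 'False']).recenter)  # True  (non-empty string is truthy)
print(parser.parse_args(['--recenter', '']).recenter)       # False (empty string is falsy)

So to turn either option off from the shell, pass an empty string (e.g. --recenter ''), or invoke the script with the defaults changed, as the multiview zero123 pipeline is expected to do.
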
7 changes: 6 additions & 1 deletion nerf/utils.py
@@ -6,6 +6,7 @@
import psutil
from pathlib import Path
import random
import shutil
import warnings
import tensorboardX

@@ -244,6 +245,10 @@ def __init__(self,
self.best_path = f"{self.ckpt_path}/{self.name}.pth"
os.makedirs(self.ckpt_path, exist_ok=True)

# Save a copy of image_config in the experiment workspace
if opt.image_config is not None:
shutil.copyfile(opt.image_config, os.path.join(self.workspace, os.path.basename(opt.image_config)))

self.log(f'[INFO] Cmdline: {self.argv}')
self.log(f'[INFO] Trainer: {self.name} | {self.time_stamp} | {self.device} | {"fp16" if self.fp16 else "fp32"} | {self.workspace}')
self.log(f'[INFO] #parameters: {sum([p.numel() for p in model.parameters() if p.requires_grad])}')
@@ -713,7 +718,7 @@ def train(self, train_loader, valid_loader, test_loader, max_epochs):
if self.epoch % self.opt.eval_interval == 0:
self.evaluate_one_epoch(valid_loader)
self.save_checkpoint(full=False, best=True)

if self.epoch % self.opt.test_interval == 0 or self.epoch == max_epochs:
self.test(test_loader)
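
For quick reference, the new lines added to nerf/utils.py above simply archive the image_config CSV next to the run's outputs. A minimal sketch of the same idea as a standalone helper (archive_image_config is a hypothetical name, not something defined in the repo):

import os
import shutil

def archive_image_config(image_config, workspace):
    # Keep a copy of the per-view config CSV inside the experiment workspace,
    # since the CSV is occasionally edited between experiments.
    if image_config is not None:
        os.makedirs(workspace, exist_ok=True)
        shutil.copyfile(image_config, os.path.join(workspace, os.path.basename(image_config)))
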

70 changes: 41 additions & 29 deletions preprocess_image.py
@@ -1,4 +1,5 @@
import os
import sys
import cv2
import argparse
import numpy as np
@@ -26,7 +27,7 @@ def __init__(self, device='cuda'):
trimap_erosion_iters=5,
fp16=True,
)

@torch.no_grad()
def __call__(self, image):
# image: [H, W, 3] array in [0, 255].
@@ -36,7 +37,7 @@ def __call__(self, image):
image = np.array(image)

return image

class BLIP2():
def __init__(self, device='cuda'):
self.device = device
@@ -91,7 +92,7 @@ def __init__(self, task='depth', device='cuda'):
self.model.load_state_dict(state_dict)
self.model.eval().to(device)


@torch.no_grad()
def __call__(self, image):
# image: np.ndarray, uint8, [H, W, 3]
@@ -119,8 +120,10 @@ def __call__(self, image):
parser.add_argument('path', type=str, help="path to image (png, jpeg, etc.)")
parser.add_argument('--size', default=256, type=int, help="output resolution")
parser.add_argument('--border_ratio', default=0.2, type=float, help="output border ratio")
parser.add_argument('--recenter', type=bool, default=True, help="recenter, potentially not helpful for multiview zero123")
parser.add_argument('--resize', type=bool, default=True, help="resize image to opt.size")
opt = parser.parse_args()

out_dir = os.path.dirname(opt.path)
out_rgba = os.path.join(out_dir, os.path.basename(opt.path).split('.')[0] + '_rgba.png')
out_depth = os.path.join(out_dir, os.path.basename(opt.path).split('.')[0] + '_depth.png')
@@ -157,28 +160,40 @@ def __call__(self, image):
normal[~mask] = 0
del dpt_normal_model

# rescale and recenter
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)

coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)

# recenter
if opt.recenter:
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)

coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)

else:
final_rgba = carved_image
final_depth = depth
final_normal = normal

# resize
if opt.resize:
final_rgba = cv2.resize(final_rgba, (opt.size, opt.size), interpolation=cv2.INTER_AREA)
final_depth = cv2.resize(final_depth, (opt.size, opt.size), interpolation=cv2.INTER_AREA)
final_normal = cv2.resize(final_normal, (opt.size, opt.size), interpolation=cv2.INTER_AREA)

# write output
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
cv2.imwrite(out_depth, final_depth)
@@ -191,6 +206,3 @@ def __call__(self, image):
# with open(out_caption, 'w') as f:
# f.write(caption)
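
As a reading aid for the recenter branch above (not code from this commit): the bounding-box arithmetic centers the foreground in a square canvas with a configurable border. The sketch below assumes the crop is resized to (w2, h2) before pasting, consistent with the lines this diff replaces:

import cv2
import numpy as np

def recenter_rgba(rgba, mask, size=256, border_ratio=0.2):
    # Bounding box of the foreground mask (axis 0 = rows, axis 1 = columns).
    coords = np.nonzero(mask)
    x_min, x_max = coords[0].min(), coords[0].max()
    y_min, y_max = coords[1].min(), coords[1].max()
    h, w = x_max - x_min, y_max - y_min

    # Scale so the longer side of the box fills (1 - border_ratio) of the output.
    desired = int(size * (1 - border_ratio))
    scale = desired / max(h, w)
    h2, w2 = int(h * scale), int(w * scale)

    # Paste the resized crop into the middle of an empty size x size canvas.
    out = np.zeros((size, size, 4), dtype=np.uint8)
    x2, y2 = (size - h2) // 2, (size - w2) // 2
    out[x2:x2 + h2, y2:y2 + w2] = cv2.resize(
        rgba[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
    return out
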




4 changes: 2 additions & 2 deletions requirements.txt
@@ -2,7 +2,7 @@ tqdm
rich
ninja
numpy
pandas
pandas
scipy
scikit-learn
matplotlib
@@ -24,7 +24,7 @@ dearpygui

# for stable-diffusion
huggingface_hub
diffusers >= 0.9.0
diffusers >= 0.9.0
accelerate
transformers

