Option to NOT recenter and resize during preprocess_image.py (ashawkey#265)

- Updates preprocess_image.py to add --recenter and --resize options; these will be turned off for multiview_zero123 (a short usage note follows the change summary below)
- Saves a copy of the image_config CSV file in the experiment workspace (since we occasionally modify it across experiments)
voletiv authored May 12, 2023
1 parent 04062c9 commit 5573b5e
Showing 3 changed files with 49 additions and 32 deletions.
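
Usage note for the new flags: --recenter and --resize are declared with argparse's type=bool, and Python's bool() returns True for any non-empty string, so passing --recenter False on the command line does NOT disable recentering; only an empty string parses as False. A minimal standard-library check (nothing below is specific to this repository):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--recenter', type=bool, default=True)
parser.add_argument('--resize', type=bool, default=True)

print(parser.parse_args([]).recenter)                       # True  (default)
print(parser.parse_args(['--recenter', 'False']).recenter)  # True  (non-empty string is truthy)
print(parser.parse_args(['--recenter', '']).recenter)       # False (empty string is falsy)

So to turn either option off from the shell, pass an empty string (e.g. --recenter ''), or invoke the script with the defaults changed, as the multiview zero123 pipeline is expected to do.
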
7 changes: 6 additions & 1 deletion nerf/utils.py
@@ -6,6 +6,7 @@
import psutil
from pathlib import Path
import random
import shutil
import warnings
import tensorboardX

@@ -244,6 +245,10 @@ def __init__(self,
self.best_path = f"{self.ckpt_path}/{self.name}.pth"
os.makedirs(self.ckpt_path, exist_ok=True)

# Save a copy of image_config in the experiment workspace
if opt.image_config is not None:
shutil.copyfile(opt.image_config, os.path.join(self.workspace, os.path.basename(opt.image_config)))

self.log(f'[INFO] Cmdline: {self.argv}')
self.log(f'[INFO] Trainer: {self.name} | {self.time_stamp} | {self.device} | {"fp16" if self.fp16 else "fp32"} | {self.workspace}')
self.log(f'[INFO] #parameters: {sum([p.numel() for p in model.parameters() if p.requires_grad])}')
@@ -713,7 +718,7 @@ def train(self, train_loader, valid_loader, test_loader, max_epochs):
if self.epoch % self.opt.eval_interval == 0:
self.evaluate_one_epoch(valid_loader)
self.save_checkpoint(full=False, best=True)

if self.epoch % self.opt.test_interval == 0 or self.epoch == max_epochs:
self.test(test_loader)
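
For quick reference, the new lines added to nerf/utils.py above simply archive the image_config CSV next to the run's outputs. A minimal sketch of the same idea as a standalone helper (archive_image_config is a hypothetical name, not something defined in the repo):

import os
import shutil

def archive_image_config(image_config, workspace):
    # Keep a copy of the per-view config CSV inside the experiment workspace,
    # since the CSV is occasionally edited between experiments.
    if image_config is not None:
        os.makedirs(workspace, exist_ok=True)
        shutil.copyfile(image_config, os.path.join(workspace, os.path.basename(image_config)))
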

70 changes: 41 additions & 29 deletions preprocess_image.py
@@ -1,4 +1,5 @@
import os
import sys
import cv2
import argparse
import numpy as np
@@ -26,7 +27,7 @@ def __init__(self, device='cuda'):
trimap_erosion_iters=5,
fp16=True,
)

@torch.no_grad()
def __call__(self, image):
# image: [H, W, 3] array in [0, 255].
@@ -36,7 +37,7 @@ def __call__(self, image):
image = np.array(image)

return image

class BLIP2():
def __init__(self, device='cuda'):
self.device = device
@@ -91,7 +92,7 @@ def __init__(self, task='depth', device='cuda'):
self.model.load_state_dict(state_dict)
self.model.eval().to(device)


@torch.no_grad()
def __call__(self, image):
# image: np.ndarray, uint8, [H, W, 3]
@@ -119,8 +120,10 @@ def __call__(self, image):
parser.add_argument('path', type=str, help="path to image (png, jpeg, etc.)")
parser.add_argument('--size', default=256, type=int, help="output resolution")
parser.add_argument('--border_ratio', default=0.2, type=float, help="output border ratio")
parser.add_argument('--recenter', type=bool, default=True, help="recenter, potentially not helpful for multiview zero123")
parser.add_argument('--resize', type=bool, default=True, help="resize image to opt.size")
opt = parser.parse_args()

out_dir = os.path.dirname(opt.path)
out_rgba = os.path.join(out_dir, os.path.basename(opt.path).split('.')[0] + '_rgba.png')
out_depth = os.path.join(out_dir, os.path.basename(opt.path).split('.')[0] + '_depth.png')
@@ -157,28 +160,40 @@ def __call__(self, image):
normal[~mask] = 0
del dpt_normal_model

# rescale and recenter
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)

coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)

# recenter
if opt.recenter:
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)

coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)

else:
final_rgba = carved_image
final_depth = depth
final_normal = normal

# resize
if opt.resize:
final_rgba = cv2.resize(final_rgba, (opt.size, opt.size), interpolation=cv2.INTER_AREA)
final_depth = cv2.resize(final_depth, (opt.size, opt.size), interpolation=cv2.INTER_AREA)
final_normal = cv2.resize(final_normal, (opt.size, opt.size), interpolation=cv2.INTER_AREA)

# write output
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
cv2.imwrite(out_depth, final_depth)
@@ -191,6 +206,3 @@ def __call__(self, image):
# with open(out_caption, 'w') as f:
# f.write(caption)
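
As a reading aid for the recenter branch above (not code from this commit): the bounding-box arithmetic centers the foreground in a square canvas with a configurable border. The sketch below assumes the crop is resized to (w2, h2) before pasting, consistent with the lines this diff replaces:

import cv2
import numpy as np

def recenter_rgba(rgba, mask, size=256, border_ratio=0.2):
    # Bounding box of the foreground mask (axis 0 = rows, axis 1 = columns).
    coords = np.nonzero(mask)
    x_min, x_max = coords[0].min(), coords[0].max()
    y_min, y_max = coords[1].min(), coords[1].max()
    h, w = x_max - x_min, y_max - y_min

    # Scale so the longer side of the box fills (1 - border_ratio) of the output.
    desired = int(size * (1 - border_ratio))
    scale = desired / max(h, w)
    h2, w2 = int(h * scale), int(w * scale)

    # Paste the resized crop into the middle of an empty size x size canvas.
    out = np.zeros((size, size, 4), dtype=np.uint8)
    x2, y2 = (size - h2) // 2, (size - w2) // 2
    out[x2:x2 + h2, y2:y2 + w2] = cv2.resize(
        rgba[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
    return out
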




4 changes: 2 additions & 2 deletions requirements.txt
@@ -2,7 +2,7 @@ tqdm
rich
ninja
numpy
pandas
pandas
scipy
scikit-learn
matplotlib
@@ -24,7 +24,7 @@ dearpygui

# for stable-diffusion
huggingface_hub
diffusers >= 0.9.0
diffusers >= 0.9.0
accelerate
transformers

