init
apchenstu committed Jul 9, 2024
0 parents commit 825856d
Showing 164 changed files with 25,678 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .gitignore
@@ -0,0 +1,15 @@
__pycache__/
.idea/
.ipynb_checkpoints/
*.py[cod]
*.so
*.orig
*.o
*.json
*.pth
*.npy
*.ipynb
*.png
logs/*
outputs/*
ckpts/*
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "third_party/diff-surfel-rasterization"]
path = third_party/diff-surfel-rasterization
url = [email protected]:hbb1/diff-surfel-rasterization.git
94 changes: 94 additions & 0 deletions README.md
@@ -0,0 +1,94 @@
# LaRa: Efficient Large-Baseline Radiance Fields

[Project page](https://apchenstu.github.io/LaRa/) | [Paper](https://arxiv.org/abs/2407.04699) | [Data](https://huggingface.co/apchen/LaRa/tree/main/dataset) | [Checkpoint](https://huggingface.co/apchen/LaRa/tree/main/ckpts)<br>

![Teaser image](assets/demo.gif)

## ⭐ New Features
- 2024/04/05: Important update: our method now supports half-precision (bf16) training, achieving more than **2x faster** convergence and about a **1.5 dB** PSNR gain with fewer training epochs!

| Model | PSNR ↑ | SSIM ↑ | Abs err (Geo) ↓ | Epochs | Time (days) | ckpt |
| ------ | ------ | ------ | ------ | ------ | ------ | ------ |
| Paper | 27.65 | 0.951 | 0.0654 | 50 | 3.5 | ------ |
| bf16 | 29.15 | 0.956 | 0.0574 | 30 | 1.5 | [Download](https://huggingface.co/apchen/LaRa/tree/main/ckpts/) |

Please download the pre-trained checkpoint from the provided link and place it in the `ckpts` folder.
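The speed-up above comes from bf16 mixed-precision training. As a rough, hedged illustration only (this is not the repo's `train_lightning.py`, whose Trainer setup may differ), bf16 training is typically enabled in PyTorch Lightning like this:

```python
# Hedged sketch: enabling bf16 mixed precision with PyTorch Lightning 2.x.
# The model/datamodule come from the repo's own code and are not defined here.
import pytorch_lightning as pl

trainer = pl.Trainer(
    precision="bf16-mixed",   # bf16 activations/gradients, fp32 master weights
    devices=[0],              # pick your GPU ids, cf. gpu_id in configs/base.yaml
    max_epochs=30,            # matches the bf16 row in the table above
)
# trainer.fit(model, datamodule=data)   # model / data are provided by the repo
```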

# Installation

```
git clone https://github.com/autonomousvision/LaRa.git --recursive
conda env create --file environment.yml
conda activate lara
```


# Dataset
We used the processed [gobjaverse dataset](https://aigc3d.github.io/gobjaverse/) for training. A download script `tools/download_dataset.py` is provided to automatically download the datasets.

```
python tools/download_dataset.py all
```
Note: The GObjaverse dataset requires approximately 1.4 TB of storage. You can also download a subset of the dataset; see the script for the available options. After the download completes, manually delete the `_temp` folder.

If you would like to process the data yourself, we provide preprocessing scripts for the GObjaverse and Co3D datasets; please check `tools/prepare_dataset_*`.
You can also download our preprocessed data and place it in the `dataset` folder (an expected layout is sketched after this list):
* [gobjaverse](#gobjaverse)
* [Google Scanned Objects](#GSO)
* [Co3D](#Co3D)
* Instant3D - Please contact the authors of Instant3D if you wish to obtain the data for comparison.
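
Based on the `data_root` defaults in `configs/base.yaml` and `configs/infer.yaml` (shown further down in this commit), the `dataset` folder is expected to look roughly like the following; the exact files depend on which subsets you download:

```
dataset/
  gobjaverse/
    gobjaverse.h5            # train/test data_root in configs/base.yaml
  google_scanned_objects/    # GSO evaluation set (configs/infer.yaml)
  Co3D/
    co3d_teddybear.hdf5
    co3d_hydrant.hdf5
  instant3D/                 # data obtained from the Instant3D authors
```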

# Training
```
python train_lightning.py
```
**note:** You can configure the GPU IDs and other parameters in `configs/base.yaml`.
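For example, a minimal edit of `configs/base.yaml` to train on a single GPU could look like this (only `gpu_id` is changed; the other values are the defaults from this commit):

```yaml
gpu_id: [0]        # default in this commit: [4,5,6,7]

train:
  batch_size: 3    # reduce if you run out of GPU memory
  n_epoch: 30
```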

# Evaluation
Our method supports the reconstruction of radiance fields from **multi-view**, **text**, and **single-view** inputs. We provide a pre-trained checkpoint at [ckpt](https://huggingface.co/apchen/LaRa/tree/main/ckpts).

## multi-view to 3D
To reproduce the table results, you can simply use:
```
python eval_all.py
```
**note:** Please double-check that the paths inside the script are correct for your specific case.

## text to 3D
```
python evaluation.py configs/infer.yaml \
    infer.ckpt_path=ckpts/lara.ckpt \
    infer.save_folder=outputs/prompts/ \
    infer.dataset.generator_type=instant3d \
    infer.dataset.prompts=\["a car made out of sushi","a beautiful rainbow fish"\]
```
**note:** You can specify multiple prompts.

## single view to 3D
```
python evaluation.py configs/infer.yaml \
    infer.ckpt_path=ckpts/lara.ckpt \
    infer.save_folder=outputs/single-view/ \
    infer.dataset.generator_type="zero123plus-v1.1" \
    infer.dataset.image_pathes=\["assets/examples/19_dalle3_stump1.png"\]
```
**note:** Supported generator types include `zero123plus-v1.1` and `zero123plus-v1.2` (see the comments in `configs/infer.yaml` for the full list).
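
The `infer.*` arguments above are OmegaConf-style dot-list overrides applied on top of `configs/infer.yaml`. As a hedged illustration only (the repo's `evaluation.py` may load its config differently), such overrides are typically merged like this:

```python
# Hedged sketch, not the repo's actual config loader.
import sys
from omegaconf import OmegaConf

base = OmegaConf.load(sys.argv[1])             # e.g. configs/infer.yaml
overrides = OmegaConf.from_cli(sys.argv[2:])   # e.g. infer.ckpt_path=ckpts/lara.ckpt
cfg = OmegaConf.merge(base, overrides)

print(cfg.infer.ckpt_path, cfg.infer.save_folder)
```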



## Acknowledgements
Our renderer is built upon [2DGS](https://github.com/hbb1/2d-gaussian-splatting). The data preprocessing code for the Co3D dataset is partially borrowed from [Splatter-Image](https://github.com/szymanowiczs/splatter-image/blob/main/data_preprocessing/preprocess_co3d.py). The script for generating multi-view images from text and single-view images is sourced from [GRM](https://github.com/justimyhxu/grm). We thank all the authors for their great repos.

## Citation
If you find our code or paper helpful, please consider citing:
```bibtex
@inproceedings{LaRa,
    author    = {Anpei Chen and Haofei Xu and Stefano Esposito and Siyu Tang and Andreas Geiger},
    title     = {LaRa: Efficient Large-Baseline Radiance Fields},
    booktitle = {European Conference on Computer Vision (ECCV)},
    year      = {2024}
}
```


Binary file added assets/demo.gif
70 changes: 70 additions & 0 deletions configs/base.yaml
@@ -0,0 +1,70 @@
gpu_id: [4,5,6,7]

exp_name: LaRa/release-test
n_views: 4

model:

  encoder_backbone: 'vit_base_patch16_224.dino' # ['vit_small_patch16_224.dino','vit_base_patch16_224.dino']

  n_groups: [16] # n_groups for local attention
  n_offset_groups: 32 # offset radius of 1/n_offset_groups of the scene size

  K: 2 # primitives per-voxel
  sh_degree: 1 # view dependent color

  num_layers: 12
  num_heads: 16

  view_embed_dim: 32
  embedding_dim: 256

  vol_feat_reso: 16
  vol_embedding_reso: 32

  vol_embedding_out_dim: 80

  ckpt_path: null # specify a ckpt path if you want to continue training

train_dataset:
  dataset_name: gobjeverse
  data_root: dataset/gobjaverse/gobjaverse.h5

  split: train
  img_size: [512,512] # image resolution
  n_group: ${n_views} # number of input views per group
  n_scenes: 3000000
  load_normal: True



test_dataset:
  dataset_name: gobjeverse
  data_root: dataset/gobjaverse/gobjaverse.h5

  split: test
  img_size: [512,512]
  n_group: ${n_views}
  n_scenes: 3000000
  load_normal: True

train:
  batch_size: 3
  lr: 4e-4
  beta1: 0.9
  beta2: 0.95
  weight_decay: 0.05
  # betas: [0.9, 0.95]
  warmup_iters: 1000
  n_epoch: 30
  limit_train_batches: 0.2
  limit_val_batches: 0.02
  check_val_every_n_epoch: 1
  start_fine: 5000
  use_rand_views: False
test:
  batch_size: 3

logger:
  name: tensorboard
  dir: logs/${exp_name}
65 changes: 65 additions & 0 deletions configs/infer.yaml
@@ -0,0 +1,65 @@
n_views: 4

infer:
  dataset:
    # dataset_name: gobjeverse
    # data_root: dataset/gobjaverse_280k/gobjaverse_280k.hdf5
    # data_root: dataset/Co3D/co3d_teddybear.hdf5
    # data_root: dataset/Co3D/co3d_hydrant.hdf5

    # dataset_name: GSO
    # data_root: dataset/google_scanned_objects

    # dataset_name: instant3d
    # data_root: dataset/instant3D

    # text to 3D
    dataset_name: mvgen
    generator_type: instant3d
    prompts: ["a car made out of sushi"]
    image_pathes: []

    ## single view to 3D
    # dataset_name: mvgen
    # generator_type: zero123plus-v1.1 # zero123plus-v1.1,zero123plus-v1.2,sv3d
    # prompts: []
    # image_pathes: ['examples/19_dalle3_stump1.png']

    # # unposed inputs
    # dataset_name: unposed
    # image_pathes: examples/unposed/*.png

    split: test
    img_size: [512,512]
    n_group: 4
    n_scenes: 30000
    num_workers: 0
    batch_size: 1

    load_normal: False

  ckpt_path: ckpts/lara.ckpt

  eval_novel_view_only: True
  eval_depth: []
  metric_path: None

  save_folder: outputs/video_vis/mvgen
  video_frames: 120
  mesh_video_frames: 0

  save_mesh: True
  aabb: [-0.5,-0.5,-0.5,0.5,0.5,0.5]

finetuning:
  with_ft: False
  steps: 500

  # lr
  position_lr: 0.000016
  feature_lr: 0.0025
  opacity_lr: 0.05
  scaling_lr: 0.005
  rotation_lr: 0.001


Binary file added configs/render/cathedral.hdr
17 changes: 17 additions & 0 deletions configs/render/cathedral.xml
@@ -0,0 +1,17 @@
<scene version="2.1.0">
    <emitter type="envmap">
        <string name="filename" value="configs/render/cathedral.hdr" />
        <float name="scale" value="0.5"/>
        <transform name="to_world">
            <rotate x="0.5" y="-1.5" z="0.5" angle="20"/>
        </transform>
    </emitter>

    <!-- Point Light -->
    <emitter type="point">
        <point name="position" x="-2" y="-2" z="-3"/>
        <spectrum name="intensity" value="50"/>
    </emitter>
</scene>
41 changes: 41 additions & 0 deletions configs/render/common.xml
@@ -0,0 +1,41 @@
<scene version="2.1.0">
    <default name="spp" value="32"/>
    <default name="resx" value="64"/>
    <default name="resy" value="64"/>
    <default name="disable_edge_gradient" value="false"/>
    <default name="disable_shading_gradient" value="false"/>
    <default name="edge_epsilon" value="0.01"/>
    <default name="legacy_mode" value="false"/>
    <default name="max_depth" value="3"/>
    <default name="refined_intersection" value="false"/>
    <default name="integrator" value="path"/>
    <default name="integrator_file" value="integrator_sdf.xml"/>
    <default name="pixel_format" value="rgb"/>
    <default name="sample_border" value="true"/>
    <default name="use_aovs" value="false"/>
    <default name="pixel_filter" value="gaussian"/>
    <default name="sdf_filename" value=""/>
    <default name="sensors_filename" value="sensors.xml"/>
    <default name="use_mis" value="false"/>
    <default name="hide_emitters" value="false"/>
    <default name="use_antithetic_sampling" value="false"/>
    <default name="detach_weight_sum" value="false"/>
    <default name="decouple_reparam" value="false"/>
    <default name="detach_indirect_si" value="false"/>
    <default name="reparam_primary_only" value="false"/>

    <include filename="$integrator_file"/>
    <include filename="$sensors_filename"/>

    <bsdf type="twosided" id="default-bsdf">
        <bsdf type="diffuse">
            <rgb name="reflectance" value="0.5, 0.5, 0.5"/>
        </bsdf>
    </bsdf>

    <bsdf type="twosided" id="red-bsdf">
        <bsdf type="diffuse">
            <rgb name="reflectance" value="0.8, 0.2, 0.2"/>
        </bsdf>
    </bsdf>
</scene>
7 changes: 7 additions & 0 deletions configs/render/integrator_path.xml
@@ -0,0 +1,7 @@
<scene version="2.1.0">
    <integrator type="path">
        <integer name="max_depth" value="$max_depth"/>
        <boolean name="hide_emitters" value="true"/>
    </integrator>
</scene>

26 changes: 26 additions & 0 deletions configs/render/scene.xml
@@ -0,0 +1,26 @@
<scene version="2.1.0">
    <!-- <path value="../../"/> -->
    <include filename="configs/render/common.xml"/>
    <default name="emitter_scene" value="configs/render/cathedral.xml"/>
    <include filename="$emitter_scene"/>

    <!-- <bsdf type="thindielectric" id="plastic_bsdf">
        <rgb name="specular_transmittance" value="0.7, 0.8, 0.95"/>
    </bsdf> -->

    <bsdf type="roughplastic" id="plastic_bsdf">
        <float name="alpha" value="0.1" />
        <rgb name="diffuse_reflectance" value="0.25, 0.5, 0.8"/>
    </bsdf>

    <default name="main_bsdf_name" value="plastic_bsdf"/>

    <shape type="$mesh_type">
        <ref id="$main_bsdf_name" name="bsdf"/>
        <string name="filename" value="$mesh_path"/>
        <transform name="to_world">
            <translate x="0.0" y="0.0" z="0.0"/>
        </transform>
    </shape>

</scene>
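
These XML files use the Mitsuba scene description format; the `$name` placeholders (e.g. `$mesh_path`, `$mesh_type`) are filled in at load time. A hedged sketch of how a mesh might be rendered with these files using Mitsuba 3 follows; the mesh path and the `sensors.xml` camera file are placeholders not included in this commit, and the repo's actual rendering script may differ:

```python
# Hedged sketch, not the repo's rendering code. Run from the repository root
# so the relative include paths inside scene.xml resolve.
import mitsuba as mi

mi.set_variant("scalar_rgb")   # CPU variant; use "cuda_ad_rgb" if available

scene = mi.load_file(
    "configs/render/scene.xml",
    mesh_type="ply",                                         # fills $mesh_type
    mesh_path="outputs/mesh.ply",                            # fills $mesh_path (placeholder)
    integrator_file="configs/render/integrator_path.xml",    # integrator shipped in this commit
    sensors_filename="sensors.xml",                          # camera definitions (placeholder)
    spp=64, resx=512, resy=512,
)

img = mi.render(scene)
mi.util.write_bitmap("render.png", img)
```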