
Commit beed12c

release training/validation code

fabienbaradel committed Jul 3, 2024
1 parent 6fbe1bb
Showing 19 changed files with 2,152 additions and 71 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -5,3 +5,5 @@ models
._.DS_Store
.DS_Store
tmp_data
*.jpg
._*.jpg
133 changes: 131 additions & 2 deletions README.md
@@ -27,6 +27,7 @@
<a href="https://paperswithcode.com/sota/human-mesh-recovery-on-bedlam?p=multi-hmr-multi-person-whole-body-human-mesh"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/multi-hmr-multi-person-whole-body-human-mesh/human-mesh-recovery-on-bedlam"></a><br>
<a href="https://paperswithcode.com/sota/3d-human-reconstruction-on-ehf?p=multi-hmr-multi-person-whole-body-human-mesh"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/multi-hmr-multi-person-whole-body-human-mesh/3d-human-reconstruction-on-ehf"></a><br>
<a href="https://paperswithcode.com/sota/3d-human-pose-estimation-on-ubody?p=multi-hmr-multi-person-whole-body-human-mesh"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/multi-hmr-multi-person-whole-body-human-mesh/3d-human-pose-estimation-on-ubody"></a><br>
<a href="https://paperswithcode.com/sota/3d-multi-person-mesh-recovery-on-agora?p=multi-hmr-multi-person-whole-body-human-mesh"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/multi-hmr-multi-person-whole-body-human-mesh/3d-multi-person-mesh-recovery-on-agora"></a><br>
<a href="https://paperswithcode.com/sota/3d-multi-person-human-pose-estimation-on?p=multi-hmr-multi-person-whole-body-human-mesh"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/multi-hmr-multi-person-whole-body-human-mesh/3d-multi-person-human-pose-estimation-on"></a><br>
<a href="https://paperswithcode.com/sota/3d-human-pose-estimation-on-3dpw?p=multi-hmr-multi-person-whole-body-human-mesh"><img src="https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/multi-hmr-multi-person-whole-body-human-mesh/3d-human-pose-estimation-on-3dpw"></a><br>
</p>
@@ -42,6 +43,12 @@
</div>
</p>

## News
- 2024/07/03: Release of training and evaluation code.
- 2024/07/01: Multi-HMR is accepted to ECCV'24.
- 2024/06/17: Multi-HMR won the [Rhobin Challenge @ CVPR'24](https://rhobin-challenge.github.io/): 3D human reconstruction track.
- 2024/02/22: Release of demo code.

## Installation
First, you need to clone the repo.

@@ -59,7 +66,7 @@ conda env create -f conda.yaml
conda activate multihmr
```

The installation has been tested with python3.9 and CUDA 11.7.
The installation has been tested with python3.9 and CUDA 12.1.

Checkpoints will automatically be downloaded to `$HOME/models/multiHMR` the first time you run the demo code.

@@ -91,6 +98,128 @@ Once downloaded you need to place them into `$HOME/models/multiHMR`.

We compute the runtime on a V100-32GB GPU.

## Training Multi-HMR
We provide code for training Multi-HMR using a single GPU on BEDLAM-training and evaluating it on BEDLAM-validation, EHF and 3DPW-test.

Activate the environment:
```bash
source .multihmr/bin/activate
export PYTHONPATH=`pwd`
```

### Preprocessing BEDLAM
First, download the BEDLAM dataset (6fps version) and place the files in ```data/BEDLAM```.
The data structure of the directory should look like this:
```bash
data/BEDLAM
|
|---validation
|
|---20221018_1_250_batch01hand_zoom_suburb_b_6fps
|
|---png
|
|---seq_000000
|
|---seq_000000_0000.png
...
|---seq_000000_0235.png
...
|---seq_000249
...
|---20221019_3-8_250_highbmihand_orbit_stadium_6fps
|---training
|
|---20221010_3_1000_batch01hand_6fps
...
|---20221024_3-10_100_batch01handhair_static_highSchoolGym_30fps
|---all_npz_12_training
|
|---20221010_3_1000_batch01hand_6fps.npz
...
|---20221024_3-10_100_batch01handhair_static_highSchoolGym_30fps.npz
|---all_npz_12_validation
|
|---20221018_1_250_batch01hand_zoom_suburb_b_6fps.npz
...
|---20221019_3-8_250_highbmihand_orbit_stadium_6fps.npz
```

We need to build the annotation files for the training and validation sets. It may take around 20 minutes to build the pkl files, depending on your CPU.
```bash
python3.9 datasets/bedlam.py "create_annots(['validation', 'training'])"
```
You will get two files ```data/bedlam_validation.pkl``` and ```data/bedlam_training.pkl```.
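As a quick sanity check, here is a sketch for peeking inside one of the generated files (the entry schema is whatever `create_annots()` wrote, so the peek below is an assumption):
```python
import pickle

with open('data/bedlam_validation.pkl', 'rb') as f:
    annots = pickle.load(f)

print(type(annots), len(annots))
# Peek at one entry; the exact schema depends on create_annots().
sample = annots[0] if isinstance(annots, (list, tuple)) else next(iter(annots.values()))
print(sample)
```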

### Checking annotations
Visualize the annotation of a specific image.
```bash
python3.9 datasets/bedlam.py "visualize(split='validation', i=1500)"
```
It will create a file ```bedlam_validation_1500.jpg``` showing the RGB image on the left and the meshes overlaid on the RGB image on the right.

### (Optional) Creating jpg files for fast data-loading
BEDLAM is composed of png files, and loading them can be slow depending on your infrastructure.
The following command generates one jpg file for each png file, with a maximal resolution of 1280.
It may take a while because BEDLAM has more than 300k images. To speed up the generation of jpg files, you can run the command on specific subdirectories (see the parallelization sketch after this code block) and choose whatever target size suits you.
```bash
# Can be slow
python3.9 datasets/bedlam.py "create_jpeg(root_dir='data/BEDLAM', target_size=1280)
# Or parallelize
python3.9 datasets/bedlam.py "create_jpeg(root_dir='data/BEDLAM/validation/20221019_3-8_250_highbmihand_orbit_stadium_6fps', target_size=1280)
...
python3.9 datasets/bedlam.py "create_jpeg(root_dir='data/BEDLAM/training/20221010_3-10_500_batch01hand_zoom_suburb_d_6fps', target_size=1280)
```
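A possible way to parallelize over subdirectories (a sketch, assuming GNU xargs is available; tune `-P` to your CPU and disk):
```bash
printf '%s\n' data/BEDLAM/training/* data/BEDLAM/validation/* | \
    xargs -P 4 -I{} python3.9 datasets/bedlam.py "create_jpeg(root_dir='{}', target_size=1280)"
```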
### Checking the data-loading time
You can check the data-loading time by running the command below. It uses the png version of BEDLAM.
```bash
python3.9 datasets/bedlam.py "dataloader(split='validation', batch_size=16, num_workers=4, extension='png', img_size=1280, n_iter=100)"
```
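Once the jpg files exist, you can run the same check against them to compare loading times (same command, with `extension='jpg'`):
```bash
python3.9 datasets/bedlam.py "dataloader(split='validation', batch_size=16, num_workers=4, extension='jpg', img_size=1280, n_iter=100)"
```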
### Preprocessing additional validation sets
We also provide code for evaluating on EHF and 3DPW.
Run the following commands to build the annotation file for EHF and visualize a sample.
```bash
python3.9 datasets/ehf.py "create_annots()"
python3.9 datasets/ehf.py "visualize(i=10)"
```
For 3DPW, download the SMPL male and female models and put them in ```models/smpl/SMPL_MALE.pkl``` and ```models/smpl/SMPL_FEMALE.pkl```. The ```smplx2smpl.pkl``` file is also required, for converting from SMPL-X to SMPL (a usage sketch follows the commands below).
```bash
python3.9 datasets/threedpw.py "create_annots()"
python3.9 datasets/threedpw.py "visualize(i=1011)"
```
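For reference, here is a minimal sketch of how such a SMPL-X-to-SMPL vertex mapping is typically applied (the pickle path and the `'matrix'` key below are assumptions, not confirmed by this repo):
```python
import pickle
import numpy as np

# Assumed layout: a (6890, 10475) matrix mapping SMPL-X vertices to SMPL vertices.
with open('models/smplx2smpl.pkl', 'rb') as f:
    mapping = pickle.load(f)['matrix']  # key name is an assumption

smplx_verts = np.zeros((10475, 3))  # placeholder SMPL-X vertices
smpl_verts = mapping @ smplx_verts  # -> (6890, 3) SMPL vertices
```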
### Training on BEDLAM-train
We provide the command for training on BEDLAM-train at resolution 336 on a single GPU.
```bash
# python command
CUDA_VISIBLE_DEVICES=1 python3.9 train.py \
--backbone dinov2_vits14 \
--img_size 336 \
-j 4 \
--batch_size 32 \
-iter 10000 \
--max_iter 500000 \
--name multi-hmr_s_336
```
To decrease data-loading time, use ```--extension jpg --res 1280```, as in the example below.
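For example, the same training run pointed at the pre-generated jpg files might look like this (the run name is ours; everything else mirrors the command above):
```bash
CUDA_VISIBLE_DEVICES=1 python3.9 train.py \
    --backbone dinov2_vits14 \
    --img_size 336 \
    -j 4 \
    --batch_size 32 \
    -iter 10000 \
    --max_iter 500000 \
    --extension jpg \
    --res 1280 \
    --name multi-hmr_s_336_jpg
```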
### Evaluating BEDLAM-val / EHF-test / 3DPW-test
The command below evaluates a pretrained checkpoint on the validation sets.
```bash
CUDA_VISIBLE_DEVICES=0 python3.9 train.py \
--eval_only 1 \
--backbone dinov2_vitl14 \
--img_size 896 \
--val_data EHF THREEDPW BEDLAM \
--val_split test test validation \
--val_subsample 1 20 25 \
--pretrained models/multiHMR/multiHMR_896_L.pt
```
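To evaluate a checkpoint trained with the command above, the same entry point should work; a sketch (the checkpoint path is an assumption, adapt it to wherever your run saves its weights):
```bash
CUDA_VISIBLE_DEVICES=0 python3.9 train.py \
    --eval_only 1 \
    --backbone dinov2_vits14 \
    --img_size 336 \
    --val_data BEDLAM \
    --val_split validation \
    --val_subsample 25 \
    --pretrained models/multiHMR/multi-hmr_s_336.pt
```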
Check the log or open TensorBoard to see the results.
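For example, assuming the run writes its TensorBoard event files under a local log directory (the path below is an assumption):
```bash
tensorboard --logdir logs/ --port 6006
```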
## License
The code is distributed under the CC BY-NC-SA 4.0 License.\
See [Multi-HMR LICENSE](Multi-HMR_License.txt), [Checkpoint LICENSE](Checkpoint_License.txt) and [Example Data LICENSE](Example_Data_License.txt) for more information.
@@ -108,7 +237,7 @@ If you find this code useful for your research, please consider citing the following
Rogez, Gr{\'e}gory and
Lucas*, Thomas
},
booktitle={arXiv},
booktitle={ECCV},
year={2024}
}
```
4 changes: 2 additions & 2 deletions app.py
@@ -146,8 +146,8 @@ def infer(fn, det_thresh, nms_kernel_size, fov):

# Save into glb
start = time.time()
l_mesh = [humans[j]['verts_smplx'].detach().cpu().numpy() for j in range(len(humans))]
l_face = [model.smpl_layer['neutral'].bm_x.faces for j in range(len(humans))]
l_mesh = [humans[j]['v3d'].detach().cpu().numpy() for j in range(len(humans))]
l_face = [model.smpl_layer['neutral_10'].bm_x.faces for j in range(len(humans))]
scene = create_scene(img_pil_visu, l_mesh, l_face, color=color, metallicFactor=0., roughnessFactor=0.5)
scene.export(glb_fn)
print(f"Exporting scene in glb: {time.time() - start:.2f}sec")
4 changes: 2 additions & 2 deletions blocks/dinov2.py
@@ -6,10 +6,10 @@
from torch import nn

class Dinov2Backbone(nn.Module):
def __init__(self, name='dinov2_vitb14', *args, **kwargs):
def __init__(self, name='dinov2_vitb14', pretrained=False, *args, **kwargs):
super().__init__()
self.name = name
self.encoder = torch.hub.load('facebookresearch/dinov2', self.name, pretrained=False)
self.encoder = torch.hub.load('facebookresearch/dinov2', self.name, pretrained=pretrained)
self.patch_size = self.encoder.patch_size
self.embed_dim = self.encoder.embed_dim
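As a side note, here is a minimal usage sketch for this backbone (the `forward_features` call is standard DINOv2 behaviour rather than something shown in this diff; input sides must be multiples of `patch_size=14`):
```python
import torch

backbone = Dinov2Backbone('dinov2_vits14', pretrained=True)
x = torch.randn(1, 3, 336, 336)  # 336 = 24 * 14, a multiple of the patch size
tokens = backbone.encoder.forward_features(x)['x_norm_patchtokens']
print(tokens.shape)  # (1, 576, 384) for ViT-S/14: 24x24 patches, embed_dim=384
```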

6 changes: 4 additions & 2 deletions blocks/smpl_layer.py
@@ -141,11 +141,13 @@ def forward(self,

# Projection in camera plane
j2d = perspective_projection(j3d_cam, K)
v2d = perspective_projection(verts_cam, K)

out.update({
'verts_smplx_cam': verts_cam,
'j3d': j3d_cam,
'v3d': verts_cam, # in 3d camera space
'j3d': j3d_cam, # in 3d camera space
'j2d': j2d,
'v2d': v2d,
'transl': transl, # translation of the primary keypoint
'transl_pelvis': j3d_cam[:,[0]], # root=pelvis
})
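For reference, here is a minimal sketch of what a `perspective_projection` of this kind computes (assuming camera-space points and per-batch intrinsics `K`; the repo's actual implementation may differ):
```python
import torch

def perspective_projection_sketch(points: torch.Tensor, K: torch.Tensor) -> torch.Tensor:
    """Project (B, N, 3) camera-space points with (B, 3, 3) intrinsics to (B, N, 2) pixels."""
    points = points / points[..., -1:]                # divide by depth z
    pixels = torch.einsum('bij,bnj->bni', K, points)  # apply intrinsics
    return pixels[..., :2]
```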
