add stylegan2 and pSp predictor (PaddlePaddle#113)

* add stylegan2 and pixel2style2pixel models, predictors and documents
hao-qiang · Dec 25, 2020 · 5519d09 · 5519d09
1 parent 0977227
commit 5519d09
Show file tree

Hide file tree

Showing 25 changed files with 2,068 additions and 1 deletion.
diff --git a/applications/tools/pixel2style2pixel.py b/applications/tools/pixel2style2pixel.py
@@ -0,0 +1,72 @@
+import paddle
+import os
+import sys
+sys.path.insert(0, os.getcwd())
+from ppgan.apps import Pixel2Style2PixelPredictor
+import argparse
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_image", type=str, help="path to source image")
+
+    parser.add_argument("--output_path",
+                        type=str,
+                        default='output_dir',
+                        help="path to output image dir")
+
+    parser.add_argument("--weight_path",
+                        type=str,
+                        default=None,
+                        help="path to model checkpoint path")
+
+    parser.add_argument("--model_type",
+                        type=str,
+                        default=None,
+                        help="type of model for loading pretrained model")
+
+    parser.add_argument("--seed",
+                        type=int,
+                        default=None,
+                        help="sample random seed for model's image generation")
+
+    parser.add_argument("--size",
+                        type=int,
+                        default=1024,
+                        help="resolution of output image")
+
+    parser.add_argument("--style_dim",
+                        type=int,
+                        default=512,
+                        help="number of style dimension")
+
+    parser.add_argument("--n_mlp",
+                        type=int,
+                        default=8,
+                        help="number of mlp layer depth")
+
+    parser.add_argument("--channel_multiplier",
+                        type=int,
+                        default=2,
+                        help="number of channel multiplier")
+
+    parser.add_argument("--cpu",
+                        dest="cpu",
+                        action="store_true",
+                        help="cpu mode.")
+
+    args = parser.parse_args()
+
+    if args.cpu:
+        paddle.set_device('cpu')
+
+    predictor = Pixel2Style2PixelPredictor(
+        output_path=args.output_path,
+        weight_path=args.weight_path,
+        model_type=args.model_type,
+        seed=args.seed,
+        size=args.size,
+        style_dim=args.style_dim,
+        n_mlp=args.n_mlp,
+        channel_multiplier=args.channel_multiplier
+    )
+    predictor.run(args.input_image)
diff --git a/applications/tools/styleganv2.py b/applications/tools/styleganv2.py
@@ -0,0 +1,80 @@
+import paddle
+import os
+import sys
+sys.path.insert(0, os.getcwd())
+from ppgan.apps import StyleGANv2Predictor
+import argparse
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output_path",
+                        type=str,
+                        default='output_dir',
+                        help="path to output image dir")
+
+    parser.add_argument("--weight_path",
+                        type=str,
+                        default=None,
+                        help="path to model checkpoint path")
+
+    parser.add_argument("--model_type",
+                        type=str,
+                        default=None,
+                        help="type of model for loading pretrained model")
+
+    parser.add_argument("--seed",
+                        type=int,
+                        default=None,
+                        help="sample random seed for model's image generation")
+
+    parser.add_argument("--size",
+                        type=int,
+                        default=1024,
+                        help="resolution of output image")
+
+    parser.add_argument("--style_dim",
+                        type=int,
+                        default=512,
+                        help="number of style dimension")
+
+    parser.add_argument("--n_mlp",
+                        type=int,
+                        default=8,
+                        help="number of mlp layer depth")
+
+    parser.add_argument("--channel_multiplier",
+                        type=int,
+                        default=2,
+                        help="number of channel multiplier")
+
+    parser.add_argument("--n_row",
+                        type=int,
+                        default=3,
+                        help="row number of output image grid")
+
+    parser.add_argument("--n_col",
+                        type=int,
+                        default=5,
+                        help="column number of output image grid")
+
+    parser.add_argument("--cpu",
+                        dest="cpu",
+                        action="store_true",
+                        help="cpu mode.")
+
+    args = parser.parse_args()
+
+    if args.cpu:
+        paddle.set_device('cpu')
+
+    predictor = StyleGANv2Predictor(
+        output_path=args.output_path,
+        weight_path=args.weight_path,
+        model_type=args.model_type,
+        seed=args.seed,
+        size=args.size,
+        style_dim=args.style_dim,
+        n_mlp=args.n_mlp,
+        channel_multiplier=args.channel_multiplier
+    )
+    predictor.run(args.n_row, args.n_col)
diff --git a/docs/en_US/tutorials/pixel2style2pixel.md b/docs/en_US/tutorials/pixel2style2pixel.md
@@ -0,0 +1,87 @@
+# Pixel2Style2Pixel
+
+## Pixel2Style2Pixel introduction
+
+The task of Pixel2Style2Pixel is image encoding. It mainly encodes an input image as the style vector of StyleGAN V2 and uses StyleGAN V2 as the decoder.
+
+<div align="center">
+  <img src="../../imgs/pSp-teaser.jpg" width="500"/>
+</div>
+
+Pixel2Style2Pixel uses a fairly large model to encode images, and encodes the image into the style vector space of StyleGAN V2, so that the image before encoding and the image after decoding have a strong correlation.
+
+Its main functions are:
+
+- Convert an image to latent codes
+- Face frontalization (turn a face to a frontal view)
+- Generate images based on sketches or segmentation results
+- Convert low-resolution images to high-definition images
+
+At present, only the portrait reconstruction and portrait cartoonization models are implemented in PaddleGAN.
+
+## How to use
+
+### Generate
+
+The user can run the following command to generate an image, using a local image as input:
+
+```
+cd applications/
+python -u tools/pixel2style2pixel.py \
+       --input_image <YOUR INPUT IMAGE> \
+       --output_path <DIRECTORY TO STORE OUTPUT IMAGE> \
+       --weight_path <YOUR PRETRAINED MODEL PATH> \
+       --model_type ffhq-inversion \
+       --seed 233 \
+       --size 1024 \
+       --style_dim 512 \
+       --n_mlp 8 \
+       --channel_multiplier 2 \
+       --cpu
+```
+
+**params:**
+- input_image: the input image file path
+- output_path: the directory where the generated images are stored
+- weight_path: pretrained model path
+- model_type: inner model type in PaddleGAN. If you use an existing model type, `weight_path` will have no effect.
+  Currently available: `ffhq-inversion`, `ffhq-toonify`
+- seed: random number seed
+- size: model parameters, output image resolution
+- style_dim: model parameters, dimensions of style z
+- n_mlp: model parameters, the number of multi-layer perceptron layers for style z
+- channel_multiplier: model parameters, channel multiplier, affects model size and the quality of generated pictures
+- cpu: whether to use cpu inference, if not, please remove it from the command
+
+### Train (TODO)
+
+In the future, training scripts will be added to facilitate users to train more types of Pixel2Style2Pixel image encoders.
+
+
+## Results
+
+Input portrait:
+
+<div align="center">
+    <img src="../../imgs/pSp-input.jpg" width="300"/> 
+</div>
+
+Cropped portrait-Reconstructed portrait-Cartoonized portrait:
+
+<div align="center">
+    <img src="../../imgs/pSp-input-crop.png" width="100"/>
+    <img src="../../imgs/pSp-inversion.png" width="100"/>
+    <img src="../../imgs/pSp-toonify.png" width="100"/> 
+</div>
+
+## Reference
+
+```
+@article{richardson2020encoding,
+  title={Encoding in Style: a StyleGAN Encoder for Image-to-Image Translation},
+  author={Richardson, Elad and Alaluf, Yuval and Patashnik, Or and Nitzan, Yotam and Azar, Yaniv and Shapiro, Stav and Cohen-Or, Daniel},
+  journal={arXiv preprint arXiv:2008.00951},
+  year={2020}
+}
+
+```
diff --git a/docs/en_US/tutorials/styleganv2.md b/docs/en_US/tutorials/styleganv2.md
@@ -0,0 +1,83 @@
+# StyleGAN V2
+
+## StyleGAN V2 introduction
+
+The task of StyleGAN V2 is image generation. Given a vector of a specific length, generate the image corresponding to the vector. It is an upgraded version of StyleGAN, which solves the problem of artifacts generated by StyleGAN.
+
+<div align="center">
+  <img src="../../imgs/stylegan2-teaser-1024x256.png" width="500"/>
+</div>
+
+StyleGAN V2 can mix multi-level style vectors. Its core is adaptive style decoupling.
+
+Compared with StyleGAN, its main improvements are:
+
+- The quality of the generated image is significantly better (better, i.e. lower, FID score and fewer artifacts)
+- Proposes a new method to replace progressive growing, with better details such as teeth and eyes
+- Style mixing improved
+- Smoother interpolation
+- Train faster
+
+## How to use
+
+### Generate
+
+The user can generate different results by replacing the value of the seed or removing the seed. Use the following command to generate images:
+
+```
+cd applications/
+python -u tools/styleganv2.py \
+       --output_path <DIRECTORY TO STORE OUTPUT IMAGE> \
+       --weight_path <YOUR PRETRAINED MODEL PATH> \
+       --model_type ffhq-config-f \
+       --seed 233 \
+       --size 1024 \
+       --style_dim 512 \
+       --n_mlp 8 \
+       --channel_multiplier 2 \
+       --n_row 3 \
+       --n_col 5 \
+       --cpu
+```
+
+**params:**
+- output_path: the directory where the generated images are stored
+- weight_path: pretrained model path
+- model_type: inner model type in PaddleGAN. If you use an existing model type, `weight_path` will have no effect.
+  Currently available: `ffhq-config-f`, `animeface-512`
+- seed: random number seed
+- size: model parameters, output image resolution
+- style_dim: model parameters, dimensions of style z
+- n_mlp: model parameters, the number of multi-layer perceptron layers for style z
+- channel_multiplier: model parameters, channel multiplier, affects model size and the quality of generated pictures
+- n_row: the number of rows of the sampled image
+- n_col: the number of columns of the sampled picture
+- cpu: whether to use cpu inference, if not, please remove it from the command
+
+### Train (TODO)
+
+In the future, training scripts will be added to facilitate users to train more types of StyleGAN V2 image generators.
+
+
+## Results
+
+Random Samples:
+
+![Samples](../../imgs/stylegan2-sample.png)
+
+Random Style Mixing:
+
+![Random Style Mixing](../../imgs/stylegan2-sample-mixing-0.png)
+
+
+## Reference
+
+```
+@inproceedings{Karras2019stylegan2,
+  title     = {Analyzing and Improving the Image Quality of {StyleGAN}},
+  author    = {Tero Karras and Samuli Laine and Miika Aittala and Janne Hellsten and Jaakko Lehtinen and Timo Aila},
+  booktitle = {Proc. CVPR},
+  year      = {2020}
+}
+
+```
diff --git a/docs/imgs/pSp-input-crop.png b/docs/imgs/pSp-input-crop.png
diff --git a/docs/imgs/pSp-input.jpg b/docs/imgs/pSp-input.jpg
diff --git a/docs/imgs/pSp-inversion.png b/docs/imgs/pSp-inversion.png
diff --git a/docs/imgs/pSp-teaser.jpg b/docs/imgs/pSp-teaser.jpg
diff --git a/docs/imgs/pSp-toonify.png b/docs/imgs/pSp-toonify.png
diff --git a/docs/imgs/stylegan2-sample-mixing-0.png b/docs/imgs/stylegan2-sample-mixing-0.png
diff --git a/docs/imgs/stylegan2-sample.png b/docs/imgs/stylegan2-sample.png
diff --git a/docs/imgs/stylegan2-teaser-1024x256.png b/docs/imgs/stylegan2-teaser-1024x256.png