forked from PaddlePaddle/PaddleGAN
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add stylegan2 and pSp predictor (PaddlePaddle#113)
* add stylegan2 and pixel2style2pixel models, predictors and documents
- Loading branch information
Showing
25 changed files
with
2,068 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import paddle | ||
import os | ||
import sys | ||
sys.path.insert(0, os.getcwd()) | ||
from ppgan.apps import Pixel2Style2PixelPredictor | ||
import argparse | ||
|
||
def build_parser():
    """Build the command-line parser for the Pixel2Style2Pixel demo script.

    Returns:
        argparse.ArgumentParser: parser exposing ``--input_image``,
        ``--output_path``, ``--weight_path``, ``--model_type``, ``--seed``,
        ``--size``, ``--style_dim``, ``--n_mlp``, ``--channel_multiplier``
        and the ``--cpu`` flag.
    """
    parser = argparse.ArgumentParser()

    # The image path is always forwarded to ``predictor.run``; require it here
    # so a missing flag produces a usage error instead of passing ``None`` on.
    parser.add_argument("--input_image",
                        type=str,
                        required=True,
                        help="path to source image")

    parser.add_argument("--output_path",
                        type=str,
                        default='output_dir',
                        help="path to output image dir")

    parser.add_argument("--weight_path",
                        type=str,
                        default=None,
                        help="path to model checkpoint path")

    parser.add_argument("--model_type",
                        type=str,
                        default=None,
                        help="type of model for loading pretrained model")

    parser.add_argument("--seed",
                        type=int,
                        default=None,
                        help="sample random seed for model's image generation")

    parser.add_argument("--size",
                        type=int,
                        default=1024,
                        help="resolution of output image")

    parser.add_argument("--style_dim",
                        type=int,
                        default=512,
                        help="number of style dimension")

    parser.add_argument("--n_mlp",
                        type=int,
                        default=8,
                        help="number of mlp layer depth")

    parser.add_argument("--channel_multiplier",
                        type=int,
                        default=2,
                        help="number of channel multiplier")

    parser.add_argument("--cpu",
                        dest="cpu",
                        action="store_true",
                        help="cpu mode.")

    return parser


def main(argv=None):
    """Parse the command line and run the Pixel2Style2Pixel predictor.

    Args:
        argv: optional list of argument strings; ``None`` makes argparse read
            ``sys.argv[1:]``, which is what running the script does.
    """
    args = build_parser().parse_args(argv)

    # Select the CPU device only when --cpu was given.
    if args.cpu:
        paddle.set_device('cpu')

    predictor = Pixel2Style2PixelPredictor(
        output_path=args.output_path,
        weight_path=args.weight_path,
        model_type=args.model_type,
        seed=args.seed,
        size=args.size,
        style_dim=args.style_dim,
        n_mlp=args.n_mlp,
        channel_multiplier=args.channel_multiplier,
    )
    predictor.run(args.input_image)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import paddle | ||
import os | ||
import sys | ||
sys.path.insert(0, os.getcwd()) | ||
from ppgan.apps import StyleGANv2Predictor | ||
import argparse | ||
|
||
def build_parser():
    """Build the command-line parser for the StyleGAN V2 demo script.

    Returns:
        argparse.ArgumentParser: parser exposing ``--output_path``,
        ``--weight_path``, ``--model_type``, ``--seed``, ``--size``,
        ``--style_dim``, ``--n_mlp``, ``--channel_multiplier``, ``--n_row``,
        ``--n_col`` and the ``--cpu`` flag.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--output_path",
                        type=str,
                        default='output_dir',
                        help="path to output image dir")

    parser.add_argument("--weight_path",
                        type=str,
                        default=None,
                        help="path to model checkpoint path")

    parser.add_argument("--model_type",
                        type=str,
                        default=None,
                        help="type of model for loading pretrained model")

    parser.add_argument("--seed",
                        type=int,
                        default=None,
                        help="sample random seed for model's image generation")

    parser.add_argument("--size",
                        type=int,
                        default=1024,
                        help="resolution of output image")

    parser.add_argument("--style_dim",
                        type=int,
                        default=512,
                        help="number of style dimension")

    parser.add_argument("--n_mlp",
                        type=int,
                        default=8,
                        help="number of mlp layer depth")

    parser.add_argument("--channel_multiplier",
                        type=int,
                        default=2,
                        help="number of channel multiplier")

    parser.add_argument("--n_row",
                        type=int,
                        default=3,
                        help="row number of output image grid")

    parser.add_argument("--n_col",
                        type=int,
                        default=5,
                        help="column number of output image grid")

    parser.add_argument("--cpu",
                        dest="cpu",
                        action="store_true",
                        help="cpu mode.")

    return parser


def main(argv=None):
    """Parse the command line and run the StyleGAN V2 predictor.

    Args:
        argv: optional list of argument strings; ``None`` makes argparse read
            ``sys.argv[1:]``, which is what running the script does.
    """
    args = build_parser().parse_args(argv)

    # Select the CPU device only when --cpu was given.
    if args.cpu:
        paddle.set_device('cpu')

    predictor = StyleGANv2Predictor(
        output_path=args.output_path,
        weight_path=args.weight_path,
        model_type=args.model_type,
        seed=args.seed,
        size=args.size,
        style_dim=args.style_dim,
        n_mlp=args.n_mlp,
        channel_multiplier=args.channel_multiplier,
    )
    # The grid dimensions are passed positionally: rows first, then columns.
    predictor.run(args.n_row, args.n_col)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# Pixel2Style2Pixel | ||
|
||
## Pixel2Style2Pixel introduction | ||
|
||
The task of Pixel2Style2Pixel is image encoding. It mainly encodes an input image as the style vector of StyleGAN V2 and uses StyleGAN V2 as the decoder. | ||
|
||
<div align="center"> | ||
<img src="../../imgs/pSp-teaser.jpg" width="500"/> | ||
</div> | ||
|
||
Pixel2Style2Pixel uses a fairly large model to encode images, and encodes the image into the style vector space of StyleGAN V2, so that the image before encoding and the image after decoding have a strong correlation. | ||
|
||
Its main functions are: | ||
|
||
- Convert an image to latent codes | ||
- Frontalize faces (turn a face to a frontal view) | ||
- Generate images based on sketches or segmentation results | ||
- Convert low-resolution images to high-definition images | ||
|
||
At present, PaddleGAN implements only the portrait reconstruction and portrait cartoonization models. | ||
|
||
## How to use | ||
|
||
### Generate | ||
|
||
Use the following command to generate results, specifying a local image as input: | ||
|
||
``` | ||
cd applications/ | ||
python -u tools/pixel2style2pixel.py \ | ||
--input_image <YOUR INPUT IMAGE> \ | ||
--output_path <DIRECTORY TO STORE OUTPUT IMAGE> \ | ||
--weight_path <YOUR PRETRAINED MODEL PATH> \ | ||
--model_type ffhq-inversion \ | ||
--seed 233 \ | ||
--size 1024 \ | ||
--style_dim 512 \ | ||
--n_mlp 8 \ | ||
--channel_multiplier 2 \ | ||
--cpu | ||
``` | ||
|
||
**params:** | ||
- input_image: the input image file path | ||
- output_path: the directory where the generated images are stored | ||
- weight_path: pretrained model path | ||
- model_type: inner model type in PaddleGAN. If you use an existing model type, `weight_path` will have no effect. | ||
Currently available: `ffhq-inversion`, `ffhq-toonify` | ||
- seed: random number seed | ||
- size: model parameters, output image resolution | ||
- style_dim: model parameters, dimensions of style z | ||
- n_mlp: model parameters, the number of multi-layer perceptron layers for style z | ||
- channel_multiplier: model parameters, channel multiplier, affects model size and the quality of generated pictures | ||
- cpu: whether to use cpu inference, if not, please remove it from the command | ||
|
||
### Train (TODO) | ||
|
||
In the future, training scripts will be added to make it easier for users to train more types of Pixel2Style2Pixel image encoders. | ||
|
||
|
||
## Results | ||
|
||
Input portrait: | ||
|
||
<div align="center"> | ||
<img src="../../imgs/pSp-input.jpg" width="300"/> | ||
</div> | ||
|
||
Cropped portrait, reconstructed portrait, and cartoonized portrait: | ||
|
||
<div align="center"> | ||
<img src="../../imgs/pSp-input-crop.png" width="100"/> | ||
<img src="../../imgs/pSp-inversion.png" width="100"/> | ||
<img src="../../imgs/pSp-toonify.png" width="100"/> | ||
</div> | ||
|
||
## Reference | ||
|
||
``` | ||
@article{richardson2020encoding, | ||
title={Encoding in Style: a StyleGAN Encoder for Image-to-Image Translation}, | ||
author={Richardson, Elad and Alaluf, Yuval and Patashnik, Or and Nitzan, Yotam and Azar, Yaniv and Shapiro, Stav and Cohen-Or, Daniel}, | ||
journal={arXiv preprint arXiv:2008.00951}, | ||
year={2020} | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# StyleGAN V2 | ||
|
||
## StyleGAN V2 introduction | ||
|
||
The task of StyleGAN V2 is image generation. Given a vector of a specific length, generate the image corresponding to the vector. It is an upgraded version of StyleGAN, which solves the problem of artifacts generated by StyleGAN. | ||
|
||
<div align="center"> | ||
<img src="../../imgs/stylegan2-teaser-1024x256.png" width="500"/> | ||
</div> | ||
|
||
StyleGAN V2 can mix multi-level style vectors. Its core is adaptive style decoupling. | ||
|
||
Compared with StyleGAN, its main improvements are: | ||
|
||
- The quality of the generated image is significantly better (better, i.e. lower, FID score and fewer artifacts) | ||
- A new method replaces progressive training and produces more accurate details such as teeth and eyes | ||
- Style mixing improved | ||
- Smoother interpolation | ||
- Faster training | ||
|
||
## How to use | ||
|
||
### Generate | ||
|
||
The user can generate different results by replacing the value of the seed or removing the seed. Use the following command to generate images: | ||
|
||
``` | ||
cd applications/ | ||
python -u tools/styleganv2.py \ | ||
--output_path <DIRECTORY TO STORE OUTPUT IMAGE> \ | ||
--weight_path <YOUR PRETRAINED MODEL PATH> \ | ||
--model_type ffhq-config-f \ | ||
--seed 233 \ | ||
--size 1024 \ | ||
--style_dim 512 \ | ||
--n_mlp 8 \ | ||
--channel_multiplier 2 \ | ||
--n_row 3 \ | ||
--n_col 5 \ | ||
--cpu | ||
``` | ||
|
||
**params:** | ||
- output_path: the directory where the generated images are stored | ||
- weight_path: pretrained model path | ||
- model_type: inner model type in PaddleGAN. If you use an existing model type, `weight_path` will have no effect. | ||
Currently available: `ffhq-config-f`, `animeface-512` | ||
- seed: random number seed | ||
- size: model parameters, output image resolution | ||
- style_dim: model parameters, dimensions of style z | ||
- n_mlp: model parameters, the number of multi-layer perceptron layers for style z | ||
- channel_multiplier: model parameters, channel multiplier, affects model size and the quality of generated pictures | ||
- n_row: the number of rows in the output image grid | ||
- n_col: the number of columns in the output image grid | ||
- cpu: whether to use cpu inference, if not, please remove it from the command | ||
|
||
### Train (TODO) | ||
|
||
In the future, training scripts will be added to make it easier for users to train more types of StyleGAN V2 image generators. | ||
|
||
|
||
## Results | ||
|
||
Random Samples: | ||
|
||
![Samples](../../imgs/stylegan2-sample.png) | ||
|
||
Random Style Mixing: | ||
|
||
![Random Style Mixing](../../imgs/stylegan2-sample-mixing-0.png) | ||
|
||
|
||
## Reference | ||
|
||
``` | ||
@inproceedings{Karras2019stylegan2, | ||
title = {Analyzing and Improving the Image Quality of {StyleGAN}}, | ||
author = {Tero Karras and Samuli Laine and Miika Aittala and Janne Hellsten and Jaakko Lehtinen and Timo Aila}, | ||
booktitle = {Proc. CVPR}, | ||
year = {2020} | ||
} | ||
``` |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.