Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion TUTORIAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,26 @@ We provide several training scripts as usage examples. They are located under `s

4. Intermediate image outputs and model binary files are stored in `outputs/unit_gta2city_folder`

### Resume from last stopped iteration:
```
python train.py --trainer UNIT --config configs/unit_gta2city_folder.yaml --resume
```
This will resume training from last stopped iteration.
If you get an EOF error (or similar) on resume, the most recently stored checkpoint may be corrupt: delete it and resume again. For example, if the 50k-iteration checkpoint is corrupt, delete it and the resume option will continue from the 49k-iteration checkpoint instead.
Note: `--resume` is a `store_true` flag, i.e. passing the option by itself is enough to enable it — no value is required.

### Testing

First, download our pretrained models for the gta2cityscape task and put them in `models` folder.

### Out of disk space?
If your system runs out of disk space for storing checkpoints:
1. Reduce the checkpoint frequency (`snapshot_save_iter`) in the config file.
2. Delete all but the last two iterations of the discriminator and generator model checkpoints.
3. `df -h` helps you keep track of the currently available disk space.
4. If you accidentally delete the wrong checkpoints and they were moved to the trash (rather than removed permanently), you can restore them by moving the files from the trash folder back into the checkpoint folder.
On Linux systems: `cd ~/.local/share/Trash` (source: https://askubuntu.com/questions/327943/how-to-open-trash-through-terminal)

#### Pretrained models

| Dataset | Model Link |
Expand Down Expand Up @@ -101,4 +116,4 @@ Run the following command to translate Cityscape images to GTA5 images
|-------------|--------------|
| <img src="https://raw.githubusercontent.com/NVIDIA/UNIT/master/results/city2gta/input.jpg" width="384" title="Input"> | <img src="https://raw.githubusercontent.com/NVIDIA/UNIT/master/results/city2gta/output.jpg" width="384" title="Output"> |



54 changes: 54 additions & 0 deletions configs/unit_rain_folder.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).

# UNIT training config for the rain-translation task (64x64 random crops).

# logger options
image_save_iter: 10000 # How often do you want to save output images during training
image_display_iter: 100 # How often do you want to display output images during training
display_size: 4 # How many images do you want to display each time
snapshot_save_iter: 10000 # How often (in iterations) to save model checkpoints; lower values use more disk space
log_iter: 1 # How often do you want to log the training stats

# optimization options
max_iter: 1000000 # maximum number of training iterations
batch_size: 1 # batch size
weight_decay: 0.0001 # weight decay
beta1: 0.5 # Adam parameter
beta2: 0.999 # Adam parameter
init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0001 # initial learning rate
lr_policy: step # learning rate scheduler
step_size: 100000 # how often to decay learning rate
gamma: 0.5 # how much to decay learning rate
gan_w: 1 # weight of adversarial loss
recon_x_w: 10 # weight of image reconstruction loss
recon_h_w: 0 # weight of hidden reconstruction loss (0 disables this term)
recon_kl_w: 0.01 # weight of KL loss for reconstruction
recon_x_cyc_w: 10 # weight of cycle consistency loss
recon_kl_cyc_w: 0.01 # weight of KL loss for cycle consistency
vgg_w: 1 # weight of domain-invariant perceptual loss

# model options
gen:
  dim: 64 # number of filters in the bottommost layer
  activ: relu # activation function [relu/lrelu/prelu/selu/tanh]
  n_downsample: 2 # number of downsampling layers in content encoder
  n_res: 4 # number of residual blocks in content encoder/decoder
  pad_type: reflect # padding type [zero/reflect]
dis:
  dim: 64 # number of filters in the bottommost layer
  norm: none # normalization layer [none/bn/in/ln]
  activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh]
  n_layer: 4 # number of layers in D
  gan_type: lsgan # GAN loss [lsgan/nsgan]
  num_scales: 3 # number of scales
  pad_type: reflect # padding type [zero/reflect]

# data options
input_dim_a: 3 # number of image channels [1/3]
input_dim_b: 3 # number of image channels [1/3]
num_workers: 8 # number of data loading threads
new_size: 256 # first resize the shortest image side to this size
crop_image_height: 64 # random crop image of this height (pixels)
crop_image_width: 64 # random crop image of this width (pixels)

data_root: ./datasets/cr/ # dataset folder location (relative path, resolved from the working directory)
54 changes: 54 additions & 0 deletions configs/unit_rain_folder_2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).

# UNIT training config for the rain-translation task (128x128 random crops).
# Identical to unit_rain_folder.yaml except for the larger crop size.

# logger options
image_save_iter: 10000 # How often do you want to save output images during training
image_display_iter: 100 # How often do you want to display output images during training
display_size: 4 # How many images do you want to display each time
snapshot_save_iter: 10000 # How often (in iterations) to save model checkpoints; lower values use more disk space
log_iter: 1 # How often do you want to log the training stats

# optimization options
max_iter: 1000000 # maximum number of training iterations
batch_size: 1 # batch size
weight_decay: 0.0001 # weight decay
beta1: 0.5 # Adam parameter
beta2: 0.999 # Adam parameter
init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0001 # initial learning rate
lr_policy: step # learning rate scheduler
step_size: 100000 # how often to decay learning rate
gamma: 0.5 # how much to decay learning rate
gan_w: 1 # weight of adversarial loss
recon_x_w: 10 # weight of image reconstruction loss
recon_h_w: 0 # weight of hidden reconstruction loss (0 disables this term)
recon_kl_w: 0.01 # weight of KL loss for reconstruction
recon_x_cyc_w: 10 # weight of cycle consistency loss
recon_kl_cyc_w: 0.01 # weight of KL loss for cycle consistency
vgg_w: 1 # weight of domain-invariant perceptual loss

# model options
gen:
  dim: 64 # number of filters in the bottommost layer
  activ: relu # activation function [relu/lrelu/prelu/selu/tanh]
  n_downsample: 2 # number of downsampling layers in content encoder
  n_res: 4 # number of residual blocks in content encoder/decoder
  pad_type: reflect # padding type [zero/reflect]
dis:
  dim: 64 # number of filters in the bottommost layer
  norm: none # normalization layer [none/bn/in/ln]
  activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh]
  n_layer: 4 # number of layers in D
  gan_type: lsgan # GAN loss [lsgan/nsgan]
  num_scales: 3 # number of scales
  pad_type: reflect # padding type [zero/reflect]

# data options
input_dim_a: 3 # number of image channels [1/3]
input_dim_b: 3 # number of image channels [1/3]
num_workers: 8 # number of data loading threads
new_size: 256 # first resize the shortest image side to this size
crop_image_height: 128 # random crop image of this height (pixels)
crop_image_width: 128 # random crop image of this width (pixels)

# NOTE(review): previously a machine-specific absolute path (/home/<user>/UNIT/datasets/cr/);
# changed to a relative path to match the sibling configs so it works on any machine.
data_root: ./datasets/cr/ # dataset folder location (relative path, resolved from the working directory)
54 changes: 54 additions & 0 deletions configs/unit_snow_folder.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).

# UNIT training config for the snow-translation task (256x256 random crops).

# logger options
image_save_iter: 10000 # How often do you want to save output images during training
image_display_iter: 100 # How often do you want to display output images during training
display_size: 4 # How many images do you want to display each time
snapshot_save_iter: 10000 # How often (in iterations) to save model checkpoints; lower values use more disk space
log_iter: 1 # How often do you want to log the training stats

# optimization options
max_iter: 1000000 # maximum number of training iterations
batch_size: 1 # batch size
weight_decay: 0.0001 # weight decay
beta1: 0.5 # Adam parameter
beta2: 0.999 # Adam parameter
init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal]
lr: 0.0001 # initial learning rate
lr_policy: step # learning rate scheduler
step_size: 100000 # how often to decay learning rate
gamma: 0.5 # how much to decay learning rate
gan_w: 1 # weight of adversarial loss
recon_x_w: 10 # weight of image reconstruction loss
recon_h_w: 0 # weight of hidden reconstruction loss (0 disables this term)
recon_kl_w: 0.01 # weight of KL loss for reconstruction
recon_x_cyc_w: 10 # weight of cycle consistency loss
recon_kl_cyc_w: 0.01 # weight of KL loss for cycle consistency
vgg_w: 1 # weight of domain-invariant perceptual loss

# model options
gen:
  dim: 64 # number of filters in the bottommost layer
  activ: relu # activation function [relu/lrelu/prelu/selu/tanh]
  n_downsample: 2 # number of downsampling layers in content encoder
  n_res: 4 # number of residual blocks in content encoder/decoder
  pad_type: reflect # padding type [zero/reflect]
dis:
  dim: 64 # number of filters in the bottommost layer
  norm: none # normalization layer [none/bn/in/ln]
  activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh]
  n_layer: 4 # number of layers in D
  gan_type: lsgan # GAN loss [lsgan/nsgan]
  num_scales: 3 # number of scales
  pad_type: reflect # padding type [zero/reflect]

# data options
input_dim_a: 3 # number of image channels [1/3]
input_dim_b: 3 # number of image channels [1/3]
num_workers: 8 # number of data loading threads
new_size: 256 # first resize the shortest image side to this size
crop_image_height: 256 # random crop image of this height (pixels)
crop_image_width: 256 # random crop image of this width (pixels)

data_root: ./datasets/cs/ # dataset folder location (relative path, resolved from the working directory)