# model_checkpoint.yaml
# https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.callbacks.ModelCheckpoint.html
# Save the model periodically by monitoring a quantity.
# See the link above for more detailed information.
# To avoid duplicating loss and metric names, a custom resolver is registered
# during Hydra initialization which replaces __loss__ with loss.__class__.__name__
# and __metric__ with main_metric.__class__.__name__,
# for example: ${replace:"__metric__/valid"}
# Use quotes around the internal value in ${replace:"..."} to avoid
# grammar problems with the Hydra config parser.
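#
# A minimal sketch of how such a resolver could be registered (an assumption
# for illustration, not necessarily this repo's exact code; `loss` and
# `main_metric` stand in for whatever objects the training script instantiates):
#
#   from omegaconf import OmegaConf
#
#   def _replace(value: str) -> str:
#       # swap the placeholders for the actual class names
#       value = value.replace("__loss__", loss.__class__.__name__)
#       value = value.replace("__metric__", main_metric.__class__.__name__)
#       return value
#
#   OmegaConf.register_new_resolver("replace", _replace)
#
# With loss = torch.nn.CrossEntropyLoss() and a metric class named Accuracy,
# ${replace:"__metric__/valid"} then resolves to "Accuracy/valid".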
model_checkpoint:
  _target_: pytorch_lightning.callbacks.ModelCheckpoint
  dirpath: ${paths.output_dir}/checkpoints # directory to save the model file
  filename: ${replace:"epoch{epoch:03d}-loss_valid{__loss__/valid:.4f}-metric_valid{__metric__/valid:.4f}"} # checkpoint filename
  monitor: ${replace:"__metric__/valid"} # name of the logged metric which determines when the model is improving
  verbose: False # verbosity mode
  save_last: True # additionally always save an exact copy of the last checkpoint to a file named last.ckpt
  save_top_k: 5 # save the k best models (determined by the monitored metric above)
  mode: "max" # "max" means a higher metric value is better; can also be "min"
  auto_insert_metric_name: False # when True, the checkpoint filenames will contain the metric name
  save_weights_only: False # if True, then only the model's weights will be saved
  every_n_train_steps: null # number of training steps between checkpoints
  train_time_interval: null # checkpoints are monitored at the specified time interval
  every_n_epochs: null # number of epochs between checkpoints
  save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or at the end of validation
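
# For illustration (assumed values, not from this repo): with loss CrossEntropyLoss,
# a metric class named Accuracy, and the filename template above, a saved checkpoint
# would be named roughly epoch007-loss_valid0.1234-metric_valid0.8765.ckpt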