-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtrain.py
More file actions
executable file
·89 lines (75 loc) · 3.86 KB
/
train.py
File metadata and controls
executable file
·89 lines (75 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import torch
import argparse
from utils.configs import Config, str2bool
from torch.utils.data import DataLoader
from pytorch_lightning.plugins import DDPPlugin
######################################## Pytorch lightning ########################################################
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer, seed_everything
seed_everything(1112)
from pytorch_lightning.loggers import TensorBoardLogger
from networks.model import LLMVS
if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Command-line options. Every parsed option is forwarded to Config
    # as a keyword argument below.
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='summe_head2_layer3', help='the name of the model')
    parser.add_argument('--dataset', type=str, default='summe', help='the name of the dataset')
    parser.add_argument('--split_idx', type=int, default=0, help='the split index')
    parser.add_argument('--epochs', type=int, default=200, help='the number of training epochs')
    parser.add_argument('--reduced_dim', type=int, default=2048)
    parser.add_argument('--num_heads', type=int, default=2)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--tag', type=str, default='summe_split0')
    parser.add_argument('--lr', type=float, default=1e-4, help='the learning rate')
    parser.add_argument('--pt_path', type=str, default='llama_emb/summe_sum/')
    opt = parser.parse_args()

    kwargs = vars(opt)
    config = Config(**kwargs)

    # ------------------------------------------------------------------
    # Datasets. The dataset module is imported lazily so only the one
    # that was requested gets loaded. Both modules export collators with
    # the same names, which are used for the loaders below.
    # ------------------------------------------------------------------
    if config.dataset == 'summe':
        from utils.summe_dataset import SumMeLLaMADataset, TrainBatchCollator, ValBatchCollator
        train_dataset = SumMeLLaMADataset(mode='train', split_idx=config.split_idx, llama_embedding=config.pt_path)
        val_dataset = SumMeLLaMADataset(mode='test', split_idx=config.split_idx, llama_embedding=config.pt_path)
    elif config.dataset == 'tvsum':
        from utils.tvsum_dataset import TVSumLLaMADataset, TrainBatchCollator, ValBatchCollator
        train_dataset = TVSumLLaMADataset(mode='train', split_idx=config.split_idx, llama_embedding=config.pt_path)
        val_dataset = TVSumLLaMADataset(mode='test', split_idx=config.split_idx, llama_embedding=config.pt_path)
    else:
        # Without this branch an unknown dataset name would only surface
        # later as a NameError on `train_dataset`, hiding the real cause.
        raise ValueError(
            "Unsupported dataset {!r}; expected 'summe' or 'tvsum'.".format(config.dataset)
        )

    # NOTE(review): the training loader is not shuffled (shuffle=False).
    # This may be deliberate; confirm before changing, since it affects
    # the order in which samples are seen during training.
    train_loader = DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=8,
        collate_fn=TrainBatchCollator(),
        pin_memory=True,
        persistent_workers=True,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=8,
        collate_fn=ValBatchCollator(),
        pin_memory=True,
        persistent_workers=True,
    )

    # ------------------------------------------------------------------
    # Model.
    # ------------------------------------------------------------------
    model = LLMVS(config=config)
    model.cuda()

    # ------------------------------------------------------------------
    # Checkpointing: one callback per monitored validation metric, each
    # writing into its own directory under config.save_dir_root. Each
    # keeps the single best checkpoint (highest metric) plus the last.
    # ------------------------------------------------------------------
    best_rho_model = f'{config.save_dir_root}/best_rho_model'
    best_tau_model = f'{config.save_dir_root}/best_tau_model'

    checkpoint_callback_rho = ModelCheckpoint(
        monitor='val_sRho',
        dirpath=best_rho_model,
        filename='{epoch:02d}-{val_sRho:.3f}',
        save_top_k=1,
        save_last=True,
        mode='max',
    )
    checkpoint_callback_tau = ModelCheckpoint(
        monitor='val_kTau',
        dirpath=best_tau_model,
        filename='{epoch:02d}-{val_kTau:.3f}',
        save_top_k=1,
        save_last=True,
        mode='max',
    )

    # ------------------------------------------------------------------
    # Trainer: DDP, 16-bit precision, gradients accumulated over 2
    # batches, validation run twice per training epoch
    # (val_check_interval=0.5).
    # ------------------------------------------------------------------
    trainer = Trainer(
        gpus=-1,
        accelerator='ddp',
        max_epochs=opt.epochs,
        accumulate_grad_batches=2,
        precision=16,
        gradient_clip_val=0.01,
        callbacks=[checkpoint_callback_rho, checkpoint_callback_tau],
        benchmark=True,
        deterministic=False,
        val_check_interval=0.5,
        progress_bar_refresh_rate=100,
        profiler="simple",
        log_every_n_steps=4,
        plugins=DDPPlugin(find_unused_parameters=False),
    )

    # Run one validation pass before any training step, then train.
    trainer.validate(model, val_loader)
    trainer.fit(model, train_loader, val_loader)