-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathtrain.py
161 lines (146 loc) · 8.98 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# This script trains the DeepCFD model.
import pickle
import json
from paddle.distributed import fleet
from utils.train_functions import *
from utils.functions import *
from model.UNetEx import UNetEx
import configparser
if __name__ == "__main__":
    # Initialise fleet in collective mode before any model/optimizer wrapping.
    fleet.init(is_collective=True)

    # Read all run settings from the project configuration file.
    config = configparser.ConfigParser()
    config.read("./config/config.ini")
    hyper = config["hyperparameter"]
    net = config["net_parameter"]

    # Load the dataset. `with` guarantees the file handles are closed.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point data_path at trusted files.
    data_path = config["path"]["data_path"]
    with open(os.path.join(data_path, "dataX.pkl"), "rb") as data_file:
        x = pickle.load(data_file)
    with open(os.path.join(data_path, "dataY.pkl"), "rb") as data_file:
        y = pickle.load(data_file)
    x = paddle.to_tensor(x, dtype="float32")
    y = paddle.to_tensor(y, dtype="float32")

    # Per-channel root-mean-square of the targets, reshaped to [1, C, 1, 1]
    # so it broadcasts against the per-pixel loss in loss_func. The flattened
    # size is inferred (-1) instead of hard-coding the dataset dimensions.
    y_trans = paddle.transpose(y, perm=[0, 2, 3, 1])
    channels_weights = paddle.reshape(
        paddle.sqrt(
            paddle.mean(
                paddle.reshape(y_trans, [-1, y_trans.shape[-1]]) ** 2, axis=0
            )
        ),
        shape=[1, -1, 1, 1],
    )

    # Create the output directory. exist_ok avoids a check-then-create race
    # when several training processes start at the same time.
    simulation_directory = config["path"]["save_path"]
    os.makedirs(simulation_directory, exist_ok=True)

    # Split the data into train and test sets using the configured ratio.
    train_data, test_data = split_tensors(
        x, y, ratio=float(hyper["train_test_ratio"])
    )
    train_dataset = paddle.io.TensorDataset([train_data[0], train_data[1]])
    test_dataset = paddle.io.TensorDataset([test_data[0], test_data[1]])

    # Fix the seed for reproducibility.
    # NOTE(review): the seed is set after split_tensors; if the split draws
    # from paddle's RNG it is not covered by this seed - confirm.
    paddle.seed(999)

    # Training hyperparameters.
    epochs = int(hyper["epochs"])
    batch_size = int(hyper["batch_size"])
    lr = float(hyper["learning_rate"])

    # Network parameters: kernel size, comma-separated channel counts, and
    # the batch-norm / weight-norm switches (stored as 0/1 in the config).
    kernel_size = int(net["kernel_size"])
    filters = [int(i) for i in net["filters"].split(",")]
    bn = bool(int(net["batch_norm"]))
    wn = bool(int(net["weight_norm"]))

    # Build the model and wrap it for distributed training.
    model = UNetEx(
        3, 3, filters=filters, kernel_size=kernel_size, batch_norm=bn, weight_norm=wn
    )
    model = fleet.distributed_model(model)

    # Build the optimizer and wrap it for distributed training.
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr,
        parameters=model.parameters(),
        weight_decay=float(hyper["weight_decay"]),
    )
    optimizer = fleet.distributed_optimizer(optimizer)

    # Per-epoch history, keyed exactly as it is written to results.json.
    curves = {
        "train_loss_curve": [],
        "test_loss_curve": [],
        "train_mse_curve": [],
        "test_mse_curve": [],
        "train_ux_curve": [],
        "test_ux_curve": [],
        "train_uy_curve": [],
        "test_uy_curve": [],
        "train_p_curve": [],
        "test_p_curve": [],
    }

    def after_epoch(scope):
        """Append this epoch's losses and metrics from *scope* to `curves`."""
        curves["train_loss_curve"].append(scope["train_loss"])
        curves["test_loss_curve"].append(scope["val_loss"])
        for key in ("mse", "ux", "uy", "p"):
            curves[f"train_{key}_curve"].append(scope["train_metrics"][key])
            curves[f"test_{key}_curve"].append(scope["val_metrics"][key])

    def loss_func(model, batch):
        """Return ``(loss, output)`` for one batch.

        Channels 0 and 1 use squared error and channel 2 uses absolute
        error; each channel is divided by its entry in `channels_weights`
        and everything is summed into a scalar.
        """
        inputs, targets = batch
        output = model(inputs)
        # Slicing with i:i+1 keeps the channel axis, giving [N, 1, H, W].
        lossu = (output[:, 0:1, :, :] - targets[:, 0:1, :, :]) ** 2
        lossv = (output[:, 1:2, :, :] - targets[:, 1:2, :, :]) ** 2
        lossp = paddle.abs(output[:, 2:3, :, :] - targets[:, 2:3, :, :])
        loss = (lossu + lossv + lossp) / channels_weights
        return paddle.sum(loss), output

    def squared_error_sum(channel=None):
        """Build an on-batch metric: summed squared error as a float.

        With ``channel=None`` all channels are included; otherwise only the
        given channel index of output and target is compared.
        """

        def on_batch(scope):
            output, target = scope["output"], scope["batch"][1]
            if channel is not None:
                output = output[:, channel, :, :]
                target = target[:, channel, :, :]
            return float(paddle.sum((output - target) ** 2))

        return on_batch

    def per_sample_average(scope):
        """On-epoch metric: sum of batch values divided by the dataset length."""
        return sum(scope["list"]) / len(scope["dataset"])

    # Train the model, tracking four metrics besides the loss:
    # Total MSE, Ux MSE, Uy MSE and p MSE.
    DeepCFD, train_metrics, train_loss, test_metrics, test_loss = train_model(
        simulation_directory,
        model,
        loss_func,
        train_dataset,
        test_dataset,
        optimizer,
        epochs=epochs,
        batch_size=batch_size,
        m_mse_name="Total MSE",
        m_mse_on_batch=squared_error_sum(),
        m_mse_on_epoch=per_sample_average,
        m_ux_name="Ux MSE",
        m_ux_on_batch=squared_error_sum(0),
        m_ux_on_epoch=per_sample_average,
        m_uy_name="Uy MSE",
        m_uy_on_batch=squared_error_sum(1),
        m_uy_on_epoch=per_sample_average,
        m_p_name="p MSE",
        m_p_on_batch=squared_error_sum(2),
        m_p_on_epoch=per_sample_average,
        patience=25,
        after_epoch=after_epoch,
    )

    # Collect the final metrics and the per-epoch curves. A separate name is
    # used so the ConfigParser object in `config` is not overwritten.
    results = {
        "metrics": {
            "train_metrics": train_metrics,
            "train_loss": train_loss,
            "test_metrics": test_metrics,
            "test_loss": test_loss,
        },
        "curves": curves,
    }

    # Save the training results. os.path.join keeps the file inside
    # save_path even when the configured path has no trailing separator.
    with open(os.path.join(simulation_directory, "results.json"), "w") as file:
        json.dump(results, file)