remove with_data_parallel (PaddlePaddle#1658)
* remove with_data_parallel

* ACT adapts fleet

* ACT demo adapts fleet

* fix bugs
zzjjay authored Feb 20, 2023
1 parent 65c776d commit e33dc48
Showing 30 changed files with 276 additions and 303 deletions.
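
Across these files the change is mechanical: every `CompiledProgram(...).with_data_parallel(...)` call collapses into a plain `CompiledProgram`, and multi-card execution moves to `paddle.distributed.fleet`. A minimal before/after sketch of the API migration (a toy static-graph program of our own, not a verbatim excerpt from the repo):

```python
import paddle
from paddle import static

paddle.enable_static()

# Build a tiny static-graph program standing in for the demos' networks.
train_program = static.Program()
startup_program = static.Program()
with static.program_guard(train_program, startup_program):
    x = static.data(name='x', shape=[None, 8], dtype='float32')
    label = static.data(name='label', shape=[None, 1], dtype='int64')
    logits = static.nn.fc(x, size=10)
    avg_cost = paddle.mean(
        paddle.nn.functional.cross_entropy(logits, label))

build_strategy = static.BuildStrategy()

# Before this commit (removed API):
#   train_compiled = static.CompiledProgram(
#       train_program).with_data_parallel(
#           loss_name=avg_cost.name, build_strategy=build_strategy)

# After this commit: CompiledProgram takes the build strategy directly;
# multi-card training is delegated to paddle.distributed.fleet instead.
train_compiled = static.CompiledProgram(
    train_program, build_strategy=build_strategy)
```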
14 changes: 5 additions & 9 deletions demo/nas/block_sa_nas_mobilenetv2.py
@@ -13,7 +13,6 @@
 from paddleslim.analysis import flops
 from paddleslim.nas import SANAS
 from paddleslim.common import get_logger
-from optimizer import create_optimizer
 import imagenet_reader
 
 _logger = get_logger(__name__, level=logging.INFO)
@@ -157,15 +156,13 @@ def search_mobilenetv2_block(config, args, image_size):
 
     build_strategy = static.BuildStrategy()
     train_compiled_program = static.CompiledProgram(
-        train_program).with_data_parallel(
-            loss_name=avg_cost.name, build_strategy=build_strategy)
+        train_program, build_strategy=build_strategy)
     for epoch_id in range(args.retain_epoch):
         for batch_id, data in enumerate(train_loader()):
             fetches = [avg_cost.name]
             s_time = time.time()
-            outs = exe.run(train_compiled_program,
-                           feed=data,
-                           fetch_list=fetches)[0]
+            outs = exe.run(
+                train_compiled_program, feed=data, fetch_list=fetches)[0]
             batch_time = time.time() - s_time
             if batch_id % 10 == 0:
                 _logger.info(
@@ -175,9 +172,8 @@ def search_mobilenetv2_block(config, args, image_size):
     reward = []
     for batch_id, data in enumerate(val_loader()):
         test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
-        batch_reward = exe.run(test_program,
-                               feed=data,
-                               fetch_list=test_fetches)
+        batch_reward = exe.run(
+            test_program, feed=data, fetch_list=test_fetches)
         reward_avg = np.mean(np.array(batch_reward), axis=1)
         reward.append(reward_avg)
 
13 changes: 5 additions & 8 deletions demo/nas/rl_nas_mobilenetv2.py
@@ -141,15 +141,13 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
 
     build_strategy = static.BuildStrategy()
     train_compiled_program = static.CompiledProgram(
-        train_program).with_data_parallel(
-            loss_name=avg_cost.name, build_strategy=build_strategy)
+        train_program, build_strategy=build_strategy)
     for epoch_id in range(args.retain_epoch):
         for batch_id, data in enumerate(train_loader()):
            fetches = [avg_cost.name]
            s_time = time.time()
-           outs = exe.run(train_compiled_program,
-                          feed=data,
-                          fetch_list=fetches)[0]
+           outs = exe.run(
+               train_compiled_program, feed=data, fetch_list=fetches)[0]
            batch_time = time.time() - s_time
            if batch_id % 10 == 0:
                _logger.info(
@@ -161,9 +159,8 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
         test_fetches = [
             test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
         ]
-        batch_reward = exe.run(test_program,
-                               feed=data,
-                               fetch_list=test_fetches)
+        batch_reward = exe.run(
+            test_program, feed=data, fetch_list=test_fetches)
         reward_avg = np.mean(np.array(batch_reward), axis=1)
         reward.append(reward_avg)
 
26 changes: 10 additions & 16 deletions demo/nas/sa_nas_mobilenetv2.py
@@ -134,15 +134,13 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
 
     build_strategy = static.BuildStrategy()
     train_compiled_program = static.CompiledProgram(
-        train_program).with_data_parallel(
-            loss_name=avg_cost.name, build_strategy=build_strategy)
+        train_program, build_strategy=build_strategy)
     for epoch_id in range(args.retain_epoch):
         for batch_id, data in enumerate(train_loader()):
             fetches = [avg_cost.name]
             s_time = time.time()
-            outs = exe.run(train_compiled_program,
-                           feed=data,
-                           fetch_list=fetches)[0]
+            outs = exe.run(
+                train_compiled_program, feed=data, fetch_list=fetches)[0]
             batch_time = time.time() - s_time
             if batch_id % 10 == 0:
                 _logger.info(
@@ -154,9 +152,8 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
         test_fetches = [
             test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
         ]
-        batch_reward = exe.run(test_program,
-                               feed=data,
-                               fetch_list=test_fetches)
+        batch_reward = exe.run(
+            test_program, feed=data, fetch_list=test_fetches)
         reward_avg = np.mean(np.array(batch_reward), axis=1)
         reward.append(reward_avg)
 
@@ -223,15 +220,13 @@ def test_search_result(tokens, image_size, args, config):
 
     build_strategy = static.BuildStrategy()
     train_compiled_program = static.CompiledProgram(
-        train_program).with_data_parallel(
-            loss_name=avg_cost.name, build_strategy=build_strategy)
+        train_program, build_strategy=build_strategy)
     for epoch_id in range(args.retain_epoch):
         for batch_id, data in enumerate(train_loader()):
             fetches = [avg_cost.name]
             s_time = time.time()
-            outs = exe.run(train_compiled_program,
-                           feed=data,
-                           fetch_list=fetches)[0]
+            outs = exe.run(
+                train_compiled_program, feed=data, fetch_list=fetches)[0]
             batch_time = time.time() - s_time
             if batch_id % 10 == 0:
                 _logger.info(
@@ -243,9 +238,8 @@ def test_search_result(tokens, image_size, args, config):
         test_fetches = [
             test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
         ]
-        batch_reward = exe.run(test_program,
-                               feed=data,
-                               fetch_list=test_fetches)
+        batch_reward = exe.run(
+            test_program, feed=data, fetch_list=test_fetches)
         reward_avg = np.mean(np.array(batch_reward), axis=1)
         reward.append(reward_avg)
 
27 changes: 12 additions & 15 deletions demo/nas/sanas_darts_space.py
@@ -119,8 +119,8 @@ def train(main_prog, exe, epoch_id, train_loader, fetch_list, args):
             [[drop_path_probility * epoch_id / args.retain_epoch]
              for i in range(args.batch_size)]).astype(np.float32)
         drop_path_mask = 1 - np.random.binomial(
-            1, drop_path_prob[0],
-            size=[args.batch_size, 20, 4, 2]).astype(np.float32)
+            1, drop_path_prob[0], size=[args.batch_size, 20, 4, 2
+            ]).astype(np.float32)
         feed.append({
             "image": image,
             "label": label,
@@ -195,8 +195,8 @@ def search(config, args, image_size, is_server=True):
 
         current_params = count_parameters_in_MB(
             train_program.global_block().all_parameters(), 'cifar10')
-        _logger.info('step: {}, current_params: {}M'.format(step,
-                                                            current_params))
+        _logger.info(
+            'step: {}, current_params: {}M'.format(step, current_params))
         if current_params > float(3.77):
             continue
 
@@ -222,9 +222,7 @@ def search(config, args, image_size, is_server=True):
 
         build_strategy = static.BuildStrategy()
         train_compiled_program = static.CompiledProgram(
-            train_program).with_data_parallel(
-                loss_name=train_fetch_list[0].name,
-                build_strategy=build_strategy)
+            train_program, build_strategy=build_strategy)
 
         valid_top1_list = []
         for epoch_id in range(args.retain_epoch):
@@ -234,8 +232,8 @@ def search(config, args, image_size, is_server=True):
                 step, epoch_id, train_top1))
             valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                                test_fetch_list, args)
-            _logger.info("TEST: Epoch {}, valid_acc {:.6f}".format(epoch_id,
-                                                                   valid_top1))
+            _logger.info(
+                "TEST: Epoch {}, valid_acc {:.6f}".format(epoch_id, valid_top1))
             valid_top1_list.append(valid_top1)
         sa_nas.reward(float(valid_top1_list[-1] + valid_top1_list[-2]) / 2)
 
@@ -276,19 +274,18 @@ def final_test(config, args, image_size, token=None):
 
     build_strategy = static.BuildStrategy()
     train_compiled_program = static.CompiledProgram(
-        train_program).with_data_parallel(
-            loss_name=train_fetch_list[0].name, build_strategy=build_strategy)
+        train_program, build_strategy=build_strategy)
 
     valid_top1_list = []
     for epoch_id in range(args.retain_epoch):
         train_top1 = train(train_compiled_program, exe, epoch_id, train_loader,
                            train_fetch_list, args)
-        _logger.info("TRAIN: Epoch {}, train_acc {:.6f}".format(epoch_id,
-                                                                train_top1))
+        _logger.info(
+            "TRAIN: Epoch {}, train_acc {:.6f}".format(epoch_id, train_top1))
         valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                            test_fetch_list, args)
-        _logger.info("TEST: Epoch {}, valid_acc {:.6f}".format(epoch_id,
-                                                               valid_top1))
+        _logger.info(
+            "TEST: Epoch {}, valid_acc {:.6f}".format(epoch_id, valid_top1))
         valid_top1_list.append(valid_top1)
 
         output_dir = os.path.join('darts_output', str(epoch_id))
16 changes: 13 additions & 3 deletions demo/prune/README.md
@@ -34,6 +34,7 @@ tar -xf MobileNetV1_pretrained.tar
 
 Launch the pruning task with the following commands:
 
+- Single-card launch:
 ```
 export CUDA_VISIBLE_DEVICES=0
 python train.py \
@@ -43,9 +44,18 @@ python train.py \
     --criterion "l1_norm"
 ```
 
-Here, `model` specifies the model to prune. `pruned_ratio` sets the fraction of channels pruned from each convolution layer. The `data` option selects the dataset.
-The `criterion` option selects the pruning criterion; `l1_norm`, `bn_scale`, and `geometry_median` are currently supported, with `l1_norm` as the default; set this option to switch criteria.
-The four shell scripts in this directory run, on three models (ResNet34, MobileNetV1, MobileNetV2), four sets of
+- Multi-card launch:
+```
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle.distributed.launch train.py \
+    --model "MobileNet" \
+    --pruned_ratio 0.31 \
+    --data "mnist" \
+    --criterion "l1_norm" \
+    --fleet
+```
+
+Here, `model` specifies the model to prune. `pruned_ratio` sets the fraction of channels pruned from each convolution layer. The `data` option selects the dataset. The `criterion` option selects the pruning criterion; `l1_norm`, `bn_scale`, and `geometry_median` are currently supported, with `l1_norm` as the default. `--fleet` turns on multi-card training and must be passed when launching on multiple cards. The four shell scripts in this directory run, on three models (ResNet34, MobileNetV1, MobileNetV2), four sets of
 experiments with `criterion` set to `geometry_median`; the scripts can be run directly to launch the pruning experiments.
 
 Run `python train.py --help` for more options.
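
The `--fleet` switch above maps onto the collective-training hooks added to `demo/prune/train.py` later in this diff: `fleet.init` at the top of `compress()` and a `fleet.distributed_optimizer` wrapper around the optimizer. A condensed, self-contained sketch of that wiring (the helper name is ours, not from the repo):

```python
import paddle
from paddle.distributed import fleet

def make_optimizer(use_fleet: bool):
    """Hypothetical helper mirroring the --fleet wiring in train.py."""
    if use_fleet:
        # Under `python -m paddle.distributed.launch`, one trainer process
        # runs per visible GPU; is_collective=True selects collective
        # (all-reduce) training rather than parameter-server mode.
        fleet.init(is_collective=True)
    opt = paddle.optimizer.Momentum(learning_rate=0.1, momentum=0.9)
    if use_fleet:
        # The wrapper makes minimize() insert the cross-card gradient
        # synchronization; single-card runs skip it entirely.
        opt = fleet.distributed_optimizer(opt)
    return opt
```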
3 changes: 2 additions & 1 deletion demo/prune/fpgm_mobilenetv1_f-50_train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 export CUDA_VISIBLE_DEVICES=0,1
 export FLAGS_fraction_of_gpu_memory_to_use=0.98
-python train.py \
+python -m paddle.distributed.launch train.py \
     --model="MobileNet" \
     --pretrained_model="/workspace/models/MobileNetV1_pretrained" \
     --data="imagenet" \
@@ -14,4 +14,5 @@ python train.py \
     --lr_strategy="piecewise_decay" \
     --criterion="geometry_median" \
     --model_path="./fpgm_mobilenetv1_models" \
+    --fleet \
     2>&1 | tee fpgm_mobilenetv1_train.log
3 changes: 2 additions & 1 deletion demo/prune/fpgm_mobilenetv2_f-50_train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 export CUDA_VISIBLE_DEVICES=0,1
 export FLAGS_fraction_of_gpu_memory_to_use=0.98
-python train.py \
+python -m paddle.distributed.launch train.py \
     --model="MobileNetV2" \
     --pretrained_model="/workspace/models/MobileNetV2_pretrained" \
     --data="imagenet" \
@@ -14,4 +14,5 @@ python train.py \
     --lr_strategy="piecewise_decay" \
     --criterion="geometry_median" \
     --model_path="./fpgm_mobilenetv2_models" \
+    --fleet \
     2>&1 | tee fpgm_mobilenetv2_train.log
3 changes: 2 additions & 1 deletion demo/prune/fpgm_resnet34_f-42_train.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 export FLAGS_fraction_of_gpu_memory_to_use=0.98
-python train.py \
+python -m paddle.distributed.launch train.py \
     --model="ResNet34" \
     --pretrained_model="/workspace/models/ResNet34_pretrained" \
     --data="imagenet" \
     --pruned_ratio=0.25 \
     --lr_strategy="cosine_decay" \
     --criterion="geometry_median" \
     --model_path="./fpgm_resnet34_025_120_models" \
+    --fleet \
     2>&1 | tee fpgm_resnet025_120_train.log
3 changes: 2 additions & 1 deletion demo/prune/fpgm_resnet34_f-50_train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 export CUDA_VISIBLE_DEVICES=0,1
 export FLAGS_fraction_of_gpu_memory_to_use=0.98
-python train.py \
+python -m paddle.distributed.launch train.py \
     --model="ResNet34" \
     --pretrained_model="/workspace/models/ResNet34_pretrained" \
     --data="imagenet" \
@@ -14,4 +14,5 @@ python train.py \
     --lr_strategy="piecewise_decay" \
     --criterion="geometry_median" \
     --model_path="./fpgm_resnet34_models" \
+    --fleet \
     2>&1 | tee fpgm_resnet03_train.log
38 changes: 22 additions & 16 deletions demo/prune/train.py
@@ -15,6 +15,7 @@
 import models
 from utility import add_arguments, print_arguments
 import paddle.vision.transforms as T
+from paddle.distributed import fleet
 
 _logger = get_logger(__name__, level=logging.INFO)
 
@@ -40,6 +41,7 @@
 add_arg('criterion', str, "l1_norm", "The prune criterion to be used, support l1_norm and batch_norm_scale.")
 add_arg('save_inference', bool, False, "Whether to save inference model.")
 add_arg('ce_test', bool, False, "Whether to CE test.")
+parser.add_argument('--fleet', action='store_true', help="Whether to turn on distributed training.")
 # yapf: enable
 
 model_list = models.__all__
@@ -96,6 +98,8 @@ def create_optimizer(args, step_per_epoch):
 
 
 def compress(args):
+    if args.fleet:
+        fleet.init(is_collective=True)
 
     num_workers = 4
     shuffle = True
@@ -130,8 +134,8 @@ def compress(args):
     else:
         raise ValueError("{} is not supported.".format(args.data))
     image_shape = [int(m) for m in image_shape.split(",")]
-    assert args.model in model_list, "{} is not in lists: {}".format(args.model,
-                                                                     model_list)
+    assert args.model in model_list, "{} is not in lists: {}".format(
+        args.model, model_list)
     places = paddle.static.cuda_places(
     ) if args.use_gpu else paddle.static.cpu_places()
     place = places[0]
@@ -140,13 +144,16 @@ def compress(args):
         name='image', shape=[None] + image_shape, dtype='float32')
     label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
     batch_size_per_card = int(args.batch_size / len(places))
+    sampler = paddle.io.DistributedBatchSampler(
+        train_dataset,
+        shuffle=shuffle,
+        drop_last=True,
+        batch_size=batch_size_per_card)
     train_loader = paddle.io.DataLoader(
         train_dataset,
         places=places,
         feed_list=[image, label],
-        drop_last=True,
-        batch_size=batch_size_per_card,
-        shuffle=shuffle,
+        batch_sampler=sampler,
         return_list=False,
         use_shared_memory=True,
         num_workers=num_workers)
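
The loader change in this hunk is what makes multi-card runs shard data correctly: the per-loader `shuffle`/`batch_size`/`drop_last` arguments give way to a `DistributedBatchSampler`, which hands each trainer a disjoint slice of batches. A self-contained sketch with a toy dataset (dataset class and sizes are illustrative, not from this repo):

```python
import numpy as np
import paddle

class ToyDataset(paddle.io.Dataset):
    """Illustrative stand-in for the demo's MNIST/ImageNet readers."""

    def __len__(self):
        return 64

    def __getitem__(self, idx):
        image = np.random.rand(1, 28, 28).astype('float32')
        label = np.array([idx % 10], dtype='int64')
        return image, label

train_dataset = ToyDataset()
# In a fleet job each trainer sees a disjoint shard of batches; in a
# single-process run this behaves like an ordinary shuffled sampler.
sampler = paddle.io.DistributedBatchSampler(
    train_dataset, batch_size=16, shuffle=True, drop_last=True)
train_loader = paddle.io.DataLoader(train_dataset, batch_sampler=sampler)

for batch_id, (image, label) in enumerate(train_loader):
    print(batch_id, image.shape, label.shape)
```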
@@ -171,6 +178,8 @@ def compress(args):
     acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
     val_program = paddle.static.default_main_program().clone(for_test=True)
     opt, learning_rate = create_optimizer(args, step_per_epoch)
+    if args.fleet:
+        opt = fleet.distributed_optimizer(opt)
     opt.minimize(avg_cost)
 
     exe.run(paddle.static.default_startup_program())
@@ -180,8 +189,8 @@ def compress(args):
     def if_exist(var):
         return os.path.exists(os.path.join(args.pretrained_model, var.name))
 
-    _logger.info("Load pretrained model from {}".format(
-        args.pretrained_model))
+    _logger.info(
+        "Load pretrained model from {}".format(args.pretrained_model))
     paddle.static.load(paddle.static.default_main_program(),
                        args.pretrained_model, exe)
 
@@ -247,13 +256,10 @@ def train(epoch, program):
         place=place)
     _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))
 
-    build_strategy = paddle.static.BuildStrategy()
-    exec_strategy = paddle.static.ExecutionStrategy()
-    train_program = paddle.static.CompiledProgram(
-        pruned_program).with_data_parallel(
-            loss_name=avg_cost.name,
-            build_strategy=build_strategy,
-            exec_strategy=exec_strategy)
+    if args.fleet:
+        train_program = paddle.static.CompiledProgram(pruned_program)
+    else:
+        train_program = pruned_program
 
     for i in range(args.num_epochs):
         train(i, train_program)
@@ -268,8 +274,8 @@ def train(epoch, program):
             infer_model_path, [image], [out],
             exe,
             program=pruned_val_program)
-        _logger.info("Saved inference model into [{}]".format(
-            infer_model_path))
+        _logger.info(
+            "Saved inference model into [{}]".format(infer_model_path))
 
 
 def main():