Commit

Unify the logger manager in the Fleet API.
Hide the API under distributed/utils, which users don't need.
sljlp authored Sep 22, 2022
1 parent 372505b commit 7eb046c
Showing 38 changed files with 491 additions and 395 deletions.
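The user-visible result of this commit is a single place to control Fleet's logging. The sketch below uses only the names this diff adds (the new log_level argument to fleet.init and the get_log_level_code / get_log_level_name helpers re-exported from fleet.utils.log_util); the concrete return values are assumptions based on the standard logging module's level codes, not something the diff states.

# Minimal usage sketch of the unified Fleet logging controls added by this commit.
# Return values are assumed to follow the standard logging module's level codes.
import paddle.distributed.fleet as fleet

fleet.init(log_level="DEBUG")        # new optional argument, default "INFO"
print(fleet.get_log_level_name())    # expected: "DEBUG"
print(fleet.get_log_level_code())    # expected: 10, i.e. logging.DEBUG (assumed)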
1 change: 0 additions & 1 deletion python/paddle/distributed/__init__.py
@@ -65,7 +65,6 @@
from paddle.fluid.dygraph.parallel import ParallelEnv # noqa: F401

from . import cloud_utils # noqa: F401
from . import utils # noqa: F401

from .sharding import * # noqa: F401

2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/converter.py
@@ -16,7 +16,7 @@
import warnings
import logging
import numpy as np
from .utils import get_logger
from ..utils.log_utils import get_logger


class Converter(object):
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/dist_saver.py
@@ -27,7 +27,7 @@
from .utils import get_dist_attr
from .converter import Converter
from .process_group import _g_process_group_map
from .utils import get_logger
from ..utils.log_utils import get_logger


def check_filename(re_exp, filename):
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/parallelizer.py
@@ -24,7 +24,7 @@
import time
import paddle
from paddle.fluid.backward import append_backward
from paddle.distributed.utils import get_logger
from paddle.distributed.utils.log_utils import get_logger
from paddle.distributed.fleet import cloud_utils
import paddle.fluid.core as core
from paddle.fluid import program_guard
6 changes: 2 additions & 4 deletions python/paddle/distributed/cloud_utils.py
@@ -14,10 +14,8 @@

import os
import paddle
from paddle.distributed.utils import get_cluster
from paddle.distributed.utils import logger
from paddle.distributed.utils import get_gpus
from paddle.distributed.utils import get_cluster_from_args
from paddle.distributed.utils.launch_utils import get_cluster, get_gpus, get_cluster_from_args
from paddle.distributed.utils.launch_utils import logger

__all__ = []

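Taken together, the hunks in this commit move helpers out of the flat paddle.distributed.utils module into two submodules. The summary below only restates the import-path moves visible in this diff (old paths kept as comments); no other paths are implied.

# Launch-related helpers now come from paddle.distributed.utils.launch_utils:
# old: from paddle.distributed.utils import get_cluster, get_gpus, get_cluster_from_args, logger
from paddle.distributed.utils.launch_utils import (get_cluster, get_gpus,
                                                   get_cluster_from_args, logger)

# Logging helpers now come from paddle.distributed.utils.log_utils:
# old: from paddle.distributed.utils import get_logger
from paddle.distributed.utils.log_utils import get_logger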
5 changes: 4 additions & 1 deletion python/paddle/distributed/fleet/__init__.py
@@ -32,6 +32,7 @@
from .model import distributed_model
from .optimizer import distributed_optimizer
from .scaler import distributed_scaler
from .utils import log_util

__all__ = [ #noqa
"CommunicateTopology", "UtilBase", "HybridCommunicateGroup",
@@ -90,5 +91,7 @@
shrink = fleet.shrink
get_hybrid_communicate_group = fleet.get_hybrid_communicate_group
distributed_scaler = distributed_scaler

set_log_level = log_util.set_log_level
get_log_level_code = log_util.get_log_level_code
get_log_level_name = log_util.get_log_level_name
from .. import auto_parallel as auto
12 changes: 4 additions & 8 deletions python/paddle/distributed/fleet/elastic/manager.py
@@ -26,13 +26,9 @@
from paddle.distributed.fleet import cloud_utils
from paddle.distributed.fleet import launch_utils

logger = logging.getLogger("ELASTIC")
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
fmt='%(name)s %(levelname)s %(asctime)s %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
from paddle.distributed.utils.log_utils import get_logger

logger = get_logger("INFO", "ELASTIC")

ELASTIC_EXIT_CODE = 101
ELASTIC_AUTO_PARALLEL_EXIT_CODE = 102
@@ -354,7 +350,7 @@ def pre_hook(self):
stderr=subprocess.PIPE,
shell=True).communicate()
if err:
logger.warn("pre_hook exec failed")
logger.warning("pre_hook exec failed")
else:
logger.info(f"pre_hook exec result: {out.decode('utf-8').strip()}")

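The get_logger helper imported here from paddle.distributed.utils.log_utils replaces the hand-rolled handler setup that the hunk above deletes. Its real implementation is not part of this diff; a minimal hypothetical sketch of what such a helper presumably does, mirroring the removed code, would be:

# Hypothetical sketch of get_logger(log_level, name); the actual implementation
# in paddle.distributed.utils.log_utils may differ in details.
import logging

def get_logger(log_level, name="root"):
    logger = logging.getLogger(name)
    logger.setLevel(log_level)  # accepts a level name like "INFO" or a numeric level
    formatter = logging.Formatter(
        fmt='%(name)s %(levelname)s %(asctime)s %(message)s')
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    return logger

logger = get_logger("INFO", "ELASTIC")  # usage as in elastic/manager.py above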
64 changes: 40 additions & 24 deletions python/paddle/distributed/fleet/fleet.py
@@ -13,7 +13,6 @@
# limitations under the License.

import copy
import warnings
import paddle
import os
from types import MethodType
@@ -32,6 +31,8 @@
from .meta_parallel import model_parallel_random_seed
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid import core
from .utils.log_util import logger, set_log_level
import logging

__all__ = []

@@ -54,7 +55,7 @@ def apply_ir_passes(main_program, startup_program, config):
# RawProgramOptimizer also inserts coalesce_tensor
# into program. These two procedures may conflict
# in which vars are to be fused.
warnings.warn(
logger.warning(
'Currently, the fuse_all_optimizer_ops pass has conflict with fuse_all_reduce_ops pass. Disable the fuse_all_optimizer_ops pass temporarily.'
)
build_strategy.fuse_all_optimizer_ops = False
@@ -83,7 +84,7 @@ def __impl__(*args, **kwargs):

if cls._role_maker is not None and cls._role_maker._is_non_distributed(
) is True:
warnings.warn(
logger.warning(
"%s() function doesn't work when use non_distributed fleet." %
(func.__name__))
return
@@ -165,7 +166,11 @@ def __init__(self):
self._context = {}
self.user_defined_optimizer = paddle.optimizer.Optimizer(0.0)

def init(self, role_maker=None, is_collective=False, strategy=None):
def init(self,
role_maker=None,
is_collective=False,
strategy=None,
log_level="INFO"):
"""
Initialize role_maker in Fleet.
@@ -182,6 +187,8 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
GPU. The default value is False.
strategy (DistributedStrategy): Extra properties for distributed training.
For details, please refer to paddle.distributed.fleet.DistributedStrategy. Default: None.
log_level (Integer, String, optional): An ``Integer`` or ``String`` value that sets
the logging level. Default is "INFO".
Returns:
@@ -217,7 +224,18 @@
strategy = fleet.DistributedStrategy()
fleet.init(strategy=strategy)
Examples5:
.. code-block:: python
import paddle.distributed.fleet as fleet
strategy = fleet.DistributedStrategy()
fleet.init(log_level = "DEBUG")
"""

set_log_level(log_level)

if strategy is None:
strategy = DistributedStrategy()
self._user_defined_strategy = copy.deepcopy(strategy)
@@ -261,12 +279,12 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
self._hcg = tp.HybridCommunicateGroup(self._topology)
return
if parallel_helper._is_parallel_ctx_initialized():
warnings.warn(
logger.warning(
"The dygraph parallel environment has been initialized.")
else:
# FLAGS_nccl_nrings is used for dynamic graph multi-stream communication
if "FLAGS_nccl_nrings" in os.environ:
warnings.warn(
logger.warning(
"You have set the environment variable FLAGS_nccl_nrings "
"outside the program, so the nccl_comm_num in "
"DistributedStrategy will not take effect here.")
@@ -281,7 +299,7 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
if tp._HYBRID_PARALLEL_GROUP is None:
self._init_hybrid_parallel_env()
else:
warnings.warn(
logger.warning(
"The dygraph hybrid parallel environment has been initialized."
)
elif self._is_collective:
@@ -850,9 +868,6 @@ def save_inference_model(self,
fleet.init_server()
"""
# warnings.warn(
# "'save_inference_model' is a deprecated, will be deleted after v2.2.0, Please use fleet.save instead."
# )

self._runtime_handle._save_inference_model(executor, dirname,
feeded_var_names,
@@ -902,10 +917,6 @@ def save_persistables(self, executor, dirname, main_program=None, mode=0):
fleet.save_persistables(exe, "dirname", paddle.static.default_main_program())
"""
# warnings.warn(
# "'save_persistables' is a deprecated, will be deleted after v2.2.0, Please use fleet.save instead."
# )

self._runtime_handle._save_persistables(executor, dirname, main_program,
mode)

@@ -1015,7 +1026,7 @@ def distributed_optimizer(self, optimizer, strategy=None):

if strategy is not None:
if self._is_collective:
warnings.warn(
logger.warning(
"It is recommended to use DistributedStrategy "
"in fleet.init(). The strategy here is only for compatibility. "
"If the strategy in fleet.distributed_optimizer() is "
@@ -1304,8 +1315,9 @@ def _minimize_impl(self,
copy_user_defined_strategy, can_not_apply_optimizer_list)

context["valid_strategy"] = copy.deepcopy(valid_strategy)
# print("valid_strategy:", context["valid_strategy"])
# print("user_defined_strategy:", context["user_defined_strategy"])
logger.debug("valid_strategy: " + str(context["valid_strategy"]))
logger.debug("user_defined_strategy: " +
str(context["user_defined_strategy"]))

applied_meta_list = self.strategy_compiler._get_applied_meta_list()
applied_graph_list = self.strategy_compiler._get_applied_graph_list()
@@ -1335,17 +1347,19 @@ def _minimize_impl(self,
no_grad_set=no_grad_set)

if meta_optimizer:
# print("before minimize program id:", id(loss.block.program))
logger.debug("before minimize program id: " +
str(id(loss.block.program)))
optimize_ops, params_grads = meta_optimizer.minimize(
loss, startup_program, parameter_list, no_grad_set=no_grad_set)
# print("after minimize program id:", id(loss.block.program))

logger.debug("after minimize program id: " +
str(id(loss.block.program)))
default_program = paddle.static.default_main_program()
# print("default program id:", id(default_program))
logger.debug("default program id: " + str(id(default_program)))

if id(default_program) != id(loss.block.program):
paddle.fluid.framework.switch_main_program(loss.block.program)
# print("default program id after switch:", id(default_program))
logger.debug("default program id after switch: " +
str(id(default_program)))

else:
optimize_ops, params_grads = self.user_defined_optimizer.minimize(
@@ -1355,7 +1369,8 @@ def _minimize_impl(self,
context["program_params_grads"] = params_grads

if graph_optimizer:
# print("before graph minimize program id:", id(loss.block.program))
logger.debug("before graph minimize program id: " +
str(id(loss.block.program)))
optimize_ops, params_grads = graph_optimizer.minimize(
loss, startup_program, parameter_list, no_grad_set=no_grad_set)
# since we do not encourage users to use graph operations
@@ -1454,7 +1469,8 @@ def _minimize_losses_impl(self,
if v or k not in opt_info:
opt_info[k] = v
program._fleet_opt = opt_info
# print("fleet base opt info:", id(program), program._fleet_opt)
logger.debug("fleet base opt info: " + str(id(program)) +
str(program._fleet_opt))

if self._runtime_handle is None:
self._runtime_handle = RuntimeFactory()._create_runtime(context)
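Throughout fleet.py, the commented-out print debugging in _minimize_impl and _minimize_losses_impl is replaced with logger.debug calls, so the output is off by default and controlled by the log level rather than by editing the source. Since init() routes its log_level argument through set_log_level() (see the init hunk above), the same output can presumably also be enabled after initialization:

# Enabling the new logger.debug output from Fleet's minimize paths; this assumes
# set_log_level() adjusts the shared log_util logger, as the init() wiring suggests.
import paddle.distributed.fleet as fleet

fleet.init(is_collective=True)   # default level "INFO": debug lines stay hidden
fleet.set_log_level("DEBUG")     # surface the "valid_strategy" / program-id debug lines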
@@ -30,15 +30,8 @@
from .sharding import utils
# FIXME: import *
from .sharding.utils import *

import logging

logger = logging.getLogger(__name__)
formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
from ..utils.log_util import logger

__all__ = []

@@ -33,7 +33,7 @@
import paddle
from paddle import nn
from paddle.distributed import collective
from paddle.distributed.utils import get_logger
from paddle.distributed.utils.log_utils import get_logger

from .group_sharded_storage import GradStorage
from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2
4 changes: 2 additions & 2 deletions python/paddle/distributed/fleet/optimizer.py
@@ -13,7 +13,6 @@
# limitations under the License.

import copy
import warnings
import paddle
import os
import numpy as np
@@ -22,6 +21,7 @@
from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer
from paddle.fluid import core
from paddle.distributed import fleet
from .utils.log_util import logger


def _dygraph_distributed_optimizer(optimizer, strategy=None):
@@ -52,7 +52,7 @@ def _dygraph_distributed_optimizer(optimizer, strategy=None):

if strategy is not None:
if fleet_env._is_collective:
warnings.warn(
logger.warning(
"It is recommended to use DistributedStrategy "
"in fleet_env.init(). The strategy here is only for compatibility. "
"If the strategy in fleet_env.distributed_optimizer() is "
10 changes: 2 additions & 8 deletions python/paddle/distributed/fleet/recompute/recompute.py
@@ -22,13 +22,7 @@
from paddle.fluid.framework import in_dygraph_mode

import logging

logger = logging.getLogger(__name__)
formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
from ..utils.log_util import logger

__all__ = []

@@ -49,7 +43,7 @@ def detach_variable(inputs):
def check_recompute_necessary(inputs):
if not any(input_.stop_gradient == False for input_ in inputs
if isinstance(input_, (core.eager.Tensor, paddle.Tensor))):
logger.warn(
logger.warning(
"[Recompute]: None of the inputs to current recompute block need grad, "
"therefore there is NO need to recompute this block in backward !")

@@ -14,7 +14,6 @@
import os
import six
import numpy as np
import warnings

from paddle import framework
import paddle
(The remaining changed files are not shown.)
