Use flake8-comprehensions (ray-project#1976)
* Add flake8 to Travis

* Add flake8-comprehensions

A [flake8 plugin](https://github.com/adamchainz/flake8-comprehensions) that
checks for unnecessary comprehensions and constructor calls.
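
For context, a sketch of the kind of code the plugin flags (rule codes and
exact messages vary across plugin versions; `retries`/`timeout` are just
illustrative names):

```python
# Flagged: an unnecessary list comprehension inside a call that
# accepts any iterable.
total = sum([x * x for x in range(10)])

# Preferred: pass a generator expression directly.
total = sum(x * x for x in range(10))

# Flagged: dict() called with keyword arguments.
config = dict(retries=5, timeout=30)

# Preferred: a dict literal.
config = {'retries': 5, 'timeout': 30}
```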

* Use generators instead of lists where appropriate

Many of the builtins accept generators instead of lists.

This commit applies `flake8-comprehensions` to find them.
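
As an illustration (not drawn from the diff itself), the rewrite avoids
materializing a throwaway list and lets short-circuiting builtins stop early:

```python
values = [3, 1, 4, 1, 5, 9]

# Builds an intermediate list before any() ever looks at it.
found = any([v > 5 for v in values])

# Streams items one at a time, short-circuits on the first
# match, and needs only O(1) extra memory.
found = any(v > 5 for v in values)
```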

* Fix lint error

* Fix some string formatting

The rest can be fixed in another PR.

* Fix compound literals syntax

This should probably be merged after ray-project#1963.

* dict() -> {}

* Use dict literal syntax

dict(...) -> {...}
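
A minimal before/after sketch of this rewrite, reusing names that appear
elsewhere in the diff:

```python
# Before: dict() calls, including the empty-dict case.
wordcounts = dict()
kwargs = dict(sep=',', header=0)

# After: literal syntax, which avoids a builtin-name lookup and a
# function call, and is what flake8-comprehensions recommends.
wordcounts = {}
kwargs = {'sep': ',', 'header': 0}
```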

* Rewrite nested dicts

* Fix hanging indent

* Add missing import

* Add missing quote

* fmt

* Add missing whitespace

* rm duplicate pip install

This is already installed in another file.

* Fix indent

* move `merge_dicts` into utils
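
This page does not show the helper's body, so the following is a hypothetical
sketch of what a `merge_dicts` utility typically looks like; the real helper
moved in this commit may differ:

```python
def merge_dicts(d1, d2):
    """Return a new dict with d2's entries layered over d1's.

    Hypothetical sketch; the actual implementation in Ray's utils
    may have a different signature or semantics.
    """
    merged = d1.copy()
    merged.update(d2)
    return merged
```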

* Bring up to date with `master`

* Add automatic syntax upgrade

* rm pyupgrade

The upgrade-syn.sh script was left in the `.travis` dir in case users still
want to run it on their own.
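
For reference, a sketch of the kind of rewrites pyupgrade performs (the exact
set depends on the pyupgrade version; `pairs` is just an illustrative name):

```python
pairs = [('a', 1), ('b', 2)]

# Before pyupgrade: constructor calls wrapping literals/generators.
s = set([1, 2, 3])
d = dict((k, v) for k, v in pairs)

# After pyupgrade: literal and comprehension syntax.
s = {1, 2, 3}
d = {k: v for k, v in pairs}
```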
alok authored and pcmoritz committed May 20, 2018
1 parent 99ae74e commit f795173
Showing 37 changed files with 330 additions and 273 deletions.
2 changes: 1 addition & 1 deletion .travis/install-dependencies.sh
@@ -77,7 +77,7 @@ elif [[ "$LINT" == "1" ]]; then
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
# Install Python linting tools.
-pip install -q flake8
+pip install -q flake8 flake8-comprehensions
elif [[ "$LINUX_WHEELS" == "1" ]]; then
sudo apt-get install docker
sudo usermod -a -G docker travis
27 changes: 27 additions & 0 deletions .travis/upgrade-syn.sh
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Cause the script to exit if a single command fails
set -eo pipefail

# this stops git rev-parse from failing if we run this from the .git directory
builtin cd "$(dirname "${BASH_SOURCE:-$0}")"

ROOT="$(git rev-parse --show-toplevel)"
builtin cd "$ROOT"

find \
python test \
-name '*.py' -type f \
-not -path 'python/ray/cloudpickle/*' \
-not -path 'python/ray/dataframe/*' \
-exec python -m pyupgrade {} +

if ! git diff --quiet; then
echo 'Reformatted staged files. Please review and stage the changes.'
echo 'Files updated:'
echo

git --no-pager diff --name-only

exit 1
fi
2 changes: 1 addition & 1 deletion examples/cython/cython_main.py
@@ -26,7 +26,7 @@ def run_func(func, *args, **kwargs):
return result


-@click.group(context_settings=dict(help_option_names=["-h", "--help"]))
+@click.group(context_settings={'help_option_names': ['-h', '--help']})
def cli():
"""Working with Cython actors and functions in Ray"""

2 changes: 1 addition & 1 deletion examples/resnet/resnet_main.py
@@ -220,7 +220,7 @@ def train():
while True:
all_weights = ray.get([actor.compute_steps.remote(weight_id)
for actor in train_actors])
-mean_weights = {k: (sum([weights[k] for weights in all_weights]) /
+mean_weights = {k: (sum(weights[k] for weights in all_weights) /
num_gpus)
for k in all_weights[0]}
weight_id = ray.put(mean_weights)
2 changes: 1 addition & 1 deletion examples/streaming/streaming.py
@@ -92,7 +92,7 @@ def next(self):
article_index = 0
while True:
print("article index = {}".format(article_index))
-wordcounts = dict()
+wordcounts = {}
counts = ray.get([reducer.next_reduce_result.remote(article_index)
for reducer in reducers])
for count in counts:
4 changes: 2 additions & 2 deletions python/ray/actor.py
@@ -491,8 +491,8 @@ def pred(x):
# Extract the signatures of each of the methods. This will be used
# to catch some errors if the methods are called with inappropriate
# arguments.
-self._method_signatures = dict()
-self._actor_method_num_return_vals = dict()
+self._method_signatures = {}
+self._actor_method_num_return_vals = {}
for method_name, method in self._actor_methods:
# Print a warning message if the method signature is not
# supported. We don't raise an exception because if the actor
10 changes: 4 additions & 6 deletions python/ray/autoscaler/aws/config.py
@@ -145,10 +145,8 @@ def _configure_key_pair(config):
def _configure_subnet(config):
ec2 = _resource("ec2", config)
subnets = sorted(
-[
-s for s in ec2.subnets.all()
-if s.state == "available" and s.map_public_ip_on_launch
-],
+(s for s in ec2.subnets.all()
+if s.state == "available" and s.map_public_ip_on_launch),
reverse=True, # sort from Z-A
key=lambda subnet: subnet.availability_zone)
if not subnets:
@@ -293,11 +291,11 @@ def _get_key(key_name, config):


def _client(name, config):
-boto_config = Config(retries=dict(max_attempts=BOTO_MAX_RETRIES))
+boto_config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
return boto3.client(name, config["provider"]["region"], config=boto_config)


def _resource(name, config):
-boto_config = Config(retries=dict(max_attempts=BOTO_MAX_RETRIES))
+boto_config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
return boto3.resource(
name, config["provider"]["region"], config=boto_config)
2 changes: 1 addition & 1 deletion python/ray/autoscaler/aws/node_provider.py
@@ -13,7 +13,7 @@
class AWSNodeProvider(NodeProvider):
def __init__(self, provider_config, cluster_name):
NodeProvider.__init__(self, provider_config, cluster_name)
-config = Config(retries=dict(max_attempts=BOTO_MAX_RETRIES))
+config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
self.ec2 = boto3.resource(
"ec2", region_name=provider_config["region"], config=config)

8 changes: 4 additions & 4 deletions python/ray/dataframe/concat.py
@@ -38,17 +38,17 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
"and ray.dataframe.DataFrame objs are "
"valid", type(type_check))

-all_series = all([isinstance(obj, pandas.Series)
-for obj in objs])
+all_series = all(isinstance(obj, pandas.Series)
+for obj in objs)
if all_series:
return pandas.concat(objs, axis, join, join_axes,
ignore_index, keys, levels, names,
verify_integrity, copy)

if isinstance(objs, dict):
raise NotImplementedError(
"Obj as dicts not implemented. To contribute to "
"Pandas on Ray, please visit github.com/ray-project/ray.")
"Obj as dicts not implemented. To contribute to "
"Pandas on Ray, please visit github.com/ray-project/ray.")

axis = pandas.DataFrame()._get_axis_number(axis)

44 changes: 28 additions & 16 deletions python/ray/dataframe/dataframe.py
@@ -668,7 +668,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
mismatch = len(by) != len(self) if axis == 0 \
else len(by) != len(self.columns)

-if all([obj in self for obj in by]) and mismatch:
+if all(obj in self for obj in by) and mismatch:
raise NotImplementedError(
"Groupby with lists of columns not yet supported.")
elif mismatch:
@@ -2194,7 +2194,7 @@ def idxmax(self, axis=0, skipna=True):
A Series with the index for each maximum value for the axis
specified.
"""
-if not all([d != np.dtype('O') for d in self.dtypes]):
+if not all(d != np.dtype('O') for d in self.dtypes):
raise TypeError(
"reduction operation 'argmax' not allowed for this dtype")

@@ -2216,7 +2216,7 @@ def idxmin(self, axis=0, skipna=True):
A Series with the index for each minimum value for the axis
specified.
"""
-if not all([d != np.dtype('O') for d in self.dtypes]):
+if not all(d != np.dtype('O') for d in self.dtypes):
raise TypeError(
"reduction operation 'argmax' not allowed for this dtype")

@@ -3196,9 +3196,9 @@ def quantile_helper(df, base_object):
"""
# This if call prevents ValueErrors with object only partitions
if (numeric_only and
-all([dtype == np.dtype('O') or
-is_timedelta64_dtype(dtype)
-for dtype in df.dtypes])):
+all(dtype == np.dtype('O') or
+is_timedelta64_dtype(dtype)
+for dtype in df.dtypes)):
return base_object
else:
return df.quantile(q=q, axis=axis, numeric_only=numeric_only,
@@ -4224,16 +4224,28 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep="", float_format=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal="."):

-kwargs = dict(
-path_or_buf=path_or_buf, sep=sep, na_rep=na_rep,
-float_format=float_format, columns=columns, header=header,
-index=index, index_label=index_label, mode=mode,
-encoding=encoding, compression=compression, quoting=quoting,
-quotechar=quotechar, line_terminator=line_terminator,
-chunksize=chunksize, tupleize_cols=tupleize_cols,
-date_format=date_format, doublequote=doublequote,
-escapechar=escapechar, decimal=decimal
-)
+kwargs = {
+'path_or_buf': path_or_buf,
+'sep': sep,
+'na_rep': na_rep,
+'float_format': float_format,
+'columns': columns,
+'header': header,
+'index': index,
+'index_label': index_label,
+'mode': mode,
+'encoding': encoding,
+'compression': compression,
+'quoting': quoting,
+'quotechar': quotechar,
+'line_terminator': line_terminator,
+'chunksize': chunksize,
+'tupleize_cols': tupleize_cols,
+'date_format': date_format,
+'doublequote': doublequote,
+'escapechar': escapechar,
+'decimal': decimal
+}

if compression is not None:
warnings.warn("Defaulting to Pandas implementation",
109 changes: 55 additions & 54 deletions python/ray/dataframe/io.py
@@ -208,60 +208,61 @@ def read_csv(filepath_or_buffer,
kwargs: Keyword arguments in pandas::from_csv
"""

-kwargs = dict(
-sep=sep,
-delimiter=delimiter,
-header=header,
-names=names,
-index_col=index_col,
-usecols=usecols,
-squeeze=squeeze,
-prefix=prefix,
-mangle_dupe_cols=mangle_dupe_cols,
-dtype=dtype,
-engine=engine,
-converters=converters,
-true_values=true_values,
-false_values=false_values,
-skipinitialspace=skipinitialspace,
-skiprows=skiprows,
-nrows=nrows,
-na_values=na_values,
-keep_default_na=keep_default_na,
-na_filter=na_filter,
-verbose=verbose,
-skip_blank_lines=skip_blank_lines,
-parse_dates=parse_dates,
-infer_datetime_format=infer_datetime_format,
-keep_date_col=keep_date_col,
-date_parser=date_parser,
-dayfirst=dayfirst,
-iterator=iterator,
-chunksize=chunksize,
-compression=compression,
-thousands=thousands,
-decimal=decimal,
-lineterminator=lineterminator,
-quotechar=quotechar,
-quoting=quoting,
-escapechar=escapechar,
-comment=comment,
-encoding=encoding,
-dialect=dialect,
-tupleize_cols=tupleize_cols,
-error_bad_lines=error_bad_lines,
-warn_bad_lines=warn_bad_lines,
-skipfooter=skipfooter,
-skip_footer=skip_footer,
-doublequote=doublequote,
-delim_whitespace=delim_whitespace,
-as_recarray=as_recarray,
-compact_ints=compact_ints,
-use_unsigned=use_unsigned,
-low_memory=low_memory,
-buffer_lines=buffer_lines,
-memory_map=memory_map,
-float_precision=float_precision)
+kwargs = {
+'sep': sep,
+'delimiter': delimiter,
+'header': header,
+'names': names,
+'index_col': index_col,
+'usecols': usecols,
+'squeeze': squeeze,
+'prefix': prefix,
+'mangle_dupe_cols': mangle_dupe_cols,
+'dtype': dtype,
+'engine': engine,
+'converters': converters,
+'true_values': true_values,
+'false_values': false_values,
+'skipinitialspace': skipinitialspace,
+'skiprows': skiprows,
+'nrows': nrows,
+'na_values': na_values,
+'keep_default_na': keep_default_na,
+'na_filter': na_filter,
+'verbose': verbose,
+'skip_blank_lines': skip_blank_lines,
+'parse_dates': parse_dates,
+'infer_datetime_format': infer_datetime_format,
+'keep_date_col': keep_date_col,
+'date_parser': date_parser,
+'dayfirst': dayfirst,
+'iterator': iterator,
+'chunksize': chunksize,
+'compression': compression,
+'thousands': thousands,
+'decimal': decimal,
+'lineterminator': lineterminator,
+'quotechar': quotechar,
+'quoting': quoting,
+'escapechar': escapechar,
+'comment': comment,
+'encoding': encoding,
+'dialect': dialect,
+'tupleize_cols': tupleize_cols,
+'error_bad_lines': error_bad_lines,
+'warn_bad_lines': warn_bad_lines,
+'skipfooter': skipfooter,
+'skip_footer': skip_footer,
+'doublequote': doublequote,
+'delim_whitespace': delim_whitespace,
+'as_recarray': as_recarray,
+'compact_ints': compact_ints,
+'use_unsigned': use_unsigned,
+'low_memory': low_memory,
+'buffer_lines': buffer_lines,
+'memory_map': memory_map,
+'float_precision': float_precision,
+}

# Default to Pandas read_csv for non-serializable objects
if not isinstance(filepath_or_buffer, str) or \
2 changes: 1 addition & 1 deletion python/ray/dataframe/test/test_dataframe.py
@@ -1783,7 +1783,7 @@ def test_fillna_dtype_conversion(num_partitions=2):
)

# equiv of replace
-df = pd.DataFrame(dict(A=[1, np.nan], B=[1., 2.]))
+df = pd.DataFrame({'A': [1, np.nan], 'B': [1., 2.]})
ray_df = from_pandas(df, num_partitions)
for v in ['', 1, np.nan, 1.0]:
assert ray_df_equals_pandas(
4 changes: 2 additions & 2 deletions python/ray/dataframe/utils.py
@@ -9,7 +9,7 @@
from . import get_npartitions


-_NAN_BLOCKS = dict()
+_NAN_BLOCKS = {}


def _get_nan_block_id(n_row=1, n_col=1, transpose=False):
@@ -225,7 +225,7 @@ def _map_partitions(func, partitions, *argslists):
return [_deploy_func.remote(func, part, argslists[0])
for part in partitions]
else:
-assert(all([len(args) == len(partitions) for args in argslists]))
+assert(all(len(args) == len(partitions) for args in argslists))
return [_deploy_func.remote(func, *args)
for args in zip(partitions, *argslists)]

2 changes: 1 addition & 1 deletion python/ray/experimental/array/distributed/core.py
@@ -241,7 +241,7 @@ def subblocks(a, *ranges):
result = DistArray(shape)
for index in np.ndindex(*result.num_blocks):
result.objectids[index] = a.objectids[tuple(
-[ranges[i][index[i]] for i in range(a.ndim)])]
+ranges[i][index[i]] for i in range(a.ndim))]
return result

