From b4d475bf0966ac148f61d7f42bd9b46155bb04f6 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Sun, 12 Feb 2017 16:22:57 -0800 Subject: [PATCH] Misc cleanups for style and consistency. Change: 147297316 --- .../dirichlet_multinomial_test.py | 7 +- .../python/kernel_tests/multinomial_test.py | 7 +- .../kernel_tests/vector_student_t_test.py | 14 +- .../distributions/python/ops/bernoulli.py | 10 +- .../contrib/distributions/python/ops/beta.py | 10 +- .../distributions/python/ops/bijector.py | 188 +++++++++--------- .../distributions/python/ops/binomial.py | 12 +- .../distributions/python/ops/categorical.py | 15 +- .../contrib/distributions/python/ops/chi2.py | 10 +- .../distributions/python/ops/dirichlet.py | 22 +- .../python/ops/dirichlet_multinomial.py | 28 +-- .../distributions/python/ops/distribution.py | 90 +++++---- .../python/ops/distribution_util.py | 79 ++++---- .../distributions/python/ops/exponential.py | 14 +- .../contrib/distributions/python/ops/gamma.py | 10 +- .../distributions/python/ops/gumbel.py | 12 +- .../distributions/python/ops/inverse_gamma.py | 10 +- .../python/ops/kullback_leibler.py | 6 +- .../distributions/python/ops/laplace.py | 12 +- .../distributions/python/ops/logistic.py | 10 +- .../distributions/python/ops/mixture.py | 20 +- .../distributions/python/ops/multinomial.py | 25 ++- .../distributions/python/ops/mvn_diag.py | 6 +- .../python/ops/mvn_diag_plus_low_rank.py | 8 +- .../python/ops/mvn_linear_operator.py | 8 +- .../distributions/python/ops/mvn_tril.py | 6 +- .../distributions/python/ops/normal.py | 12 +- .../python/ops/normal_conjugate_posteriors.py | 34 ++-- .../python/ops/onehot_categorical.py | 20 +- .../distributions/python/ops/poisson.py | 8 +- .../python/ops/quantized_distribution.py | 17 +- .../python/ops/relaxed_bernoulli.py | 8 +- .../python/ops/relaxed_onehot_categorical.py | 22 +- .../contrib/distributions/python/ops/shape.py | 73 ++++--- .../distributions/python/ops/special_math.py | 20 +- .../distributions/python/ops/student_t.py | 35 ++-- .../python/ops/transformed_distribution.py | 18 +- .../distributions/python/ops/uniform.py | 10 +- .../python/ops/vector_student_t.py | 59 +++--- .../distributions/python/ops/wishart.py | 100 +++++----- 40 files changed, 542 insertions(+), 533 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/dirichlet_multinomial_test.py b/tensorflow/contrib/distributions/python/kernel_tests/dirichlet_multinomial_test.py index 235ce209458041..60703e6997c90c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/dirichlet_multinomial_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/dirichlet_multinomial_test.py @@ -222,9 +222,10 @@ def testCovarianceFromSampling(self): dist = ds.DirichletMultinomial(n, alpha) x = dist.sample(int(250e3), seed=1) sample_mean = math_ops.reduce_mean(x, 0) - x_centered = x - sample_mean[None, ...] + x_centered = x - sample_mean[array_ops.newaxis, ...] 
sample_cov = math_ops.reduce_mean(math_ops.matmul( - x_centered[..., None], x_centered[..., None, :]), 0) + x_centered[..., array_ops.newaxis], + x_centered[..., array_ops.newaxis, :]), 0) sample_var = array_ops.matrix_diag_part(sample_cov) sample_stddev = math_ops.sqrt(sample_var) [ @@ -317,7 +318,7 @@ def testCovarianceNAlphaBroadcast(self): dist = ds.DirichletMultinomial(ns, alpha) covariance = dist.covariance() expected_covariance = shared_matrix * ( - ns * (ns + alpha_0) / (1 + alpha_0))[..., None] + ns * (ns + alpha_0) / (1 + alpha_0))[..., array_ops.newaxis] self.assertEqual([4, 3, 3], covariance.get_shape()) self.assertAllClose(expected_covariance, covariance.eval()) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/multinomial_test.py b/tensorflow/contrib/distributions/python/kernel_tests/multinomial_test.py index ded12c9c4d4ebd..06ea27d8860681 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/multinomial_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/multinomial_test.py @@ -239,7 +239,7 @@ def testCovarianceFromSampling(self): # via broadcast between alpha, n. theta = np.array([[1., 2, 3], [2.5, 4, 0.01]], dtype=np.float32) - theta /= np.sum(theta, 1)[..., None] + theta /= np.sum(theta, 1)[..., array_ops.newaxis] # Ideally we'd be able to test broadcasting but, the multinomial sampler # doesn't support different total counts. n = np.float32(5) @@ -247,9 +247,10 @@ def testCovarianceFromSampling(self): dist = ds.Multinomial(n, theta) # batch_shape=[2], event_shape=[3] x = dist.sample(int(250e3), seed=1) sample_mean = math_ops.reduce_mean(x, 0) - x_centered = x - sample_mean[None, ...] + x_centered = x - sample_mean[array_ops.newaxis, ...] sample_cov = math_ops.reduce_mean(math_ops.matmul( - x_centered[..., None], x_centered[..., None, :]), 0) + x_centered[..., array_ops.newaxis], + x_centered[..., array_ops.newaxis, :]), 0) sample_var = array_ops.matrix_diag_part(sample_cov) sample_stddev = math_ops.sqrt(sample_var) [ diff --git a/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py b/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py index 0a4e7fb5b5b512..9d0ffd63763329 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py @@ -176,7 +176,7 @@ def testProbScalarBaseDistributionNonScalarTransform(self): x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1. expected_mst = _FakeVectorStudentT( - df=np.tile(df, len(scale_diag)), + df=np.tile(df, reps=len(scale_diag)), loc=loc, scale_tril=scale_tril) @@ -207,7 +207,7 @@ def testProbScalarBaseDistributionNonScalarTransformDynamic(self): x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1. 
expected_mst = _FakeVectorStudentT( - df=np.tile(df, len(scale_diag)), + df=np.tile(df, reps=len(scale_diag)), loc=loc, scale_tril=scale_tril) @@ -236,8 +236,9 @@ def testProbNonScalarBaseDistributionScalarTransform(self): expected_mst = _FakeVectorStudentT( df=df, - loc=np.tile(loc[None, :], [len(df), 1]), - scale_tril=np.tile(scale_tril[None, :, :], [len(df), 1, 1])) + loc=np.tile(loc[array_ops.newaxis, :], reps=[len(df), 1]), + scale_tril=np.tile(scale_tril[array_ops.newaxis, :, :], + reps=[len(df), 1, 1])) with self.test_session(): actual_mst = _VectorStudentT(df=df, loc=loc, scale_diag=scale_diag, @@ -261,8 +262,9 @@ def testProbNonScalarBaseDistributionScalarTransformDynamic(self): expected_mst = _FakeVectorStudentT( df=df, - loc=np.tile(loc[None, :], [len(df), 1]), - scale_tril=np.tile(scale_tril[None, :, :], [len(df), 1, 1])) + loc=np.tile(loc[array_ops.newaxis, :], reps=[len(df), 1]), + scale_tril=np.tile(scale_tril[array_ops.newaxis, :, :], + reps=[len(df), 1, 1])) with self.test_session(): df_pl = array_ops.placeholder(dtypes.float32, name="df") diff --git a/tensorflow/contrib/distributions/python/ops/bernoulli.py b/tensorflow/contrib/distributions/python/ops/bernoulli.py index 60f8c114d8e7f5..7e984c4881ecc4 100644 --- a/tensorflow/contrib/distributions/python/ops/bernoulli.py +++ b/tensorflow/contrib/distributions/python/ops/bernoulli.py @@ -57,15 +57,15 @@ def __init__(self, Bernoulli distribution. Only one of `logits` or `probs` should be passed in. dtype: The type of the event samples. Default: `int32`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to - indicate the result is undefined. When `False`, an exception is raised + indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: If p and logits are passed, or if neither are passed. @@ -114,7 +114,7 @@ def _event_shape(self): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - new_shape = array_ops.concat(([n], self.batch_shape_tensor()), 0) + new_shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) uniform = random_ops.random_uniform( new_shape, seed=seed, dtype=self.probs.dtype) sample = math_ops.less(uniform, self.probs) diff --git a/tensorflow/contrib/distributions/python/ops/beta.py b/tensorflow/contrib/distributions/python/ops/beta.py index 53149b3acd25fe..4a59c6ccf4a3e3 100644 --- a/tensorflow/contrib/distributions/python/ops/beta.py +++ b/tensorflow/contrib/distributions/python/ops/beta.py @@ -139,15 +139,15 @@ def __init__(self, concentration0: Positive floating-point `Tensor` indicating mean number of failures; aka "beta". Otherwise has same semantics as `concentration1`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. 
When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[concentration1, @@ -267,7 +267,7 @@ def _variance(self): @distribution_util.AppendDocstring( """Note: The mode is undefined when `concentration1 <= 1` or `concentration0 <= 1`. If `self.allow_nan_stats` is `True`, `NaN` - is used for undefined modes. If `self.allow_nan_stats` is `False` an + is used for undefined modes. If `self.allow_nan_stats` is `False` an exception is raised when one or more modes are undefined.""") def _mode(self): mode = (self.concentration1 - 1.) / (self.total_concentration - 2.) diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py index adb5a4722862f0..d3242af066f640 100644 --- a/tensorflow/contrib/distributions/python/ops/bijector.py +++ b/tensorflow/contrib/distributions/python/ops/bijector.py @@ -20,7 +20,7 @@ Differentiable, bijective transformations of continuous random variables alter the calculations made in the cumulative/probability distribution functions and -sample function. This module provides a standard interface for making these +sample function. This module provides a standard interface for making these manipulations. For more details and examples, see the `Bijector` docstring. @@ -180,7 +180,7 @@ class Bijector(object): [diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a bijective, differentiable function. A `Bijector` is used by `TransformedDistribution` but can be generally used for transforming a - `Distribution` generated `Tensor`. A `Bijector` is characterized by three + `Distribution` generated `Tensor`. A `Bijector` is characterized by three operations: 1. Forward Evaluation @@ -198,7 +198,7 @@ class Bijector(object): "The log of the determinant of the matrix of all first-order partial derivatives of the inverse function." Useful for inverting a transformation to compute one probability in terms - of another. Geometrically, the det(Jacobian) is the volume of the + of another. Geometrically, the det(Jacobian) is the volume of the transformation and is used to scale the probability. By convention, transformations of random variables are named in terms of the @@ -210,7 +210,7 @@ class Bijector(object): - Basic properties: ```python - x = ... # A tensor. + x = ... # A tensor. # Evaluate forward transformation. fwd_x = my_bijector.forward(x) x == my_bijector.inverse(fwd_x) @@ -267,7 +267,7 @@ def _forward_log_det_jacobian(self, x): if self.event_ndims is None: raise ValueError("Jacobian requires known event_ndims.") event_dims = array_ops.shape(x)[-self.event_ndims:] - return math_ops.reduce_sum(x, reduction_indices=event_dims) + return math_ops.reduce_sum(x, axis=event_dims) ``` - "Affine" @@ -292,8 +292,8 @@ def _forward_log_det_jacobian(self, x): partitioning: - Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, - and event (S, B, E) shape semantics. 
Suppose - the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. + and event (S, B, E) shape semantics. Suppose the `Tensor`'s + partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. For `Exp`, the shape of the `Tensor` returned by `forward` and `inverse` is unchanged, i.e., `[4, 2, 3, 3]`. However the shape returned by @@ -308,7 +308,7 @@ def _forward_log_det_jacobian(self, x): - If the `Bijector`'s use is limited to `TransformedDistribution` (or friends like `QuantizedDistribution`) then depending on your use, you may not need - to implement all of `_forward` and `_inverse` functions. Examples: + to implement all of `_forward` and `_inverse` functions. Examples: 1. Sampling (e.g., `sample`) only requires `_forward`. 2. Probability functions (e.g., `prob`, `cdf`, `survival`) only require `_inverse` (and related). @@ -316,7 +316,7 @@ def _forward_log_det_jacobian(self, x): `_inverse` can be implemented as a cache lookup. See `Example Use` [above] which shows how these functions are used to - transform a distribution. (Note: `_forward` could theoretically be + transform a distribution. (Note: `_forward` could theoretically be implemented as a cache lookup but this would require controlling the underlying sample generation mechanism.) @@ -334,7 +334,7 @@ def _forward_log_det_jacobian(self, x): - Subclasses should implement `_forward_event_shape`, `_forward_event_shape_tensor` (and `inverse` counterparts) if the - transformation is shape-changing. By default the event-shape is assumed + transformation is shape-changing. By default the event-shape is assumed unchanged from input. Tips for implementing `_inverse` and `_inverse_log_det_jacobian`: @@ -343,14 +343,14 @@ def _forward_log_det_jacobian(self, x): can be implemented as a cache lookup. - The inverse `log o det o Jacobian` can be implemented as the negative of the - forward `log o det o Jacobian`. This is useful if the `inverse` is + forward `log o det o Jacobian`. This is useful if the `inverse` is implemented as a cache or the inverse Jacobian is computationally more expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following demonstrates the suggested implementation. ```python def _inverse_and_log_det_jacobian(self, y): - x = # ... implement inverse, possibly via cache. + x = ... # implement inverse, possibly via cache. return x, -self._forward_log_det_jac(x) # Note negation. ``` @@ -414,10 +414,10 @@ def __init__(self, Args: event_ndims: number of dimensions associated with event coordinates. graph_parents: Python list of graph prerequisites of this `Bijector`. - is_constant_jacobian: `Boolean` indicating that the Jacobian is not a + is_constant_jacobian: Python `bool` indicating that the Jacobian is not a function of the input. - validate_args: `Boolean`, default `False`. Whether to validate input with - asserts. If `validate_args` is `False`, and the inputs are invalid, + validate_args: Python `bool`, default `False`. Whether to validate input + with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. dtype: `tf.dtype` supported by this `Bijector`. `None` means dtype is not enforced. @@ -462,7 +462,7 @@ def is_constant_jacobian(self): Note: Jacobian is either constant for both forward and inverse or neither. Returns: - `Boolean`. + is_constant_jacobian: Python `bool`. 
""" return self._is_constant_jacobian @@ -733,7 +733,7 @@ def _call_inverse_and_inverse_log_det_jacobian(self, y, name, **kwargs): elif self.is_constant_jacobian: self._constant_ildj = ildj # We use the mapped version of x, even if we re-computed x above with a - # call to self._inverse_and_inverse_log_det_jacobian. This prevents + # call to self._inverse_and_inverse_log_det_jacobian. This prevents # re-evaluation of the inverse in a common case. x = x if mapping.x is None else mapping.x mapping = mapping.merge(x=x, ildj=ildj) @@ -886,7 +886,7 @@ class Inline(Bijector): forward_fn=tf.exp, inverse_fn=tf.log, inverse_log_det_jacobian_fn=( - lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + lambda y: -tf.reduce_sum(tf.log(y), axis=-1)), name="exp") ``` @@ -922,11 +922,11 @@ def __init__(self, static event shape changes. Default: shape is assumed unchanged. inverse_event_shape_tensor_fn: Python callable implementing non-identical event shape changes. Default: shape is assumed unchanged. - is_constant_jacobian: `Boolean` indicating that the Jacobian is constant - for all input arguments. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String`, name given to ops managed by this object. + is_constant_jacobian: Python `bool` indicating that the Jacobian is + constant for all input arguments. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str`, name given to ops managed by this object. """ super(Inline, self).__init__( event_ndims=0, @@ -1021,9 +1021,9 @@ def __init__(self, bijector, validate_args=False, name=None): Args: bijector: Bijector instance. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String`, name given to ops managed by this object. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str`, name given to ops managed by this object. """ self._bijector = bijector @@ -1103,10 +1103,10 @@ def __init__(self, bijectors=(), validate_args=False, name=None): Args: bijectors: Python list of bijector instances. An empty list makes this bijector equivalent to the `Identity` bijector. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String`, name given to ops managed by this object. Default: E.g., - `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str`, name given to ops managed by this object. Default: + E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`. Raises: ValueError: if bijectors have different dtypes. @@ -1246,9 +1246,9 @@ def __init__(self, `Y = g(X) = (1 + X * c)**(1 / c)` where `c` is the `power`. event_ndims: Python scalar indicating the number of dimensions associated with a particular draw from the distribution. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String` name given to ops managed by this object. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. Raises: ValueError: if `power < 0` or is not known statically. 
@@ -1285,24 +1285,24 @@ def _inverse_and_inverse_log_det_jacobian(self, y): event_dims = self._event_dims_tensor(y) if self.power == 0.: x = math_ops.log(y) - ildj = -math_ops.reduce_sum(x, reduction_indices=event_dims) + ildj = -math_ops.reduce_sum(x, axis=event_dims) return x, ildj # TODO(jvdillon): If large y accuracy is an issue, consider using # (y**self.power - 1.) / self.power when y >> 1. x = math_ops.expm1(math_ops.log(y) * self.power) / self.power ildj = (self.power - 1.) * math_ops.reduce_sum( math_ops.log(y), - reduction_indices=event_dims) + axis=event_dims) return x, ildj def _forward_log_det_jacobian(self, x): x = self._maybe_assert_valid_x(x) event_dims = self._event_dims_tensor(x) if self.power == 0.: - return math_ops.reduce_sum(x, reduction_indices=event_dims) + return math_ops.reduce_sum(x, axis=event_dims) return (1. / self.power - 1.) * math_ops.reduce_sum( math_ops.log1p(x * self.power), - reduction_indices=event_dims) + axis=event_dims) def _maybe_assert_valid_x(self, x): if not self.validate_args or self.power == 0.: @@ -1351,9 +1351,9 @@ def __init__(self, Args: event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String` name given to ops managed by this object. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. """ super(Exp, self).__init__( event_ndims=event_ndims, @@ -1376,8 +1376,8 @@ def __init__(self, tril, v, diag=None, validate_args=False): tril: `Tensor` of shape `[B1,..,Bb, d, d]`. v: `Tensor` of shape `[B1,...,Bb, d, k]`. diag: `Tensor` of shape `[B1,...,Bb, k, k]` or None - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. """ self._m = tril self._v = v @@ -1477,7 +1477,7 @@ def sqrt_log_abs_det(self): linalg_ops.matrix_determinant(self._woodbury_sandwiched_term()))) # Reduction is ok because we always prepad inputs to this class. log_det_m = math_ops.reduce_sum(math_ops.log(math_ops.abs( - array_ops.matrix_diag_part(self._m))), reduction_indices=[-1]) + array_ops.matrix_diag_part(self._m))), axis=[-1]) return log_det_c + 2. * self._d.sqrt_log_abs_det() + log_det_m def _woodbury_sandwiched_term(self): @@ -1591,34 +1591,34 @@ def __init__(self, `scale_diag != None` means `scale += tf.diag(scale_diag)`. Args: - shift: Numeric `Tensor`. If this is set to `None`, no shift is applied. + shift: Floating-point `Tensor`. If this is set to `None`, no shift is + applied. scale_identity_multiplier: floating point rank 0 `Tensor` representing a scaling done to the identity matrix. When `scale_identity_multiplier = scale_diag = scale_tril = None` then `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added to `scale`. - scale_diag: Numeric `Tensor` representing the diagonal matrix. - `scale_diag` has shape [N1, N2, ... k], which represents a k x k + scale_diag: Floating-point `Tensor` representing the diagonal matrix. + `scale_diag` has shape [N1, N2, ... k], which represents a k x k diagonal matrix. When `None` no diagonal term is added to `scale`. - scale_tril: Numeric `Tensor` representing the diagonal matrix. - `scale_diag` has shape [N1, N2, ... 
k, k], which represents a k x k
+    scale_tril: Floating-point `Tensor` representing the lower triangular matrix.
+      `scale_tril` has shape [N1, N2, ... k, k], which represents a k x k
       lower triangular matrix. When `None` no `scale_tril` term is added to
       `scale`. The upper triangular elements above the diagonal are ignored.
-    scale_perturb_factor: Numeric `Tensor` representing factor matrix with
-      last two dimensions of shape `(k, r)`.
-      When `None`, no rank-r update is added to `scale`.
-    scale_perturb_diag: Numeric `Tensor` representing the diagonal matrix.
-      `scale_perturb_diag` has shape [N1, N2, ... r], which represents an
-      r x r Diagonal matrix.
-      When `None` low rank updates will take the form `scale_perturb_factor *
-      scale_perturb_factor.T`.
+    scale_perturb_factor: Floating-point `Tensor` representing factor matrix
+      with last two dimensions of shape `(k, r)`. When `None`, no rank-r
+      update is added to `scale`.
+    scale_perturb_diag: Floating-point `Tensor` representing the diagonal
+      matrix. `scale_perturb_diag` has shape [N1, N2, ... r], which
+      represents an `r x r` diagonal matrix. When `None` low rank updates will
+      take the form `scale_perturb_factor * scale_perturb_factor.T`.
     event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
       associated with a particular draw from the distribution. Must be 0 or 1.
-    validate_args: `Boolean` indicating whether arguments should be checked
-      for correctness.
-    name: `String` name given to ops managed by this object.
+    validate_args: Python `bool` indicating whether arguments should be
+      checked for correctness.
+    name: Python `str` name given to ops managed by this object.

   Raises:
     ValueError: if `perturb_diag` is specified but not `perturb_factor`.

@@ -1692,17 +1692,19 @@ def _create_scale_operator(self, identity_multiplier, diag, tril,

     Args:
       identity_multiplier: floating point rank 0 `Tensor` representing a
         scaling done to the identity matrix.
-      diag: Numeric `Tensor` representing the diagonal matrix. `scale_diag` has
-        shape [N1, N2, ... k], which represents a k x k diagonal matrix.
-      tril: Numeric `Tensor` representing the diagonal matrix. `scale_tril` has
-        shape [N1, N2, ... k], which represents a k x k lower triangular matrix.
-      perturb_diag: Numeric `Tensor` representing the diagonal matrix of the
-        low rank update.
-      perturb_factor: Numeric `Tensor` representing factor matrix.
+      diag: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ... k], which represents a k x k
+        diagonal matrix.
+      tril: Floating-point `Tensor` representing the lower triangular matrix.
+        `scale_tril` has shape [N1, N2, ... k, k], which represents a k x k
+        lower triangular matrix.
+      perturb_diag: Floating-point `Tensor` representing the diagonal matrix of
+        the low rank update.
+      perturb_factor: Floating-point `Tensor` representing factor matrix.
       event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution. Must be 0 or 1
-      validate_args: `Boolean` indicating whether arguments should be checked
-        for correctness.
+        associated with a particular draw from the distribution. Must be 0 or 1.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.

     Returns:
       scale. In the case of scaling by a constant, scale is a
@@ -1759,7 +1761,7 @@ def _create_scale_operator(self, identity_multiplier, diag, tril,
       return identity_multiplier
     # Infer the shape from the V and D.
v_shape = array_ops.shape(perturb_factor) - identity_shape = array_ops.concat((v_shape[:-1], (v_shape[-2],)), 0) + identity_shape = array_ops.concat([v_shape[:-1], [v_shape[-2]]], 0) scaled_identity = operator_pd_identity.OperatorPDIdentity( identity_shape, perturb_factor.dtype.base_dtype, @@ -1807,7 +1809,7 @@ def _preprocess_diag(self, identity_multiplier, diag, event_ndims): def _process_matrix(self, matrix, min_rank, event_ndims): """Helper to __init__ which gets matrix in batch-ready form.""" # Pad the matrix so that matmul works in the case of a matrix and vector - # input. Keep track if the matrix was padded, to distinguish between a + # input. Keep track if the matrix was padded, to distinguish between a # rank 3 tensor and a padded rank 2 tensor. # TODO(srvasude): Remove side-effects from functions. Its currently unbroken # but error-prone since the function call order may change in the future. @@ -1895,7 +1897,7 @@ class AffineLinearOperator(Bijector): where `*` denotes the scalar product. Note: we don't always simply transpose `X` (but write it this way for - brevity). Actually the input `X` undergoes the following transformation + brevity). Actually the input `X` undergoes the following transformation before being premultiplied by `scale`: 1. If there are no sample dims, we call `X = tf.expand_dims(X, 0)`, i.e., @@ -1910,8 +1912,8 @@ class AffineLinearOperator(Bijector): (For more details see `shape.make_batch_of_event_sample_matrices`.) The result of the above transformation is that `X` can be regarded as a batch - of matrices where each column is a draw from the distribution. After - premultiplying by `scale`, we take the inverse of this procedure. The input + of matrices where each column is a draw from the distribution. After + premultiplying by `scale`, we take the inverse of this procedure. The input `Y` also undergoes the same transformation before/after premultiplying by `inv(scale)`. @@ -1952,14 +1954,14 @@ def __init__(self, """Instantiates the `AffineLinearOperator` bijector. Args: - shift: Numeric `Tensor`. - scale: Subclass of `LinearOperator`. Represents the (batch) positive + shift: Floating-point `Tensor`. + scale: Subclass of `LinearOperator`. Represents the (batch) positive definite matrix `M` in `R^{k x k}`. event_ndims: Scalar `integer` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. Must be 0 or 1. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String` name given to ops managed by this object. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. Raises: ValueError: if `event_ndims` is not 0 or 1. @@ -2126,21 +2128,21 @@ def _inverse_and_inverse_log_det_jacobian(self, y): event_dims = self._event_dims_tensor(y) # Could also do: # ildj = math_ops.reduce_sum(y - distribution_util.softplus_inverse(y), - # reduction_indices=event_dims) + # axis=event_dims) # but the following is more numerically stable. Ie, # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1] # ==> dX/dY = exp{Y} / (exp{Y} - 1) # = 1 / (1 - exp{-Y}), - # which is the most stable for large Y > 0. For small Y, we use + # which is the most stable for large Y > 0. For small Y, we use # 1 - exp{-Y} approx Y. 
ildj = -math_ops.reduce_sum(math_ops.log(-math_ops.expm1(-y)), - reduction_indices=event_dims) + axis=event_dims) return distribution_util.softplus_inverse(y), ildj def _forward_log_det_jacobian(self, x): # pylint: disable=unused-argument event_dims = self._event_dims_tensor(x) return -math_ops.reduce_sum( - nn_ops.softplus(-x), reduction_indices=event_dims) + nn_ops.softplus(-x), axis=event_dims) class SoftmaxCentered(Bijector): @@ -2148,7 +2150,7 @@ class SoftmaxCentered(Bijector): To implement [softmax](https://en.wikipedia.org/wiki/Softmax_function) as a bijection, the forward transformation appends a value to the input and the - inverse removes this coordinate. The appended coordinate represents a pivot, + inverse removes this coordinate. The appended coordinate represents a pivot, e.g., `softmax(x) = exp(x-c) / sum(exp(x-c))` where `c` is the implicit last coordinate. @@ -2169,7 +2171,7 @@ class SoftmaxCentered(Bijector): At first blush it may seem like the [Invariance of domain]( https://en.wikipedia.org/wiki/Invariance_of_domain) theorem implies this - implementation is not a bijection. However, the appended dimension + implementation is not a bijection. However, the appended dimension makes the (forward) image non-open and the theorem does not directly apply. """ @@ -2287,12 +2289,12 @@ def _inverse(self, y): depth=ndims, on_value=shape[-1]-np.array(1, dtype=shape.dtype), dtype=shape.dtype) - size = array_ops.concat((shape[:-1], np.asarray([1], dtype=shape.dtype)), 0) + size = array_ops.concat([shape[:-1], np.asarray([1], dtype=shape.dtype)], 0) log_normalization = -array_ops.strided_slice(x, begin, begin + size) # Here we slice out all but the last coordinate; see above for idea. begin = array_ops.zeros_like(shape) - size = array_ops.concat((shape[:-1], [shape[-1] - 1]), 0) + size = array_ops.concat([shape[:-1], [shape[-1] - 1]], 0) x = array_ops.strided_slice(x, begin, begin + size) x += log_normalization @@ -2330,7 +2332,7 @@ def _inverse_log_det_jacobian(self, y): # or by noting that det{ dX/dY } = 1 / det{ dY/dX } from Bijector # docstring "Tip". # (2) - https://en.wikipedia.org/wiki/Matrix_determinant_lemma - return -math_ops.reduce_sum(math_ops.log(y), reduction_indices=-1) + return -math_ops.reduce_sum(math_ops.log(y), axis=-1) def _forward_log_det_jacobian(self, x): if self._static_event_ndims == 0: @@ -2343,10 +2345,10 @@ def _forward_log_det_jacobian(self, x): # log_normalization = 1 + reduce_sum(exp(logits)) # -log_normalization + reduce_sum(logits - log_normalization) log_normalization = nn_ops.softplus( - math_ops.reduce_logsumexp(x, reduction_indices=-1, keep_dims=True)) + math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True)) fldj = (-log_normalization + math_ops.reduce_sum(x - log_normalization, - reduction_indices=-1, + axis=-1, keep_dims=True)) return array_ops.squeeze(fldj, squeeze_dims=-1) @@ -2391,9 +2393,9 @@ def __init__(self, event_ndims=2, validate_args=False, event_ndims: `constant` `int32` scalar `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. Must be 0 or 2. - validate_args: `Boolean` indicating whether arguments should be checked - for correctness. - name: `String` name given to ops managed by this object. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. Raises: ValueError: if event_ndims is neither 0 or 2. 
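Note: the `SoftmaxCentered` changes above hinge on the pivot construction described in its docstring: appending an implicit zero coordinate before the softmax makes the map invertible. A small NumPy sketch under that reading; the helper names are illustrative only:

```python
import numpy as np

def softmax_centered_forward(x):
  """Appends an implicit zero pivot then softmaxes: R^k -> (k+1)-simplex."""
  z = np.concatenate([x, np.zeros(x.shape[:-1] + (1,))], axis=-1)
  z -= z.max(axis=-1, keepdims=True)  # for numerical stability
  e = np.exp(z)
  return e / e.sum(axis=-1, keepdims=True)

def softmax_centered_inverse(y):
  """Recovers x: log(y) recentered so the dropped pivot coordinate is 0."""
  log_y = np.log(y)
  return (log_y - log_y[..., -1:])[..., :-1]

x = np.array([1.0, -2.0, 0.3])
y = softmax_centered_forward(x)  # shape [4]; sums to 1
np.testing.assert_allclose(softmax_centered_inverse(y), x, rtol=1e-12)
# Inverse log-det-Jacobian, matching _inverse_log_det_jacobian above:
ildj = -np.log(y).sum(axis=-1)
```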
diff --git a/tensorflow/contrib/distributions/python/ops/binomial.py b/tensorflow/contrib/distributions/python/ops/binomial.py index 273b7620314c1f..4b29aa17e080a9 100644 --- a/tensorflow/contrib/distributions/python/ops/binomial.py +++ b/tensorflow/contrib/distributions/python/ops/binomial.py @@ -120,7 +120,7 @@ def __init__(self, Args: total_count: Non-negative floating point tensor with shape broadcastable to `[N1,..., Nm]` with `m >= 0` and the same dtype as `probs` or - `logits`. Defines this as a batch of `N1 x ... x Nm` different Binomial + `logits`. Defines this as a batch of `N1 x ... x Nm` different Binomial distributions. Its components should be equal to integer values. logits: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm]` `m >= 0`, and @@ -131,15 +131,15 @@ def __init__(self, `[N1,..., Nm]` `m >= 0`, `probs in [0, 1]`. Each entry represents the probability of success for independent Binomial distributions. Only one of `logits` or `probs` should be passed in. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[total_count, logits, probs]) as ns: @@ -221,7 +221,7 @@ def _variance(self): @distribution_util.AppendDocstring( """Note that when `(1 + total_count) * probs` is an integer, there are - actually two modes. Namely, `(1 + total_count) * probs` and + actually two modes. Namely, `(1 + total_count) * probs` and `(1 + total_count) * probs - 1` are both modes. Here we return only the larger of the two modes.""") def _mode(self): diff --git a/tensorflow/contrib/distributions/python/ops/categorical.py b/tensorflow/contrib/distributions/python/ops/categorical.py index 67f3a1cc9360e1..6908faa5ad6718 100644 --- a/tensorflow/contrib/distributions/python/ops/categorical.py +++ b/tensorflow/contrib/distributions/python/ops/categorical.py @@ -97,15 +97,15 @@ def __init__( represents a vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. dtype: The type of the event samples (default: int32). - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. 
When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[logits, probs]) as ns: @@ -133,9 +133,8 @@ def __init__( dtype=dtypes.int32, name="event_size") else: - self._event_size = array_ops.gather(logits_shape, - self._batch_rank, - name="event_size") + with ops.name_scope(name="event_size"): + self._event_size = logits_shape[self._batch_rank] if logits_shape_static[:-1].is_fully_defined(): self._batch_shape_val = constant_op.constant( @@ -192,7 +191,7 @@ def _sample_n(self, n, seed=None): samples = math_ops.cast(samples, self.dtype) ret = array_ops.reshape( array_ops.transpose(samples), - array_ops.concat(([n], self.batch_shape_tensor()), 0)) + array_ops.concat([[n], self.batch_shape_tensor()], 0)) return ret def _log_prob(self, k): diff --git a/tensorflow/contrib/distributions/python/ops/chi2.py b/tensorflow/contrib/distributions/python/ops/chi2.py index d980c705f14f3c..6f3e3700c8f5d0 100644 --- a/tensorflow/contrib/distributions/python/ops/chi2.py +++ b/tensorflow/contrib/distributions/python/ops/chi2.py @@ -70,16 +70,16 @@ def __init__(self, Args: df: Floating point tensor, the degrees of freedom of the - distribution(s). `df` must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + distribution(s). `df` must contain only positive values. + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() # Even though all stats of chi2 are defined for valid parameters, this is diff --git a/tensorflow/contrib/distributions/python/ops/dirichlet.py b/tensorflow/contrib/distributions/python/ops/dirichlet.py index bd713cf08f2bfd..8c95bb3ce6c282 100644 --- a/tensorflow/contrib/distributions/python/ops/dirichlet.py +++ b/tensorflow/contrib/distributions/python/ops/dirichlet.py @@ -142,15 +142,15 @@ def __init__(self, `concentration.shape = [N1, N2, ..., Nm, k]` then `batch_shape = [N1, N2, ..., Nm]` and `event_shape = [k]`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. 
When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[concentration]) as ns: @@ -225,12 +225,13 @@ def _entropy(self): axis=-1)) def _mean(self): - return self.concentration / self.total_concentration[..., None] + return self.concentration / self.total_concentration[..., array_ops.newaxis] def _covariance(self): x = self._variance_scale_term() * self._mean() return array_ops.matrix_set_diag( - -math_ops.matmul(x[..., None], x[..., None, :]), # outer prod + -math_ops.matmul(x[..., array_ops.newaxis], + x[..., array_ops.newaxis, :]), # outer prod self._variance()) def _variance(self): @@ -240,16 +241,17 @@ def _variance(self): def _variance_scale_term(self): """Helper to `_covariance` and `_variance` which computes a shared scale.""" - return math_ops.rsqrt(1. + self.total_concentration[..., None]) + return math_ops.rsqrt(1. + self.total_concentration[..., array_ops.newaxis]) @distribution_util.AppendDocstring( """Note: The mode is undefined when any `concentration <= 1`. If - `self.allow_nan_stats` is `True`, `NaN` is used for undefined modes. If + `self.allow_nan_stats` is `True`, `NaN` is used for undefined modes. If `self.allow_nan_stats` is `False` an exception is raised when one or more modes are undefined.""") def _mode(self): k = math_ops.cast(self.event_shape_tensor()[0], self.dtype) - mode = (self.concentration - 1.) / (self.total_concentration[..., None] - k) + mode = (self.concentration - 1.) / ( + self.total_concentration[..., array_ops.newaxis] - k) if self.allow_nan_stats: nan = array_ops.fill( array_ops.shape(mode), @@ -290,7 +292,7 @@ def _maybe_assert_valid_sample(self, x): x, message="samples must be positive"), distribution_util.assert_close( - array_ops.ones((), dtype=self.dtype), + array_ops.ones([], dtype=self.dtype), math_ops.reduce_sum(x, -1), message="sample last-dimension must sum to `1`"), ], x) diff --git a/tensorflow/contrib/distributions/python/ops/dirichlet_multinomial.py b/tensorflow/contrib/distributions/python/ops/dirichlet_multinomial.py index 11b6826c1ab024..8a8b500331737c 100644 --- a/tensorflow/contrib/distributions/python/ops/dirichlet_multinomial.py +++ b/tensorflow/contrib/distributions/python/ops/dirichlet_multinomial.py @@ -36,10 +36,10 @@ _dirichlet_multinomial_sample_note = """For each batch of counts, -`value = [n_0, ... ,n_{k-1}]`, `P[value]` is the probability that after sampling -`self.total_count` draws from this Dirichlet-Multinomial distribution, the -number of draws falling in class `j` is `n_j`. Since this definition is -[exchangeable]( https://en.wikipedia.org/wiki/Exchangeable_random_variables); +`value = [n_0, ..., n_{k-1}]`, `P[value]` is the probability that after +sampling `self.total_count` draws from this Dirichlet-Multinomial distribution, +the number of draws falling in class `j` is `n_j`. Since this definition is +[exchangeable](https://en.wikipedia.org/wiki/Exchangeable_random_variables); different sequences have the same counts so the probability includes a combinatorial coefficient. @@ -153,22 +153,22 @@ def __init__(self, Args: total_count: Non-negative floating point tensor, whose dtype is the same as `concentration`. The shape is broadcastable to `[N1,..., Nm]` with - `m >= 0`. Defines this as a batch of `N1 x ... x Nm` different + `m >= 0`. Defines this as a batch of `N1 x ... 
x Nm` different Dirichlet multinomial distributions. Its components should be equal to integer values. concentration: Positive floating point tensor, whose dtype is the same as `n` with shape broadcastable to `[N1,..., Nm, k]` `m >= 0`. Defines this as a batch of `N1 x ... x Nm` different `k` class Dirichlet multinomial distributions. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[total_count, concentration]) as ns: @@ -177,8 +177,7 @@ def __init__(self, # we use the last dimension for the distribution, whereas # the batch dimensions are the leading dimensions, which forces the # distribution dimension to be defined explicitly (i.e. it cannot be - # created automatically by prepending). This forces enough - # explicitness. + # created automatically by prepending). This forces enough explicitness. # * All calls involving `counts` eventually require a broadcast between # `counts` and concentration. self._total_count = self._maybe_assert_valid_total_count( @@ -261,7 +260,7 @@ def _prob(self, counts): def _mean(self): return self.total_count * (self.concentration / - self.total_concentration[..., None]) + self.total_concentration[..., array_ops.newaxis]) @distribution_util.AppendDocstring( """The covariance for each batch member is defined as the following: @@ -284,7 +283,8 @@ def _mean(self): def _covariance(self): x = self._variance_scale_term() * self._mean() return array_ops.matrix_set_diag( - -math_ops.matmul(x[..., None], x[..., None, :]), # outer prod + -math_ops.matmul(x[..., array_ops.newaxis], + x[..., array_ops.newaxis, :]), # outer prod self._variance()) def _variance(self): @@ -296,7 +296,7 @@ def _variance_scale_term(self): """Helper to `_covariance` and `_variance` which computes a shared scale.""" # We must take care to expand back the last dim whenever we use the # total_concentration. - c0 = self.total_concentration[..., None] + c0 = self.total_concentration[..., array_ops.newaxis] return math_ops.sqrt((1. + c0 / self.total_count) / (1. + c0)) def _maybe_assert_valid_concentration(self, concentration, validate_args): diff --git a/tensorflow/contrib/distributions/python/ops/distribution.py b/tensorflow/contrib/distributions/python/ops/distribution.py index 31b34cb1b51fe2..7f2e83f61493e5 100644 --- a/tensorflow/contrib/distributions/python/ops/distribution.py +++ b/tensorflow/contrib/distributions/python/ops/distribution.py @@ -63,8 +63,8 @@ def _copy_fn(fn): """ if not callable(fn): raise TypeError("fn is not callable: %s" % fn) - # The blessed way to copy a function. copy.deepcopy fails to create - # a non-reference copy. Since: + # The blessed way to copy a function. copy.deepcopy fails to create a + # non-reference copy. 
Since: # types.FunctionType == type(lambda: None), # and the docstring for the function type states: # @@ -129,7 +129,7 @@ def __new__(mcs, classname, baseclasses, attrs): ValueError: If a `Distribution` public method lacks a docstring. """ if not baseclasses: # Nothing to be done for Distribution - raise TypeError("Expected non-empty baseclass. Does Distribution " + raise TypeError("Expected non-empty baseclass. Does Distribution " "not subclass _BaseDistribution?") which_base = [ base for base in baseclasses @@ -185,7 +185,7 @@ class ReparameterizationType(object): `NOT_REPARAMETERIZED`: Samples from the distribution are not fully reparameterized, and straight-through gradients are either partially - unsupported or are not supported at all. In this case, for purposes of + unsupported or are not supported at all. In this case, for purposes of e.g. RL or variational inference, it is generally safest to wrap the sample results in a `stop_gradients` call and instead use policy gradients / surrogate loss instead. @@ -234,8 +234,8 @@ class Distribution(_BaseDistribution): ### Subclassing Subclasses are expected to implement a leading-underscore version of the - same-named function. The argument signature should be identical except for - the omission of `name="..."`. For example, to enable `log_prob(value, + same-named function. The argument signature should be identical except for + the omission of `name="..."`. For example, to enable `log_prob(value, name="log_prob")` a subclass should implement `_log_prob(value)`. Subclasses can append to public-level docstrings by providing @@ -248,7 +248,7 @@ def _log_prob(self, value): ``` would add the string "Some other details." to the `log_prob` function - docstring. This is implemented as a simple decorator to avoid python + docstring. This is implemented as a simple decorator to avoid python linter complaining about missing Args/Returns/Raises sections in the partial docstrings. @@ -261,7 +261,7 @@ def _log_prob(self, value): `log_prob` reflect this broadcasting, as does the return value of `sample` and `sample_n`. - `sample_n_shape = (n,) + batch_shape + event_shape`, where `sample_n_shape` is + `sample_n_shape = [n] + batch_shape + event_shape`, where `sample_n_shape` is the shape of the `Tensor` returned from `sample_n`, `n` is the number of samples, `batch_shape` defines how many independent distributions there are, and `event_shape` defines the shape of samples from each of those independent @@ -286,19 +286,19 @@ def _log_prob(self, value): # `event_shape_t` is a `Tensor` which will evaluate to []. event_shape_t = u.event_shape_tensor() - # Sampling returns a sample per distribution. `samples` has shape - # (5, 2, 2), which is (n,) + batch_shape + event_shape, where n=5, - # batch_shape=(2, 2), and event_shape=(). + # Sampling returns a sample per distribution. `samples` has shape + # [5, 2, 2], which is [n] + batch_shape + event_shape, where n=5, + # batch_shape=[2, 2], and event_shape=[]. samples = u.sample_n(5) # The broadcasting holds across methods. Here we use `cdf` as an example. The # same holds for `log_cdf` and the likelihood functions. - # `cum_prob` has shape (2, 2) as the `value` argument was broadcasted to the + # `cum_prob` has shape [2, 2] as the `value` argument was broadcasted to the # shape of the `Uniform` instance. cum_prob_broadcast = u.cdf(4.0) - # `cum_prob`'s shape is (2, 2), one per distribution. No broadcasting + # `cum_prob`'s shape is [2, 2], one per distribution. No broadcasting # occurred. 
cum_prob_per_dist = u.cdf([[4.0, 5.0], [6.0, 7.0]]) @@ -311,9 +311,9 @@ def _log_prob(self, value): ### Parameter values leading to undefined statistics or distributions. Some distributions do not have well-defined statistics for all initialization - parameter values. For example, the beta distribution is parameterized by - positive real numbers `a` and `b`, and does not have well-defined mode if - `a < 1` or `b < 1`. + parameter values. For example, the beta distribution is parameterized by + positive real numbers `concentration1` and `concentration0`, and does not have + well-defined mode if `concentration1 < 1` or `concentration0 < 1`. The user is given the option of raising an exception or returning `NaN`. @@ -356,25 +356,28 @@ def __init__(self, Args: dtype: The type of the event samples. `None` implies no type-enforcement. - is_continuous: Python boolean. If `True` this - `Distribution` is continuous over its supported domain. + is_continuous: Python `bool`. If `True` this `Distribution` is continuous + over its supported domain. reparameterization_type: Instance of `ReparameterizationType`. If `distributions.FULLY_REPARAMETERIZED`, this `Distribution` can be reparameterized in terms of some standard distribution with a function whose Jacobian is constant for the support - of the standard distribution. If `distributions.NOT_REPARAMETERIZED`, + of the standard distribution. If `distributions.NOT_REPARAMETERIZED`, then no such reparameterization is available. - validate_args: Python boolean. Whether to validate input with asserts. - If `validate_args` is `False`, and the inputs are invalid, - correct behavior is not guaranteed. - allow_nan_stats: Python boolean. If `False`, raise an - exception if a statistic (e.g., mean, mode) is undefined for any batch - member. If True, batch members with valid parameters leading to - undefined statistics will return `NaN` for this statistic. - parameters: Python dictionary of parameters used to instantiate this + validate_args: Python `bool`, default `False`. When `True` distribution + parameters are checked for validity despite possibly degrading runtime + performance. When `False` invalid inputs may silently render incorrect + outputs. + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics + (e.g., mean, mode, variance) use the value "`NaN`" to indicate the + result is undefined. When `False`, an exception is raised if one or + more of the statistic's batch members are undefined. + parameters: Python `dict` of parameters used to instantiate this `Distribution`. - graph_parents: Python list of graph prerequisites of this `Distribution`. - name: A name for this distribution. Default: subclass name. + graph_parents: Python `list` of graph prerequisites of this + `Distribution`. + name: Python `str` name prefixed to Ops created by this class. Default: + subclass name. Raises: ValueError: if any member of graph_parents is `None` or not a `Tensor`. @@ -419,8 +422,8 @@ def param_static_shapes(cls, sample_shape): This is a class method that describes what key/value arguments are required to instantiate the given `Distribution` so that a particular shape is - returned for that instance's call to `sample()`. Assumes that - the sample's shape is known statically. + returned for that instance's call to `sample()`. Assumes that the sample's + shape is known statically. Subclasses should override class method `_param_shapes` to return constant-valued tensors when constant values are fed. 
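Note: the shape semantics restated in this docstring can be checked directly. A minimal sketch, assuming the contrib API of this era (positional `Uniform` lower/upper bounds; `sample_n` as used in the docstring above):

```python
import tensorflow as tf

ds = tf.contrib.distributions

# batch_shape=[2, 2] of independent scalar Uniforms (event_shape=[]).
u = ds.Uniform(3.0, [[5.0, 6.0], [7.0, 8.0]])

samples = u.sample_n(5)  # shape [5, 2, 2] == [n] + batch_shape + event_shape
cum_prob = u.cdf(4.0)    # scalar value broadcast against batch: shape [2, 2]

with tf.Session() as sess:
  print(sess.run([tf.shape(samples), tf.shape(cum_prob)]))
  # => [array([5, 2, 2], ...), array([2, 2], ...)]
```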
@@ -493,25 +496,24 @@ def reparameterization_type(self): @property def allow_nan_stats(self): - """Python boolean describing behavior when a stat is undefined. + """Python `bool` describing behavior when a stat is undefined. - Stats return +/- infinity when it makes sense. E.g., the variance - of a Cauchy distribution is infinity. However, sometimes the - statistic is undefined, e.g., if a distribution's pdf does not achieve a - maximum within the support of the distribution, the mode is undefined. - If the mean is undefined, then by definition the variance is undefined. - E.g. the mean for Student's T for df = 1 is undefined (no clear way to say - it is either + or - infinity), so the variance = E[(X - mean)^2] is also - undefined. + Stats return +/- infinity when it makes sense. E.g., the variance of a + Cauchy distribution is infinity. However, sometimes the statistic is + undefined, e.g., if a distribution's pdf does not achieve a maximum within + the support of the distribution, the mode is undefined. If the mean is + undefined, then by definition the variance is undefined. E.g. the mean for + Student's T for df = 1 is undefined (no clear way to say it is either + or - + infinity), so the variance = E[(X - mean)**2] is also undefined. Returns: - allow_nan_stats: Python boolean. + allow_nan_stats: Python `bool`. """ return self._allow_nan_stats @property def validate_args(self): - """Python boolean indicated possibly expensive checks are enabled.""" + """Python `bool` indicating possibly expensive checks are enabled.""" return self._validate_args def copy(self, **override_parameters_kwargs): @@ -611,7 +613,7 @@ def is_scalar_event(self, name="is_scalar_event"): name: The name to give this op. Returns: - is_scalar_event: `Boolean` `scalar` `Tensor`. + is_scalar_event: `bool` scalar `Tensor`. """ with self._name_scope(name): return ops.convert_to_tensor( @@ -625,7 +627,7 @@ def is_scalar_batch(self, name="is_scalar_batch"): name: The name to give this op. Returns: - is_scalar_batch: `Boolean` `scalar` `Tensor`. + is_scalar_batch: `bool` scalar `Tensor`. """ with self._name_scope(name): return ops.convert_to_tensor( diff --git a/tensorflow/contrib/distributions/python/ops/distribution_util.py b/tensorflow/contrib/distributions/python/ops/distribution_util.py index 10b4a6ceabde05..0acbb957e2c8a9 100644 --- a/tensorflow/contrib/distributions/python/ops/distribution_util.py +++ b/tensorflow/contrib/distributions/python/ops/distribution_util.py @@ -42,8 +42,8 @@ def assert_close( """Assert that that x and y are within machine epsilon of each other. Args: - x: Numeric `Tensor` - y: Numeric `Tensor` + x: Floating-point `Tensor` + y: Floating-point `Tensor` data: The tensors to print out if the condition is `False`. Defaults to error message and first few entries of `x` and `y`. summarize: Print this many entries of each tensor. @@ -80,7 +80,7 @@ def assert_integer_form( """Assert that x has integer components (or floats equal to integers). Args: - x: Numeric `Tensor` + x: Floating-point `Tensor` data: The tensors to print out if the condition is `False`. Defaults to error message and first few entries of `x` and `y`. summarize: Print this many entries of each tensor. @@ -113,7 +113,7 @@ def same_dynamic_shape(a, b): b: `Tensor` Returns: - `Boolean` `Tensor` representing if both tensors have the same shape. + `bool` `Tensor` representing if both tensors have the same shape. 
""" a = ops.convert_to_tensor(a, name="a") b = ops.convert_to_tensor(b, name="b") @@ -142,15 +142,15 @@ def get_logits_and_probs(logits=None, """Converts logit to probabilities (or vice-versa), and returns both. Args: - logits: Numeric `Tensor` representing log-odds. - probs: Numeric `Tensor` representing probabilities. - multidimensional: `Boolean`, default `False`. + logits: Floating-point `Tensor` representing log-odds. + probs: Floating-point `Tensor` representing probabilities. + multidimensional: Python `bool`, default `False`. If `True`, represents whether the last dimension of `logits` or `probs`, - a `[N1, N2, ... k]` dimensional tensor, representing the + a `[N1, N2, ... k]` dimensional tensor, representing the logit or probability of `shape[-1]` classes. - validate_args: `Boolean`, default `False`. When `True`, either assert `0 <= - probs <= 1` (if not `multidimensional`) or that the last dimension of - `probs` sums to one. + validate_args: Python `bool`, default `False`. When `True`, either assert + `0 <= probs <= 1` (if not `multidimensional`) or that the last dimension + of `probs` sums to one. name: A name for this operation (optional). Returns: @@ -189,7 +189,7 @@ def get_logits_and_probs(logits=None, # Here we don't compute the multidimensional case, in a manner # consistent with respect to the unidimensional case. We do so # following the TF convention. Typically, you might expect to see - # logits = log(probs) - log(gather(probs, pivot)). A side-effect of + # logits = log(probs) - log(probs[pivot]). A side-effect of # being consistent with the TF approach is that the unidimensional case # implicitly handles the second dimension but the multidimensional case # explicitly keeps the pivot dimension. @@ -208,10 +208,10 @@ def log_combinations(n, counts, name="log_combinations"): where `i` runs over all `k` classes. Args: - n: Numeric `Tensor` broadcastable with `counts`. This represents `n` + n: Floating-point `Tensor` broadcastable with `counts`. This represents `n` outcomes. - counts: Numeric `Tensor` broadcastable with `n`. This represents counts - in `k` classes, where `k` is the last dimension of the tensor. + counts: Floating-point `Tensor` broadcastable with `n`. This represents + counts in `k` classes, where `k` is the last dimension of the tensor. name: A name for this operation (optional). Returns: @@ -220,15 +220,14 @@ def log_combinations(n, counts, name="log_combinations"): # First a bit about the number of ways counts could have come in: # E.g. if counts = [1, 2], then this is 3 choose 2. # In general, this is (sum counts)! / sum(counts!) - # The sum should be along the last dimension of counts. This is the + # The sum should be along the last dimension of counts. This is the # "distribution" dimension. Here n a priori represents the sum of counts. with ops.name_scope(name, values=[n, counts]): n = ops.convert_to_tensor(n, name="n") counts = ops.convert_to_tensor(counts, name="counts") total_permutations = math_ops.lgamma(n + 1) counts_factorial = math_ops.lgamma(counts + 1) - redundant_permutations = math_ops.reduce_sum(counts_factorial, - reduction_indices=[-1]) + redundant_permutations = math_ops.reduce_sum(counts_factorial, axis=[-1]) return total_permutations - redundant_permutations @@ -242,7 +241,7 @@ def matrix_diag_transform(matrix, transform=None, name=None): matrix_values = tf.contrib.layers.fully_connected(activations, 4) matrix = tf.reshape(matrix_values, (batch_size, 2, 2)) - # Make the diagonal positive. 
If the upper triangle was zero, this would be a + # Make the diagonal positive. If the upper triangle was zero, this would be a # valid Cholesky factor. chol = matrix_diag_transform(matrix, transform=tf.nn.softplus) @@ -264,7 +263,7 @@ def matrix_diag_transform(matrix, transform=None, name=None): # This is a fully trainable multivariate normal! dist = tf.contrib.distributions.MVNCholesky(mu, chol) - # Standard log loss. Minimizing this will "train" mu and chol, and then dist + # Standard log loss. Minimizing this will "train" mu and chol, and then dist # will be a distribution predicting labels as multivariate Gaussians. loss = -1 * tf.reduce_mean(dist.log_prob(labels)) ``` @@ -272,9 +271,9 @@ def matrix_diag_transform(matrix, transform=None, name=None): Args: matrix: Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are equal. - transform: Element-wise function mapping `Tensors` to `Tensors`. To - be applied to the diagonal of `matrix`. If `None`, `matrix` is returned - unchanged. Defaults to `None`. + transform: Element-wise function mapping `Tensors` to `Tensors`. To + be applied to the diagonal of `matrix`. If `None`, `matrix` is returned + unchanged. Defaults to `None`. name: A name to give created ops. Defaults to "matrix_diag_transform". @@ -308,7 +307,7 @@ def rotate_transpose(x, shift, name="rotate_transpose"): Example: ```python - x = ... # Tensor of shape [1, 2, 3, 4]. + x = ... # Tensor of shape [1, 2, 3, 4]. rotate_transpose(x, -1) # result shape: [2, 3, 4, 1] rotate_transpose(x, -2) # result shape: [3, 4, 1, 2] rotate_transpose(x, 1) # result shape: [4, 1, 2, 3] @@ -321,7 +320,7 @@ def rotate_transpose(x, shift, name="rotate_transpose"): x: `Tensor`. shift: `Tensor`. Number of dimensions to transpose left (shift<0) or transpose right (shift>0). - name: `String`. The name to give this op. + name: Python `str`. The name to give this op. Returns: rotated_x: Input `Tensor` with dimensions circularly rotated by shift. @@ -363,7 +362,7 @@ def rotate_transpose(x, shift, name="rotate_transpose"): ndims - math_ops.mod(shift, ndims)) first = math_ops.range(0, shift) last = math_ops.range(shift, ndims) - perm = array_ops.concat((last, first), 0) + perm = array_ops.concat([last, first], 0) return array_ops.transpose(x, perm=perm) @@ -383,7 +382,7 @@ def pick_vector(cond, cond: `Tensor`. Must have `dtype=tf.bool` and be scalar. true_vector: `Tensor` of one dimension. Returned when cond is `True`. false_vector: `Tensor` of one dimension. Returned when cond is `False`. - name: `String`. The name to give this op. + name: Python `str`. The name to give this op. Example: @@ -419,7 +418,7 @@ def pick_vector(cond, false_vector.name, false_vector.dtype)) n = array_ops.shape(true_vector)[0] return array_ops.slice( - array_ops.concat((true_vector, false_vector), 0), + array_ops.concat([true_vector, false_vector], 0), [array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)]) @@ -438,13 +437,13 @@ def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"): b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e., `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`. - Although the non-batch complexity is O(n^2), large constants and sub-optimal + Although the non-batch complexity is O(n**2), large constants and sub-optimal vectorization means the complexity of this function is 5x slower than zeroing - out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`. This + out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`. 
This
   function becomes competitive only when several matmul/cholesky/etc ops can be
-  ellided in constructing the input.  Example: wiring a fully connected layer as
+  elided in constructing the input. Example: wiring a fully connected layer as
   a covariance matrix; this function reduces the final layer by 2x and possibly
-  reduces the network arch complexity considerably.  In most cases it is better
+  reduces the network arch complexity considerably. In most cases it is better
   to simply build a full matrix and zero out the upper triangular elements,
   e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
   construct a lower triangular.
@@ -463,10 +462,10 @@ def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"):
 
   Args:
     x: `Tensor` representing lower triangular elements.
-    validate_args: `Boolean`, default `False`.  Whether to ensure the shape of
-      `x` can be mapped to a lower triangular matrix (controls non-static checks
-      only).
-    name: `String`. The name to give this op.
+    validate_args: Python `bool`, default `False`. Whether to ensure the shape
+      of `x` can be mapped to a lower triangular matrix (controls non-static
+      checks only).
+    name: Python `str`. The name to give this op.
 
   Returns:
     tril: `Tensor` with lower triangular elements filled from `x`.
@@ -476,7 +475,7 @@ def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"):
     lower triangular matrix.
   """
   # TODO(jvdillon): Replace this code with dedicated op when it exists.
-  with ops.name_scope(name, values=(x,)):
+  with ops.name_scope(name, values=[x]):
     x = ops.convert_to_tensor(x, name="x")
     if (x.get_shape().ndims is not None and
         x.get_shape()[-1].value is not None):
@@ -509,7 +508,7 @@ def tril_ids(n):
       ids = np.arange(n**2, dtype=np.int32)
       rows = (ids / n).astype(np.int32)  # Implicit floor.
       # We need to stop incrementing the index when we encounter
-      # upper-triangular elements.  The idea here is to compute the
+      # upper-triangular elements. The idea here is to compute the
       # lower-right number of zeros then by "symmetry" subtract this from the
       # total number of zeros, n(n-1)/2.
       # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
@@ -586,7 +585,7 @@ def softplus_inverse(x, name=None):
  #               = Log[1 - exp{-x}] + x                           (3)
  # (2) is the "obvious" inverse, but (3) is more stable than (2) for large x.
  # For small x (e.g. x = 1e-10), (3) will become -inf since 1 - exp{-x} will
-  # be zero.  To fix this, we use 1 - exp{-x} approx x for small x > 0.
+  # be zero. To fix this, we use 1 - exp{-x} approx x for small x > 0.
  #
  # In addition to the numerically stable derivation above, we clamp
  # small/large values to be congruent with the logic in:
@@ -598,7 +597,7 @@ def softplus_inverse(x, name=None):
  # gradient of `where` behaves like `pred*pred_true + (1-pred)*pred_false`
  # thus an `inf` in an unselected path results in `0*inf=nan`. We are careful
  # to overwrite `x` with ones only when we will never actually use this
-  # value.  Note that we use ones and not zeros since `log(expm1(0.)) = -inf`.
+  # value. Note that we use ones and not zeros since `log(expm1(0.)) = -inf`.
  threshold = np.log(np.finfo(x.dtype.as_numpy_dtype).eps) + 2.
is_too_small = math_ops.less(x, np.exp(threshold)) is_too_large = math_ops.greater(x, -threshold) diff --git a/tensorflow/contrib/distributions/python/ops/exponential.py b/tensorflow/contrib/distributions/python/ops/exponential.py index 6bff48c31787de..0d49721e7eeea6 100644 --- a/tensorflow/contrib/distributions/python/ops/exponential.py +++ b/tensorflow/contrib/distributions/python/ops/exponential.py @@ -78,15 +78,15 @@ def __init__(self, Args: rate: Floating point tensor, equivalent to `1 / mean`. Must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() # Even though all statistics of are defined for valid inputs, this is not @@ -96,7 +96,7 @@ def __init__(self, with ops.name_scope(name, values=[rate]) as ns: self._rate = ops.convert_to_tensor(rate, name="rate") super(Exponential, self).__init__( - concentration=array_ops.ones((), dtype=self._rate.dtype), + concentration=array_ops.ones([], dtype=self._rate.dtype), rate=self._rate, allow_nan_stats=allow_nan_stats, validate_args=validate_args, @@ -116,13 +116,13 @@ def rate(self): return self._rate def _sample_n(self, n, seed=None): - shape = array_ops.concat(([n], array_ops.shape(self._rate)), 0) + shape = array_ops.concat([[n], array_ops.shape(self._rate)], 0) # Sample uniformly-at-random from the open-interval (0, 1). sampled = random_ops.random_uniform( shape, minval=np.nextafter(self.dtype.as_numpy_dtype(0.), self.dtype.as_numpy_dtype(1.)), - maxval=array_ops.ones((), dtype=self.dtype), + maxval=array_ops.ones([], dtype=self.dtype), seed=seed, dtype=self.dtype) return -math_ops.log(sampled) / self._rate diff --git a/tensorflow/contrib/distributions/python/ops/gamma.py b/tensorflow/contrib/distributions/python/ops/gamma.py index cec72f1ec665f0..a0c64b47aaf2b8 100644 --- a/tensorflow/contrib/distributions/python/ops/gamma.py +++ b/tensorflow/contrib/distributions/python/ops/gamma.py @@ -112,15 +112,15 @@ def __init__(self, distribution(s). Must contain only positive values. rate: Floating point tensor, the inverse scale params of the distribution(s). Must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. 
When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: TypeError: if `concentration` and `rate` are different dtypes. @@ -231,7 +231,7 @@ def _stddev(self): @distribution_util.AppendDocstring( """The mode of a gamma distribution is `(shape - 1) / rate` when - `shape > 1`, and `NaN` otherwise. If `self.allow_nan_stats` is `False`, + `shape > 1`, and `NaN` otherwise. If `self.allow_nan_stats` is `False`, an exception will be raised rather than returning `NaN`.""") def _mode(self): mode = (self.concentration - 1.) / self.rate diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py index 8a445f87f13d0d..704a0021ba7f11 100644 --- a/tensorflow/contrib/distributions/python/ops/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/gumbel.py @@ -110,15 +110,15 @@ def __init__(self, loc: Floating point tensor, the means of the distribution(s). scale: Floating point tensor, the scales of the distribution(s). scale must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to - indicate the result is undefined. When `False`, an exception is raised + indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: TypeError: if loc and scale are different dtypes. @@ -129,7 +129,7 @@ def __init__(self, validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") - contrib_tensor_util.assert_same_float_dtype((self._loc, self._scale)) + contrib_tensor_util.assert_same_float_dtype([self._loc, self._scale]) super(_Gumbel, self).__init__( dtype=self._scale.dtype, is_continuous=True, @@ -171,7 +171,7 @@ def _event_shape(self): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - shape = array_ops.concat(([n], array_ops.shape(self.mean())), 0) + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) np_dtype = self.dtype.as_numpy_dtype # Uniform variates must be sampled from the interval (0,1] rather than # [0,1], as they are passed through log() to compute Gumbel variates. diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py index 7b6700341ebb67..a74fb350d1706d 100644 --- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py +++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py @@ -111,15 +111,15 @@ def __init__(self, distribution(s). Must contain only positive values. rate: Floating point tensor, the inverse scale params of the distribution(s). Must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. 
When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: @@ -223,7 +223,7 @@ def _entropy(self): @distribution_util.AppendDocstring( """The mean of an inverse gamma distribution is `rate / (concentration - 1)`, when `concentration > 1`, and `NaN` - otherwise. If `self.allow_nan_stats` is `False`, an exception will be + otherwise. If `self.allow_nan_stats` is `False`, an exception will be raised rather than returning `NaN`""") def _mean(self): mean = self.rate / (self.concentration - 1.) diff --git a/tensorflow/contrib/distributions/python/ops/kullback_leibler.py b/tensorflow/contrib/distributions/python/ops/kullback_leibler.py index 47411817bfe165..f24f01235a2072 100644 --- a/tensorflow/contrib/distributions/python/ops/kullback_leibler.py +++ b/tensorflow/contrib/distributions/python/ops/kullback_leibler.py @@ -66,7 +66,7 @@ def kl(dist_a, dist_b, allow_nan=False, name=None): dist_b: The second distribution. allow_nan: If `False` (default), a runtime error is raised if the KL returns NaN values for any batch entry of the given - distributions. If `True`, the KL may return a NaN for the given entry. + distributions. If `True`, the KL may return a NaN for the given entry. name: (optional) Name scope to use for created operations. Returns: @@ -80,7 +80,7 @@ def kl(dist_a, dist_b, allow_nan=False, name=None): if kl_fn is None: raise NotImplementedError( "No KL(dist_a || dist_b) registered for dist_a type %s and dist_b " - "type %s" % ((type(dist_a).__name__, type(dist_b).__name__))) + "type %s" % (type(dist_a).__name__, type(dist_b).__name__)) with ops.name_scope("KullbackLeibler"): kl_t = kl_fn(dist_a, dist_b, name=name) @@ -95,7 +95,7 @@ def kl(dist_a, dist_b, allow_nan=False, name=None): math_ops.logical_not( math_ops.reduce_any(math_ops.is_nan(kl_t))), ["KL calculation between %s and %s returned NaN values " - "(and was called with allow_nan=False). Values:" + "(and was called with allow_nan=False). Values:" % (dist_a.name, dist_b.name), kl_t])]): return array_ops.identity(kl_t, name="checked_kl") diff --git a/tensorflow/contrib/distributions/python/ops/laplace.py b/tensorflow/contrib/distributions/python/ops/laplace.py index c47b66620c723e..4951a41fd5509d 100644 --- a/tensorflow/contrib/distributions/python/ops/laplace.py +++ b/tensorflow/contrib/distributions/python/ops/laplace.py @@ -86,15 +86,15 @@ def __init__(self, of the distribution. scale: Positive floating point tensor which characterizes the spread of the distribution. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. 
When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to - indicate the result is undefined. When `False`, an exception is raised + indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: TypeError: if `loc` and `scale` are of different dtype. @@ -105,7 +105,7 @@ def __init__(self, validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") - contrib_tensor_util.assert_same_float_dtype((self._loc, self._scale)) + contrib_tensor_util.assert_same_float_dtype([self._loc, self._scale]) super(Laplace, self).__init__( dtype=self._loc.dtype, is_continuous=True, @@ -147,7 +147,7 @@ def _event_shape(self): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - shape = array_ops.concat(([n], self.batch_shape_tensor()), 0) + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) # Sample uniformly-at-random from the open-interval (-1, 1). uniform_samples = random_ops.random_uniform( shape=shape, diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py index 2fb00dbc581732..c9aab3dde117f4 100644 --- a/tensorflow/contrib/distributions/python/ops/logistic.py +++ b/tensorflow/contrib/distributions/python/ops/logistic.py @@ -108,13 +108,13 @@ def __init__(self, loc: Floating point tensor, the means of the distribution(s). scale: Floating point tensor, the scales of the distribution(s). Must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: The name to give Ops created by the initializer. 
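For reference, a minimal sketch of the list-valued `concat` convention these hunks adopt when assembling sample shapes (the tensors here are stand-ins, not code from this patch):

```python
import tensorflow as tf

n = 3
batch_shape = tf.constant([2, 4], dtype=tf.int32)  # stand-in for batch_shape_tensor()
# Build [n] + batch_shape as a single 1-D int32 Tensor; a Python list of
# values (rather than a tuple) is the style used throughout this change.
sample_shape = tf.concat([[n], batch_shape], 0)  # => [3, 2, 4]
```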
@@ -127,7 +127,7 @@ def __init__(self, validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") - contrib_tensor_util.assert_same_float_dtype((self._loc, self._scale)) + contrib_tensor_util.assert_same_float_dtype([self._loc, self._scale]) super(Logistic, self).__init__( dtype=self._scale.dtype, is_continuous=True, @@ -169,7 +169,7 @@ def _event_shape(self): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - shape = array_ops.concat(([n], array_ops.shape(self.mean())), 0) + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) np_dtype = self.dtype.as_numpy_dtype minval = np.nextafter(np_dtype(0), np_dtype(1)) uniform = random_ops.random_uniform(shape=shape, diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index 2ba3d2546d4730..1c421c8fa09d98 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -66,13 +66,13 @@ def __init__(self, components: A list or tuple of `Distribution` instances. Each instance must have the same type, be defined on the same domain, and have matching `event_shape` and `batch_shape`. - validate_args: `Boolean`, default `False`. If `True`, raise a runtime + validate_args: Python `bool`, default `False`. If `True`, raise a runtime error if batch or event ranks are inconsistent between cat and any of - the distributions. This is only checked if the ranks cannot be + the distributions. This is only checked if the ranks cannot be determined statically at graph construction time. - allow_nan_stats: Boolean, default `True`. If `False`, raise an + allow_nan_stats: Boolean, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any - batch member. If `True`, batch members with valid parameters leading to + batch member. If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: A name for this distribution (optional). @@ -265,7 +265,7 @@ def _sample_n(self, n, seed=None): else: event_shape = self.event_shape_tensor() - # Get indices into the raw cat sampling tensor. We will + # Get indices into the raw cat sampling tensor. We will # need these to stitch sample values back out after sampling # within the component partitions. samples_raw_indices = array_ops.reshape( @@ -315,7 +315,7 @@ def _sample_n(self, n, seed=None): # For sample s, batch element b of component c, we get the # partitioned batch indices from # partitioned_batch_indices[c]; and shift each element by - # the sample index. The final lookup can be thought of as + # the sample index. The final lookup can be thought of as # a matrix gather along locations (s, b) in # samples_class_c where the n_class rows correspond to # samples within this component and the batch_size columns @@ -329,7 +329,7 @@ def _sample_n(self, n, seed=None): partitioned_batch_indices[c]) samples_class_c = array_ops.reshape( samples_class_c, - array_ops.concat(([n_class * batch_size], event_shape), 0)) + array_ops.concat([[n_class * batch_size], event_shape], 0)) samples_class_c = array_ops.gather( samples_class_c, lookup_partitioned_batch_indices, name="samples_class_c_gather") @@ -340,8 +340,8 @@ def _sample_n(self, n, seed=None): indices=partitioned_samples_indices, data=samples_class) # Reshape back to proper sample, batch, and event shape. 
ret = array_ops.reshape(lhs_flat_ret, - array_ops.concat((samples_shape, - self.event_shape_tensor()), 0)) + array_ops.concat([samples_shape, + self.event_shape_tensor()], 0)) ret.set_shape( tensor_shape.TensorShape(static_samples_shape).concatenate( self.event_shape)) @@ -361,7 +361,7 @@ def entropy_lower_bound(self, name="entropy_lower_bound"): \\) where \\( p \\) is the prior distribution, \\( q \\) is the variational, - and \\( H[q] \\) is the entropy of \\( q \\). If there is a lower bound + and \\( H[q] \\) is the entropy of \\( q \\). If there is a lower bound \\( G[q] \\) such that \\( H[q] \geq G[q] \\) then it can be used in place of \\( H[q] \\). diff --git a/tensorflow/contrib/distributions/python/ops/multinomial.py b/tensorflow/contrib/distributions/python/ops/multinomial.py index bcf2acb715288a..de8a96a6dcc60b 100644 --- a/tensorflow/contrib/distributions/python/ops/multinomial.py +++ b/tensorflow/contrib/distributions/python/ops/multinomial.py @@ -136,7 +136,7 @@ def __init__(self, Args: total_count: Non-negative floating point tensor with shape broadcastable to `[N1,..., Nm]` with `m >= 0`. Defines this as a batch of - `N1 x ... x Nm` different Multinomial distributions. Its components + `N1 x ... x Nm` different Multinomial distributions. Its components should be equal to integer values. logits: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm, k], m >= 0`, @@ -144,19 +144,19 @@ def __init__(self, `N1 x ... x Nm` different `k` class Multinomial distributions. Only one of `logits` or `probs` should be passed in. probs: Positive floating point tensor with shape broadcastable to - `[N1,..., Nm, k]` `m >= 0` and same dtype as `total_count`. Defines + `[N1,..., Nm, k]` `m >= 0` and same dtype as `total_count`. Defines this as a batch of `N1 x ... x Nm` different `k` class Multinomial distributions. `probs`'s components in the last portion of its shape should sum to `1`. Only one of `logits` or `probs` should be passed in. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. 
""" parameters = locals() with ops.name_scope(name, values=[total_count, logits, probs]) as ns: @@ -169,7 +169,7 @@ def __init__(self, multidimensional=True, validate_args=validate_args, name=name) - self._mean_val = self._total_count[..., None] * self._probs + self._mean_val = self._total_count[..., array_ops.newaxis] * self._probs super(Multinomial, self).__init__( dtype=self._probs.dtype, is_continuous=False, @@ -229,7 +229,7 @@ def _sample_n(self, n, seed=None): seed=seed) draws = array_ops.reshape(draws, shape=[-1, n, n_draws]) x = math_ops.reduce_sum(array_ops.one_hot(draws, depth=k), - reduction_indices=-2) # shape: [B, n, k] + axis=-2) # shape: [B, n, k] x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) return array_ops.reshape(x, final_shape) @@ -254,13 +254,16 @@ def _mean(self): return array_ops.identity(self._mean_val) def _covariance(self): - p = self.probs * array_ops.ones_like(self.total_count)[..., None] + p = self.probs * array_ops.ones_like( + self.total_count)[..., array_ops.newaxis] return array_ops.matrix_set_diag( - -math_ops.matmul(self._mean_val[..., None], p[..., None, :]), + -math_ops.matmul(self._mean_val[..., array_ops.newaxis], + p[..., array_ops.newaxis, :]), # outer product self._variance()) def _variance(self): - p = self.probs * array_ops.ones_like(self.total_count)[..., None] + p = self.probs * array_ops.ones_like( + self.total_count)[..., array_ops.newaxis] return self._mean_val - self._mean_val * p def _maybe_assert_valid_total_count(self, total_count, validate_args): diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py index edc52517696945..d409f35fa52965 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py @@ -180,15 +180,15 @@ def __init__(self, `k x k` identity matrices added to `scale`. When both `scale_identity_multiplier` and `scale_diag` are `None` then `scale` is the `Identity`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py index 51487cf3a32fee..9806839106ad08 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py @@ -199,18 +199,18 @@ def __init__(self, scale_perturb_diag: Floating-point `Tensor` representing a diagonal matrix inside the rank-`r` perturbation added to `scale`. 
May have shape `[B1, ..., Bb, r]`, `b >= 0`, and characterizes `b`-batches of `r x r` - diagonal matrices inside the perturbation added to `scale`. When + diagonal matrices inside the perturbation added to `scale`. When `None`, an identity matrix is used inside the perturbation. Can only be specified if `scale_perturb_factor` is also specified. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if at most `scale_identity_multiplier` is specified. diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py index f6f26a0b1d22fe..d16d4aa2fbae89 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py @@ -160,10 +160,10 @@ def __init__(self, `b >= 0` and `k` is the event size. scale: Instance of `LinearOperator` with same `dtype` as `loc` and shape `[B1, ..., Bb, k, k]`. - validate_args: `Boolean`, default `False`. Whether to validate input + validate_args: Python `bool`, default `False`. Whether to validate input with asserts. If `validate_args` is `False`, and the inputs are invalid, correct behavior is not guaranteed. - allow_nan_stats: `Boolean`, default `True`. If `False`, raise an + allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any batch member If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. @@ -324,7 +324,7 @@ def _kl_brute_force(a, b, name=None): This `Op` computes the trace by solving `C_b^{-1} C_a`. Although efficient methods for solving systems with `C_b` may be available, a dense version of - (the square root of) `C_a` is used, so performance is `O(B s k^2)` where `B` + (the square root of) `C_a` is used, so performance is `O(B s k**2)` where `B` is the batch size, and `s` is the cost of solving `C_b x = y` for vectors `x` and `y`. @@ -362,7 +362,7 @@ def is_diagonal(x): # tr[inv(Cb) Ca] = tr[inv(B)' inv(B) A A'] # = tr[inv(B) A A' inv(B)'] # = tr[(inv(B) A) (inv(B) A)'] - # = sum_{ij} (inv(B) A)_{ij}^2 + # = sum_{ij} (inv(B) A)_{ij}**2 # = ||inv(B) A||_F**2 # where ||.||_F is the Frobenius norm and the second equality follows from # the cyclic permutation property. diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py index 8fdc0822c49def..f0ba05b0a8b3d2 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py @@ -161,15 +161,15 @@ def __init__(self, scale_tril: Floating-point, lower-triangular `Tensor` with non-zero diagonal elements. 
`scale_tril` has shape `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: if neither `loc` nor `scale_tril` are specified. diff --git a/tensorflow/contrib/distributions/python/ops/normal.py b/tensorflow/contrib/distributions/python/ops/normal.py index 52634d2ff4c9dd..770a81cf1af04a 100644 --- a/tensorflow/contrib/distributions/python/ops/normal.py +++ b/tensorflow/contrib/distributions/python/ops/normal.py @@ -117,15 +117,15 @@ def __init__(self, loc: Floating point tensor; the means of the distribution(s). scale: Floating point tensor; the stddevs of the distribution(s). Must contain only positive values. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to - indicate the result is undefined. When `False`, an exception is raised + indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: TypeError: if `loc` and `scale` have different `dtype`. 
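For reference, a minimal sketch of the location-scale sampling that `_sample_n` performs below, assuming the `Normal` exported by `tf.contrib.distributions` (values are illustrative):

```python
import tensorflow as tf

ds = tf.contrib.distributions

dist = ds.Normal(loc=[0., 10.], scale=[1., 2.])  # batch_shape == [2]
x = dist.sample(4)  # shape [4, 2]; standard normal draws times scale plus loc
```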
@@ -136,7 +136,7 @@ def __init__(self, validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") - contrib_tensor_util.assert_same_float_dtype((self._loc, self._scale)) + contrib_tensor_util.assert_same_float_dtype([self._loc, self._scale]) super(Normal, self).__init__( dtype=self._scale.dtype, is_continuous=True, @@ -180,7 +180,7 @@ def _event_shape(self): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - shape = array_ops.concat(([n], array_ops.shape(self.mean())), 0) + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) sampled = random_ops.random_normal( shape=shape, mean=0., stddev=1., dtype=self.loc.dtype, seed=seed) return sampled * self.scale + self.loc diff --git a/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py b/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py index bb4970ae908a9e..10e934326a1f09 100644 --- a/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py +++ b/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py @@ -28,7 +28,7 @@ def normal_conjugates_known_scale_posterior(prior, scale, s, n): This model assumes that `n` observations (with sum `s`) come from a Normal with unknown mean `loc` (described by the Normal `prior`) - and known variance `scale^2`. The "known scale posterior" is + and known variance `scale**2`. The "known scale posterior" is the distribution of the unknown `loc`. Accepts a prior Normal distribution object, having parameters @@ -38,12 +38,12 @@ def normal_conjugates_known_scale_posterior(prior, scale, s, n): `n` (the number(s) of observations). Returns a posterior (also Normal) distribution object, with parameters - `(loc', scale'^2)`, where: + `(loc', scale'**2)`, where: ``` - mu ~ N(mu', sigma'^2) - sigma'^2 = 1/(1/sigma0^2 + n/sigma^2), - mu' = (mu0/sigma0^2 + s/sigma^2) * sigma'^2. + mu ~ N(mu', sigma'**2) + sigma'**2 = 1/(1/sigma0**2 + n/sigma**2), + mu' = (mu0/sigma0**2 + s/sigma**2) * sigma'**2. ``` Distribution parameters from `prior`, as well as `scale`, `s`, and `n`. @@ -54,8 +54,8 @@ def normal_conjugates_known_scale_posterior(prior, scale, s, n): the prior distribution having parameters `(loc0, scale0)`. scale: tensor of type `dtype`, taking values `scale > 0`. The known stddev parameter(s). - s: Tensor of type `dtype`. The sum(s) of observations. - n: Tensor of type `int`. The number(s) of observations. + s: Tensor of type `dtype`. The sum(s) of observations. + n: Tensor of type `int`. The number(s) of observations. Returns: A new Normal posterior distribution object for the unknown observation @@ -87,7 +87,7 @@ def normal_conjugates_known_scale_predictive(prior, scale, s, n): This model assumes that `n` observations (with sum `s`) come from a Normal with unknown mean `loc` (described by the Normal `prior`) - and known variance `scale^2`. The "known scale predictive" + and known variance `scale**2`. The "known scale predictive" is the distribution of new observations, conditioned on the existing observations and our prior. @@ -97,20 +97,20 @@ def normal_conjugates_known_scale_predictive(prior, scale, s, n): and statistical estimates `s` (the sum(s) of the observations) and `n` (the number(s) of observations). 
- Calculates the Normal distribution(s) `p(x | sigma^2)`: + Calculates the Normal distribution(s) `p(x | sigma**2)`: ``` - p(x | sigma^2) = int N(x | mu, sigma^2) N(mu | prior.loc, prior.scale**2) dmu - = N(x | prior.loc, 1/(sigma^2 + prior.scale**2)) + p(x | sigma**2) = int N(x | mu, sigma**2)N(mu | prior.loc, prior.scale**2) dmu + = N(x | prior.loc, 1 / (sigma**2 + prior.scale**2)) ``` Returns the predictive posterior distribution object, with parameters - `(loc', scale'^2)`, where: + `(loc', scale'**2)`, where: ``` - sigma_n^2 = 1/(1/sigma0^2 + n/sigma^2), - mu' = (mu0/sigma0^2 + s/sigma^2) * sigma_n^2. - sigma'^2 = sigma_n^2 + sigma^2, + sigma_n**2 = 1/(1/sigma0**2 + n/sigma**2), + mu' = (mu0/sigma0**2 + s/sigma**2) * sigma_n**2. + sigma'**2 = sigma_n**2 + sigma**2, ``` Distribution parameters from `prior`, as well as `scale`, `s`, and `n`. @@ -121,8 +121,8 @@ def normal_conjugates_known_scale_predictive(prior, scale, s, n): the prior distribution having parameters `(loc0, scale0)`. scale: tensor of type `dtype`, taking values `scale > 0`. The known stddev parameter(s). - s: Tensor of type `dtype`. The sum(s) of observations. - n: Tensor of type `int`. The number(s) of observations. + s: Tensor of type `dtype`. The sum(s) of observations. + n: Tensor of type `int`. The number(s) of observations. Returns: A new Normal predictive distribution object. diff --git a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py index 22f0d6d35f7924..7ebc48f004665c 100644 --- a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py @@ -43,11 +43,11 @@ class OneHotCategorical(distribution.Distribution): K is the number of classes. This class provides methods to create indexed batches of OneHotCategorical - distributions. If the provided `logits` or `probs` is rank 2 or higher, for + distributions. If the provided `logits` or `probs` is rank 2 or higher, for every fixed set of leading dimensions, the last dimension represents one - single OneHotCategorical distribution. When calling distribution + single OneHotCategorical distribution. When calling distribution functions (e.g. `dist.prob(x)`), `logits` and `x` are broadcast to the - same shape (if possible). In all cases, the last dimension of `logits,x` + same shape (if possible). In all cases, the last dimension of `logits,x` represents single OneHotCategorical distributions. #### Examples @@ -105,15 +105,15 @@ def __init__( vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. dtype: The type of the event samples (default: int32). - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. 
+ name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[logits, probs]) as ns: @@ -173,7 +173,7 @@ def _event_shape(self): return self.logits.get_shape().with_rank_at_least(1)[-1:] def _sample_n(self, n, seed=None): - sample_shape = array_ops.concat(([n], array_ops.shape(self.logits)), 0) + sample_shape = array_ops.concat([[n], array_ops.shape(self.logits)], 0) logits = self.logits if logits.get_shape().ndims == 2: logits_2d = logits @@ -231,8 +231,8 @@ def _assert_valid_sample(self, x): return control_flow_ops.with_dependencies([ check_ops.assert_non_positive(x), distribution_util.assert_close( - array_ops.zeros((), dtype=self.dtype), - math_ops.reduce_logsumexp(x, reduction_indices=[-1])), + array_ops.zeros([], dtype=self.dtype), + math_ops.reduce_logsumexp(x, axis=[-1])), ], x) diff --git a/tensorflow/contrib/distributions/python/ops/poisson.py b/tensorflow/contrib/distributions/python/ops/poisson.py index e1ddc9a0e18b9d..64904824c1df77 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson.py +++ b/tensorflow/contrib/distributions/python/ops/poisson.py @@ -70,15 +70,15 @@ def __init__(self, Args: rate: Floating point tensor, the rate parameter of the distribution(s). `rate` must be positive. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[rate]) as ns: diff --git a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py index 7466b1c22091ad..1ee77c05e4b4a2 100644 --- a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py @@ -38,8 +38,9 @@ def _logsum_expbig_minus_expsmall(big, small): To work correctly, we should have the pointwise relation: `small <= big`. Args: - big: Numeric `Tensor` - small: Numeric `Tensor` with same `dtype` as `big` and broadcastable shape. + big: Floating-point `Tensor` + small: Floating-point `Tensor` with same `dtype` as `big` and broadcastable + shape. Returns: `Tensor` of same `dtype` of `big` and broadcast shape. @@ -61,14 +62,14 @@ def _logsum_expbig_minus_expsmall(big, small): """ _prob_note = _prob_base_note + """ -The base distribution's `cdf` method must be defined on `y - 1`. If the +The base distribution's `cdf` method must be defined on `y - 1`. If the base distribution has a `survival_function` method, results will be more accurate for large values of `y`, and in this case the `survival_function` must also be defined on `y - 1`. """ _log_prob_note = _prob_base_note + """ -The base distribution's `log_cdf` method must be defined on `y - 1`. 
If the +The base distribution's `log_cdf` method must be defined on `y - 1`. If the base distribution has a `log_survival_function` method results will be more accurate for large values of `y`, and in this case the `log_survival_function` must also be defined on `y - 1`. @@ -194,19 +195,19 @@ def __init__(self, distribution: The base distribution class to transform. Typically an instance of `Distribution`. low: `Tensor` with same `dtype` as this distribution and shape - able to be added to samples. Should be a whole number. Default `None`. + able to be added to samples. Should be a whole number. Default `None`. If provided, base distribution's `prob` should be defined at `low`. high: `Tensor` with same `dtype` as this distribution and shape - able to be added to samples. Should be a whole number. Default `None`. + able to be added to samples. Should be a whole number. Default `None`. If provided, base distribution's `prob` should be defined at `high - 1`. `high` must be strictly greater than `low`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: TypeError: If `dist_cls` is not a subclass of diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py index b257b64c7e908a..55e87fda152133 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py @@ -151,15 +151,15 @@ def __init__(self, probs: An N-D `Tensor` representing the probability of a positive event. Each entry in the `Tensor` parameterizes an independent Bernoulli distribution. Only one of `logits` or `probs` should be passed in. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: ValueError: If both `probs` and `logits` are passed, or if neither. diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index bd22b4d25ac5ac..0e52743e1ce9cd 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -151,15 +151,15 @@ def __init__( the last dimension represents a vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. 
dtype: The type of the event samples (default: int32). - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() with ops.name_scope(name, values=[logits, probs, temperature]) as ns: @@ -230,7 +230,7 @@ def _event_shape(self): return self.logits.get_shape().with_rank_at_least(1)[-1:] def _sample_n(self, n, seed=None): - sample_shape = array_ops.concat(([n], array_ops.shape(self.logits)), 0) + sample_shape = array_ops.concat([[n], array_ops.shape(self.logits)], 0) logits = self.logits * array_ops.ones(sample_shape) logits_2d = array_ops.reshape(logits, [-1, self.event_size]) np_dtype = self.dtype.as_numpy_dtype @@ -238,8 +238,8 @@ def _sample_n(self, n, seed=None): # Uniform variates must be sampled from the interval (0,1] rather than # [0,1], as they are passed through log() to compute Gumbel variates. # We need to use np.finfo(np_dtype).tiny because it is the smallest, - # positive, "normal" number. A "normal" number is such that the mantissa - # has an implicit leading 1. Normal, positive numbers x, y have the + # positive, "normal" number. A "normal" number is such that the mantissa + # has an implicit leading 1. Normal, positive numbers x, y have the # reasonable property that: x + y >= max(x, y). # minval=np.nextafter(np.float32(0),1)) can cause # tf.random_uniform(dtype=tf.float32) to sample 0. @@ -290,8 +290,8 @@ def _assert_valid_sample(self, x): return control_flow_ops.with_dependencies([ check_ops.assert_non_positive(x), distribution_util.assert_close( - array_ops.zeros((), dtype=self.dtype), - math_ops.reduce_logsumexp(x, reduction_indices=[-1])), + array_ops.zeros([], dtype=self.dtype), + math_ops.reduce_logsumexp(x, axis=[-1])), ], x) @@ -394,9 +394,9 @@ def __init__( of `logits` or `probs` should be passed in. dtype: The type of the event samples (default: int32). validate_args: Unused in this distribution. - allow_nan_stats: `Boolean`, default `True`. If `False`, raise an + allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any - batch member. If `True`, batch members with valid parameters leading to + batch member. If `True`, batch members with valid parameters leading to undefined statistics will return NaN for this statistic. name: A name for this distribution (optional). """ diff --git a/tensorflow/contrib/distributions/python/ops/shape.py b/tensorflow/contrib/distributions/python/ops/shape.py index 6e47df34c49aa4..90acd14a1f0045 100644 --- a/tensorflow/contrib/distributions/python/ops/shape.py +++ b/tensorflow/contrib/distributions/python/ops/shape.py @@ -39,7 +39,7 @@ class _DistributionShape(object): - `dims`: indexes into `shape`; useful for transpose, reduce. 
`Tensor`s sampled from a `Distribution` can be partitioned by `sample_dims`, - `batch_dims`, and `event_dims`. To understand the semantics of these + `batch_dims`, and `event_dims`. To understand the semantics of these dimensions, consider when two of the three are fixed and the remaining is varied: - `sample_dims`: indexes independent draws from identical @@ -78,7 +78,7 @@ class _DistributionShape(object): ```python sample_dims = [0] tf.reduce_mean(Normal(loc=1.3, scale=1.).sample_n(1000), - reduction_indices=sample_dims) # ~= 1.3 + axis=sample_dims) # ~= 1.3 ``` - Batch dimensions: @@ -93,13 +93,13 @@ class _DistributionShape(object): ~= 1/n sum_{i=1}^n P(X=x|y_i), y_i ~iid Laplace(0,1) = tf.reduce_mean(Normal(loc=Laplace(0., 1.).sample_n(n=1000), scale=tf.ones(1000)).prob(x), - reduction_indices=batch_dims) + axis=batch_dims) ``` The `Laplace` distribution generates a `Tensor` of shape `[1000]`. When fed to a `Normal`, this is interpreted as 1000 different locations, i.e., - 1000 non-identical Normals. Therefore a single call to `prob(x)` yields - 1000 probabilities, one for every location. The average over this batch + 1000 non-identical Normals. Therefore a single call to `prob(x)` yields + 1000 probabilities, one for every location. The average over this batch yields the marginal. - Event dimensions: @@ -139,8 +139,8 @@ class _DistributionShape(object): # E = [2, 2] # 100 iid samples from two, non-identical trivariate Normal distributions. - mu = ... # shape(2, 3) - sigma = ... # shape(2, 3, 3) + mu = ... # shape(2, 3) + sigma = ... # shape(2, 3, 3) X = MultivariateNormal(mu, sigma).sample(shape=[4, 25]) # S = [4, 25] # B = [2] @@ -154,7 +154,7 @@ class _DistributionShape(object): For example, when `validate_args=False` and `event_ndims` is a non-constant `Tensor`, it is checked to be a non-negative integer at graph - execution. (Same for `batch_ndims`). Constant `Tensor`s and non-`Tensor` + execution. (Same for `batch_ndims`). Constant `Tensor`s and non-`Tensor` arguments are always checked for correctness since this can be done for "free," i.e., during graph construction. """ @@ -167,7 +167,7 @@ def __init__(self, """Construct `DistributionShape` with fixed `batch_ndims`, `event_ndims`. `batch_ndims` and `event_ndims` are fixed throughout the lifetime of a - `Distribution`. They may only be known at graph execution. + `Distribution`. They may only be known at graph execution. If both `batch_ndims` and `event_ndims` are python scalars (rather than either being a `Tensor`), functions in this class automatically perform @@ -175,16 +175,16 @@ def __init__(self, Args: batch_ndims: `Tensor`. Number of `dims` (`rank`) of the batch portion of - indexes of a `Tensor`. A "batch" is a non-identical distribution, i.e, + indexes of a `Tensor`. A "batch" is a non-identical distribution, i.e, Normal with different parameters. event_ndims: `Tensor`. Number of `dims` (`rank`) of the event portion of indexes of a `Tensor`. An "event" is what is sampled from a distribution, i.e., a trivariate Normal has an event shape of [3] and a 4 dimensional Wishart has an event shape of [4, 4]. - validate_args: `Boolean`, default `False`. When `True`, non-`tf.constant` - `Tensor` arguments are checked for correctness. (`tf.constant` - arguments are always checked.) - name: `String`. The name prepended to Ops created by this class. + validate_args: Python `bool`, default `False`. When `True`, + non-`tf.constant` `Tensor` arguments are checked for correctness. + (`tf.constant` arguments are always checked.) 
+ name: Python `str`. The name prepended to Ops created by this class. Raises: ValueError: if either `batch_ndims` or `event_ndims` are: `None`, @@ -234,7 +234,7 @@ def get_ndims(self, x, name="get_ndims"): Args: x: `Tensor`. - name: `String`. The name to give this op. + name: Python `str`. The name to give this op. Returns: ndims: Scalar number of dimensions associated with a `Tensor`. @@ -251,7 +251,7 @@ def get_sample_ndims(self, x, name="get_sample_ndims"): Args: x: `Tensor`. - name: `String`. The name to give this op. + name: Python `str`. The name to give this op. Returns: sample_ndims: `Tensor` (0D, `int32`). @@ -285,7 +285,7 @@ def get_dims(self, x, name="get_dims"): Example: ```python - x = ... # Tensor with shape [4, 3, 2, 1] + x = ... # Tensor with shape [4, 3, 2, 1] sample_dims, batch_dims, event_dims = _DistributionShape( batch_ndims=2, event_ndims=1).get_dims(x) # sample_dims == [0] @@ -296,7 +296,7 @@ def get_dims(self, x, name="get_dims"): Args: x: `Tensor`. - name: `String`. The name to give this op. + name: Python `str`. The name to give this op. Returns: sample_dims: `Tensor` (1D, `int32`). @@ -306,8 +306,8 @@ def get_dims(self, x, name="get_dims"): with self._name_scope(name, values=[x]): def make_dims(start_sum, size, name): """Closure to make dims range.""" - start_sum = start_sum if start_sum else ( - array_ops.zeros((), dtype=dtypes.int32, name="zero"),) + start_sum = start_sum if start_sum else [ + array_ops.zeros([], dtype=dtypes.int32, name="zero")] if self._is_all_constant_helper(size, *start_sum): start = sum(tensor_util.constant_value(s) for s in start_sum) stop = start + tensor_util.constant_value(size) @@ -317,9 +317,9 @@ def make_dims(start_sum, size, name): start = sum(start_sum) return math_ops.range(start, start + size) sample_ndims = self.get_sample_ndims(x, name=name) - return (make_dims((), sample_ndims, name="sample_dims"), - make_dims((sample_ndims,), self.batch_ndims, name="batch_dims"), - make_dims((sample_ndims, self.batch_ndims), + return (make_dims([], sample_ndims, name="sample_dims"), + make_dims([sample_ndims], self.batch_ndims, name="batch_dims"), + make_dims([sample_ndims, self.batch_ndims], self.event_ndims, name="event_dims")) def get_shape(self, x, name="get_shape"): @@ -327,7 +327,7 @@ def get_shape(self, x, name="get_shape"): Args: x: `Tensor`. - name: `String`. The name to give this op. + name: Python `str`. The name to give this op. Returns: sample_shape: `Tensor` (1D, `int32`). @@ -338,8 +338,8 @@ def get_shape(self, x, name="get_shape"): x = ops.convert_to_tensor(x, name="x") def slice_shape(start_sum, size, name): """Closure to slice out shape.""" - start_sum = start_sum if start_sum else ( - array_ops.zeros((), dtype=dtypes.int32, name="zero"),) + start_sum = start_sum if start_sum else [ + array_ops.zeros([], dtype=dtypes.int32, name="zero")] if (x.get_shape().ndims is not None and self._is_all_constant_helper(size, *start_sum)): start = sum(tensor_util.constant_value(s) for s in start_sum) @@ -347,14 +347,13 @@ def slice_shape(start_sum, size, name): slice_ = x.get_shape()[start:stop].as_list() if all(s is not None for s in slice_): return ops.convert_to_tensor(slice_, dtype=dtypes.int32, name=name) - # Fall-through intended. 
- return array_ops.slice(array_ops.shape(x), (sum(start_sum),), (size,)) + return array_ops.slice(array_ops.shape(x), [sum(start_sum)], [size]) sample_ndims = self.get_sample_ndims(x, name=name) - return (slice_shape((), sample_ndims, + return (slice_shape([], sample_ndims, name="sample_shape"), - slice_shape((sample_ndims,), self.batch_ndims, + slice_shape([sample_ndims], self.batch_ndims, name="batch_shape"), - slice_shape((sample_ndims, self.batch_ndims), self.event_ndims, + slice_shape([sample_ndims, self.batch_ndims], self.event_ndims, name="event_shape")) # TODO(jvdillon): Make remove expand_batch_dim and make expand_batch_dim=False @@ -371,9 +370,9 @@ def make_batch_of_event_sample_matrices( Args: x: `Tensor`. - expand_batch_dim: Python `Boolean` scalar. If `True` the batch dims will - be expanded such that batch_ndims>=1. - name: `String`. The name to give this op. + expand_batch_dim: Python `bool`. If `True` the batch dims will be expanded + such that `batch_ndims >= 1`. + name: Python `str`. The name to give this op. Returns: x: `Tensor`. Input transposed/reshaped to `B_+E_+S_`. @@ -412,9 +411,9 @@ def undo_make_batch_of_event_sample_matrices( Args: x: `Tensor` of shape `B_+E_+S_`. sample_shape: `Tensor` (1D, `int32`). - expand_batch_dim: Python `Boolean` scalar. If `True` the batch dims will - be expanded such that batch_ndims>=1. - name: `String`. The name to give this op. + expand_batch_dim: Python `bool`. If `True` the batch dims will be expanded + such that `batch_ndims>=1`. + name: Python `str`. The name to give this op. Returns: x: `Tensor`. Input transposed/reshaped to `S+B+E`. diff --git a/tensorflow/contrib/distributions/python/ops/special_math.py b/tensorflow/contrib/distributions/python/ops/special_math.py index bc0a8c2b273bf8..e5e5e1963e063f 100644 --- a/tensorflow/contrib/distributions/python/ops/special_math.py +++ b/tensorflow/contrib/distributions/python/ops/special_math.py @@ -45,7 +45,7 @@ # Upper bound values were chosen by examining for which values of 'x' # Log[cdf(x)] is 0, after which point we need to use the approximation -# Log[cdf(x)] = Log[1 - cdf(-x)] approx -cdf(-x). We chose a value slightly +# Log[cdf(x)] = Log[1 - cdf(-x)] approx -cdf(-x). We chose a value slightly # conservative, meaning we use the approximation earlier than needed. LOGNDTR_FLOAT64_UPPER = 8 LOGNDTR_FLOAT32_UPPER = 5 @@ -59,7 +59,7 @@ def ndtr(x, name="ndtr"): ``` 1 / x - ndtr(x) = ---------- | exp(-0.5 t^2) dt + ndtr(x) = ---------- | exp(-0.5 t**2) dt sqrt(2 pi) /-inf = 0.5 (1 + erf(x / sqrt(2))) @@ -106,7 +106,7 @@ def log_ndtr(x, series_order=3, name="log_ndtr"): For details of the Normal distribution function see `ndtr`. This function calculates `(log o ndtr)(x)` by either calling `log(ndtr(x))` or - using an asymptotic series. Specifically: + using an asymptotic series. Specifically: - For `x > upper_segment`, use the approximation `-ndtr(-x)` based on `log(1-x) ~= -x, x << 1`. - For `lower_segment < x <= upper_segment`, use the existing `ndtr` technique @@ -127,19 +127,19 @@ def log_ndtr(x, series_order=3, name="log_ndtr"): ``` ndtr(x) = scale * (1 + sum) + R_N - scale = exp(-0.5 x^2) / (-x sqrt(2 pi)) - sum = Sum{(-1)^n (2n-1)!! / (x^2)^n, n=1:N} - R_N = O(exp(-0.5 x^2) (2N+1)!! / |x|^{2N+3}) + scale = exp(-0.5 x**2) / (-x sqrt(2 pi)) + sum = Sum{(-1)^n (2n-1)!! / (x**2)^n, n=1:N} + R_N = O(exp(-0.5 x**2) (2N+1)!! / |x|^{2N+3}) ``` - where `(2n-1)!! = (2n-1) (2n-3) (2n-5) ... (3) (1)` is a + where `(2n-1)!! = (2n-1) (2n-3) (2n-5) ... 
(3) (1)` is a
   [double-factorial](https://en.wikipedia.org/wiki/Double_factorial).


   Args:
     x: `Tensor` of type `float32`, `float64`.
     series_order: Positive Python `integer`. Maximum depth to
-      evaluate the asymptotic expansion. This is the `N` above.
+      evaluate the asymptotic expansion. This is the `N` above.
     name: Python string. A name for the operation (default="log_ndtr").

   Returns:
@@ -176,7 +176,7 @@ def log_ndtr(x, series_order=3, name="log_ndtr"):
   # which extends the range of validity of this function.
   # * We use one fixed series_order for all of 'x', rather than adaptive.
   # * Our docstring properly reflects that this is an asymptotic series, not a
-  #   Tayor series. We also provided a correct bound on the remainder.
+  #   Taylor series. We also provided a correct bound on the remainder.
   # * We need to use the max/min in the _log_ndtr_lower arg to avoid nan when
   #   x=0. This happens even though the branch is unchosen because when x=0
   #   the gradient of a select involves the calculation 1*dy+0*(-inf)=nan
@@ -262,7 +262,7 @@ def log_cdf_laplace(x, name="log_cdf_laplace"):
   # exp{-x} --> inf, for x << -1
   safe_exp_neg_x = math_ops.exp(-math_ops.abs(x))

-  # log1p(z) = log(1 + z) approx z for |z| << 1. This approxmation is used
+  # log1p(z) = log(1 + z) approx z for |z| << 1. This approximation is used
   # internally by log1p, rather than being done explicitly here.
   upper_solution = math_ops.log1p(-0.5 * safe_exp_neg_x)

diff --git a/tensorflow/contrib/distributions/python/ops/student_t.py b/tensorflow/contrib/distributions/python/ops/student_t.py
index 2d097ff95fff80..24db27a2f5233e 100644
--- a/tensorflow/contrib/distributions/python/ops/student_t.py
+++ b/tensorflow/contrib/distributions/python/ops/student_t.py
@@ -74,7 +74,7 @@ class StudentT(distribution.Distribution):
   ```

   Notice that `scale` has semantics more similar to standard deviation than
-  variance. However it is not actually the std. deviation; the Student's
+  variance. However, it is not actually the std. deviation; the Student's
   t-distribution std. dev. is `scale sqrt(df / (df - 2))` when `df > 2`.

   #### Examples
@@ -134,22 +134,22 @@ def __init__(self,
     supports broadcasting (e.g. `df + loc + scale` is a valid operation).

     Args:
-      df: Numeric `Tensor`. The degrees of freedom of the distribution(s).
-        `df` must contain only positive values.
-      loc: Numeric `Tensor`. The mean(s) of the distribution(s).
-      scale: Numeric `Tensor`. The scaling factor(s) for the distribution(s).
-        Note that `scale` is not technically the standard deviation of this
-        distribution but has semantics more similar to standard deviation than
-        variance.
-      validate_args: Python `Boolean`, default `False`. When `True` distribution
+      df: Floating-point `Tensor`. The degrees of freedom of the
+        distribution(s). `df` must contain only positive values.
+      loc: Floating-point `Tensor`. The mean(s) of the distribution(s).
+      scale: Floating-point `Tensor`. The scaling factor(s) for the
+        distribution(s). Note that `scale` is not technically the standard
+        deviation of this distribution but has semantics more similar to
+        standard deviation than variance.
+      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
-      allow_nan_stats: Python `Boolean`, default `True`. When `True`,
+      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
-        indicate the result is undefined. When `False`, an exception is raised
+        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
-      name: `String` name prefixed to Ops created by this class.
+      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if loc and scale are different dtypes.
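
The `scale`-vs-standard-deviation caveat above is easy to check numerically. A minimal sketch (not part of the patch), assuming the `tf.contrib.distributions.StudentT` API shown in this diff; the parameter values are hypothetical:

```python
import tensorflow as tf

ds = tf.contrib.distributions

df, scale = 5., 2.
dist = ds.StudentT(df=df, loc=0., scale=scale)

# For df > 2, the std. dev. is scale * sqrt(df / (df - 2)), not scale itself.
with tf.Session() as sess:
  print(sess.run(dist.stddev()))                    # ~= 2.582
  print(sess.run(scale * tf.sqrt(df / (df - 2.))))  # same value
```
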
@@ -257,8 +257,9 @@ def _cdf(self, x):
     return array_ops.where(math_ops.less(y, 0.), neg_cdf, 1. - neg_cdf)

   def _entropy(self):
-    v = array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)[..., None]
-    u = v * self.df[..., None]
+    v = array_ops.ones(self.batch_shape_tensor(),
+                       dtype=self.dtype)[..., array_ops.newaxis]
+    u = v * self.df[..., array_ops.newaxis]
     beta_arg = array_ops.concat([u, v], -1) / 2.
     return (math_ops.log(math_ops.abs(self.scale)) +
             0.5 * math_ops.log(self.df) +
@@ -269,7 +270,7 @@ def _entropy(self):

   @distribution_util.AppendDocstring(
       """The mean of Student's T equals `loc` if `df > 1`, otherwise it is
-      `NaN`. If `self.allow_nan_stats=True`, then an exception will be raised
+      `NaN`. If `self.allow_nan_stats=False`, then an exception will be raised
       rather than returning `NaN`.""")
   def _mean(self):
     mean = self.loc * array_ops.ones(self.batch_shape_tensor(),
@@ -286,7 +287,7 @@ def _mean(self):
       return control_flow_ops.with_dependencies(
           [
               check_ops.assert_less(
-                  array_ops.ones((), dtype=self.dtype),
+                  array_ops.ones([], dtype=self.dtype),
                   self.df,
                   message="mean not defined for components of df <= 1"),
           ],
@@ -329,7 +330,7 @@ def _variance(self):
       return control_flow_ops.with_dependencies(
           [
               check_ops.assert_less(
-                  array_ops.ones((), dtype=self.dtype),
+                  array_ops.ones([], dtype=self.dtype),
                   self.df,
                   message="variance not defined for components of df <= 1"),
           ],
diff --git a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py
index 058eaa0ade0cfc..067e96a18cf242 100644
--- a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py
@@ -139,7 +139,7 @@ class TransformedDistribution(distributions.Distribution):
   Write `cdf(Y=y)` for an absolutely continuous cumulative distribution
   function of random variable `Y`; write the probability density function
   `pdf(Y=y) := d^k / (dy_1,...,dy_k) cdf(Y=y)` for its derivative wrt to `Y` evaluated at
-  `y`. Assume that `Y = g(X)` where `g` is a deterministic diffeomorphism,
+  `y`. Assume that `Y = g(X)` where `g` is a deterministic diffeomorphism,
   i.e., a non-random, continuous, differentiable, and invertible function.
   Write the inverse of `g` as `X = g^{-1}(Y)` and `(J o g)(x)` for the Jacobian
   of `g` evaluated at `x`.
@@ -214,7 +214,7 @@ class TransformedDistribution(distributions.Distribution):
         forward_fn=tf.exp,
         inverse_fn=tf.log,
         inverse_log_det_jacobian_fn=(
-            lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
+            lambda y: -tf.reduce_sum(tf.log(y), axis=-1)),
         name="LogNormalTransformedDistribution")
   ```

@@ -230,7 +230,7 @@ class TransformedDistribution(distributions.Distribution):
   A `TransformedDistribution`'s batch- and event-shape are implied by the base
   distribution unless explicitly overridden by `batch_shape` or `event_shape`
-  arguments. 
Specifying an overriding `batch_shape` (`event_shape`) is permitted only if the base distribution has scalar batch-shape (event-shape). The bijector is applied to the distribution as if the distribution possessed the overridden shape(s). The following example demonstrates how to construct a @@ -275,11 +275,11 @@ def __init__(self, `batch_shape`; valid only if `distribution.is_scalar_batch()`. event_shape: `integer` vector `Tensor` which overrides `distribution` `event_shape`; valid only if `distribution.is_scalar_event()`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - name: `String` name prefixed to Ops created by this class. Default: + name: Python `str` name prefixed to Ops created by this class. Default: `bijector.name + distribution.name`. """ parameters = locals() @@ -318,7 +318,7 @@ def __init__(self, # To convert a scalar distribution into a multivariate distribution we # will draw dims from the sample dims, which are otherwise iid. This is # easy to do except in the case that the base distribution has batch dims - # and we're overriding event shape. When that case happens the event dims + # and we're overriding event shape. When that case happens the event dims # will incorrectly be to the left of the batch dims. In this case we'll # cyclically permute left the new dims. self._needs_rotation = _logical_and( @@ -367,7 +367,7 @@ def _event_shape_tensor(self): def _event_shape(self): # If there's a chance that the event_shape has been overriden, we return - # what we statically know about the `event_shape_override`. This works + # what we statically know about the `event_shape_override`. This works # because: `_is_maybe_event_override` means `static_override` is `None` or a # non-empty list, i.e., we don't statically know the `event_shape` or we do. # @@ -388,12 +388,12 @@ def _batch_shape_tensor(self): def _batch_shape(self): # If there's a chance that the batch_shape has been overriden, we return - # what we statically know about the `batch_shape_override`. This works + # what we statically know about the `batch_shape_override`. This works # because: `_is_maybe_batch_override` means `static_override` is `None` or a # non-empty list, i.e., we don't statically know the `batch_shape` or we do. # # Notice that this implementation parallels the `_event_shape` except that - # the `bijector` doesn't get to alter the `batch_shape`. Recall that + # the `bijector` doesn't get to alter the `batch_shape`. Recall that # `batch_shape` is a property of a distribution while `event_shape` is # shared between both the `distribution` instance and the `bijector`. static_override = tensor_util.constant_value(self._override_batch_shape) diff --git a/tensorflow/contrib/distributions/python/ops/uniform.py b/tensorflow/contrib/distributions/python/ops/uniform.py index 80cc9c30155c00..1465bd81fb0fa2 100644 --- a/tensorflow/contrib/distributions/python/ops/uniform.py +++ b/tensorflow/contrib/distributions/python/ops/uniform.py @@ -87,15 +87,15 @@ def __init__(self, have `low < high`. high: Floating point tensor, upper boundary of the output interval. Must have `low < high`. - validate_args: Python `Boolean`, default `False`. When `True` distribution + validate_args: Python `bool`, default `False`. 
When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. - allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the - result is undefined. When `False`, an exception is raised if one or + result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. - name: `String` name prefixed to Ops created by this class. + name: Python `str` name prefixed to Ops created by this class. Raises: InvalidArgumentError: if `low >= high` and `validate_args=False`. @@ -158,7 +158,7 @@ def _event_shape(self): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - shape = array_ops.concat(([n], self.batch_shape_tensor()), 0) + shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) samples = random_ops.random_uniform(shape=shape, dtype=self.dtype, seed=seed) diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py index b7df4285a196b0..89128e66b7df3c 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py +++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py @@ -141,7 +141,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution): [Student's t-distributions]( https://en.wikipedia.org/wiki/Student%27s_t-distribution) and should not be confused with the [Multivate Student's t-distribution]( - https://en.wikipedia.org/wiki/Multivariate_t-distribution). The + https://en.wikipedia.org/wiki/Multivariate_t-distribution). The traditional Multivariate Student's t-distribution is type of [elliptical distribution]( https://en.wikipedia.org/wiki/Elliptical_distribution); it has PDF: @@ -215,42 +215,39 @@ def __init__(self, The `event_shape` is the event shape of `Affine.event_shape`. Args: - df: Numeric `Tensor`. The degrees of freedom of the distribution(s). - `df` must contain only positive values. - Must be scalar if `loc`, `scale_*` imply non-scalar batch_shape or - must have the same `batch_shape` implied by `loc`, `scale_*`. - loc: Numeric `Tensor`. If this is set to `None`, no `loc` is applied. + df: Floating-point `Tensor`. The degrees of freedom of the + distribution(s). `df` must contain only positive values. Must be + scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the + same `batch_shape` implied by `loc`, `scale_*`. + loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is + applied. scale_identity_multiplier: floating point rank 0 `Tensor` representing a - scaling done to the identity matrix. - When `scale_identity_multiplier = scale_diag=scale_tril = None` then - `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added - to `scale`. - scale_diag: Numeric `Tensor` representing the diagonal matrix. - `scale_diag` has shape [N1, N2, ... k], which represents a k x k - diagonal matrix. - When `None` no diagonal term is added to `scale`. - scale_tril: Numeric `Tensor` representing the diagonal matrix. - `scale_diag` has shape [N1, N2, ... k, k], which represents a k x k - lower triangular matrix. - When `None` no `scale_tril` term is added to `scale`. - The upper triangular elements above the diagonal are ignored. 
-      scale_perturb_factor: Numeric `Tensor` representing factor matrix with
-        last two dimensions of shape `(k, r)`.
-        When `None`, no rank-r update is added to `scale`.
-      scale_perturb_diag: Numeric `Tensor` representing the diagonal matrix.
-        `scale_perturb_diag` has shape [N1, N2, ... r], which represents an
-        r x r Diagonal matrix.
-        When `None` low rank updates will take the form `scale_perturb_factor *
-        scale_perturb_factor.T`.
-      validate_args: Python `Boolean`, default `False`. When `True` distribution
+      scaling done to the identity matrix. When `scale_identity_multiplier =
+        scale_diag = scale_tril = None` then `scale += IdentityMatrix`. Otherwise
+        no scaled-identity-matrix is added to `scale`.
+      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
+        diagonal matrix. When `None` no diagonal term is added to `scale`.
+      scale_tril: Floating-point `Tensor` representing the lower triangular
+        matrix. `scale_tril` has shape [N1, N2, ..., k, k], which represents a
+        k x k lower triangular matrix. When `None` no `scale_tril` term is
+        added to `scale`. The upper triangular elements above the diagonal are
+        ignored.
+      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
+        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
+        update is added to `scale`.
+      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
+        matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
+        represents an r x r Diagonal matrix. When `None` low rank updates will
+        take the form `scale_perturb_factor * scale_perturb_factor.T`.
+      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
-      allow_nan_stats: Python `Boolean`, default `True`. When `True`,
+      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
-        indicate the result is undefined. When `False`, an exception is raised
+        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
-      name: `String` name prefixed to Ops created by this class.
+      name: Python `str` name prefixed to Ops created by this class.
     """
     parameters = locals()
     graph_parents = [df, loc, scale_identity_multiplier, scale_diag,
diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py
index 911c951668f076..aec84a073b60ba 100644
--- a/tensorflow/contrib/distributions/python/ops/wishart.py
+++ b/tensorflow/contrib/distributions/python/ops/wishart.py
@@ -87,20 +87,20 @@ def __init__(self,
     df: `float` or `double` tensor, the degrees of freedom of the
       distribution(s). `df` must be greater than or equal to `k`.
     scale_operator_pd: `float` or `double` instance of `OperatorPDBase`.
-      cholesky_input_output_matrices: `Boolean`. Any function which whose input
-        or output is a matrix assumes the input is Cholesky and returns a
+      cholesky_input_output_matrices: Python `bool`. Any function whose input
+        or output is a matrix assumes the input is Cholesky and returns a
       Cholesky factored matrix. Example `log_prob` input takes a Cholesky and
       `sample_n` returns a Cholesky when
       `cholesky_input_output_matrices=True`.
-      validate_args: Python `Boolean`, default `False`. When `True` distribution
+      validate_args: Python `bool`, default `False`. When `True` distribution
       parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
-      allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics
+      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
-        result is undefined. When `False`, an exception is raised if one or
+        result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
-      name: `String` name prefixed to Ops created by this class.
+      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      TypeError: if scale is not floating-type
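
A minimal sketch (not part of the patch) of the `cholesky_input_output_matrices` contract documented above, using the public `WishartCholesky` wrapper that appears later in this file; the TF 1.x `tf.contrib.distributions` API is assumed and the values are hypothetical:

```python
import tensorflow as tf

ds = tf.contrib.distributions

scale = tf.constant([[2., 0.5], [0.5, 1.]])  # symmetric positive definite
dist = ds.WishartCholesky(df=4., scale=tf.cholesky(scale),
                          cholesky_input_output_matrices=True)

# With the flag set, matrix outputs are Cholesky factors (`sample` returns L
# rather than L L') and matrix inputs such as the argument of `log_prob` are
# interpreted the same way.
chol_sample = dist.sample(seed=42)     # shape [2, 2], lower triangular
log_prob = dist.log_prob(chol_sample)  # scalar
```
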
@@ -210,9 +210,9 @@ def _sample_n(self, n, seed):
     batch_ndims = array_ops.shape(batch_shape)[0]

     ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
-    shape = array_ops.concat(((n,), batch_shape, event_shape), 0)
+    shape = array_ops.concat([[n], batch_shape, event_shape], 0)

-    # Complexity: O(nbk^2)
+    # Complexity: O(nbk**2)
     x = random_ops.random_normal(shape=shape,
                                  mean=0.,
                                  stddev=1.,
@@ -222,7 +222,7 @@ def _sample_n(self, n, seed):
     # Complexity: O(nbk)
     # This parametrization is equivalent to Chi2, i.e.,
     # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
-    g = random_ops.random_gamma(shape=(n,),
+    g = random_ops.random_gamma(shape=[n],
                                 alpha=self._multi_gamma_sequence(
                                     0.5 * self.df, self.dimension),
                                 beta=0.5,
                                 dtype=self.dtype,
                                 seed=distribution_util.gen_new_seed(
                                     seed, "wishart"))

-    # Complexity: O(nbk^2)
+    # Complexity: O(nbk**2)
     x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

     # Complexity: O(nbk)
     x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

     # Make batch-op ready.
-    # Complexity: O(nbk^2)
-    perm = array_ops.concat((math_ops.range(1, ndims), (0,)), 0)
+    # Complexity: O(nbk**2)
+    perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
     x = array_ops.transpose(x, perm)
-    shape = array_ops.concat((batch_shape, (event_shape[0], -1)), 0)
+    shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
     x = array_ops.reshape(x, shape)

     # Complexity: O(nbM) where M is the complexity of the operator solving a
-    # vector system. E.g., for OperatorPDDiag, each matmul is O(k^2), so
-    # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is
+    # vector system. E.g., for OperatorPDDiag, each matmul is O(k**2), so
+    # this complexity is O(nbk**2). For OperatorPDCholesky, each matmul is
     # O(k^3) so this step has complexity O(nbk^3).
     x = self.scale_operator_pd.sqrt_matmul(x)

     # Undo make batch-op ready.
-    # Complexity: O(nbk^2)
-    shape = array_ops.concat((batch_shape, event_shape, (n,)), 0)
+    # Complexity: O(nbk**2)
+    shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
     x = array_ops.reshape(x, shape)
-    perm = array_ops.concat(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
+    perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
     x = array_ops.transpose(x, perm)

     if not self.cholesky_input_output_matrices:
@@ -278,7 +278,7 @@ def _log_prob(self, x):
         array_ops.shape(x_sqrt), [0], [sample_ndims])

     # We need to be able to pre-multiply each matrix by its corresponding
-    # batch scale matrix. 
Since a Distribution Tensor supports multiple # samples per batch, this means we need to reshape the input matrix `x` # so that the first b dimensions are batch dimensions and the last two # are of shape [dimension, dimensions*number_of_samples]. Doing these @@ -288,10 +288,10 @@ def _log_prob(self, x): # this reshaping so what we're left with is a Tensor partitionable by # sample, batch, event dimensions. - # Complexity: O(nbk^2) since transpose must access every element. + # Complexity: O(nbk**2) since transpose must access every element. scale_sqrt_inv_x_sqrt = x_sqrt - perm = array_ops.concat((math_ops.range(sample_ndims, ndims), - math_ops.range(0, sample_ndims)), 0) + perm = array_ops.concat([math_ops.range(sample_ndims, ndims), + math_ops.range(0, sample_ndims)], 0) scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm) shape = array_ops.concat( (batch_shape, (math_ops.cast( @@ -300,37 +300,37 @@ def _log_prob(self, x): scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape) # Complexity: O(nbM*k) where M is the complexity of the operator solving - # a vector system. E.g., for OperatorPDDiag, each solve is O(k), so - # this complexity is O(nbk^2). For OperatorPDCholesky, each solve is - # O(k^2) so this step has complexity O(nbk^3). + # a vector system. E.g., for OperatorPDDiag, each solve is O(k), so + # this complexity is O(nbk**2). For OperatorPDCholesky, each solve is + # O(k**2) so this step has complexity O(nbk^3). scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve( scale_sqrt_inv_x_sqrt) # Undo make batch-op ready. - # Complexity: O(nbk^2) - shape = array_ops.concat((batch_shape, event_shape, sample_shape), 0) + # Complexity: O(nbk**2) + shape = array_ops.concat([batch_shape, event_shape, sample_shape], 0) scale_sqrt_inv_x_sqrt = array_ops.reshape(scale_sqrt_inv_x_sqrt, shape) - perm = array_ops.concat((math_ops.range(ndims - sample_ndims, ndims), - math_ops.range(0, ndims - sample_ndims)), 0) + perm = array_ops.concat([math_ops.range(ndims - sample_ndims, ndims), + math_ops.range(0, ndims - sample_ndims)], 0) scale_sqrt_inv_x_sqrt = array_ops.transpose(scale_sqrt_inv_x_sqrt, perm) # Write V = SS', X = LL'. Then: # tr[inv(V) X] = tr[inv(S)' inv(S) L L'] # = tr[inv(S) L L' inv(S)'] # = tr[(inv(S) L) (inv(S) L)'] - # = sum_{ik} (inv(S) L)_{ik}^2 + # = sum_{ik} (inv(S) L)_{ik}**2 # The second equality follows from the cyclic permutation property. - # Complexity: O(nbk^2) + # Complexity: O(nbk**2) trace_scale_inv_x = math_ops.reduce_sum( math_ops.square(scale_sqrt_inv_x_sqrt), - reduction_indices=[-2, -1]) + axis=[-2, -1]) # Complexity: O(nbk) half_log_det_x = math_ops.reduce_sum( math_ops.log(array_ops.matrix_diag_part(x_sqrt)), - reduction_indices=[-1]) + axis=[-1]) - # Complexity: O(nbk^2) + # Complexity: O(nbk**2) log_prob = ((self.df - self.dimension - 1.) * half_log_det_x - 0.5 * trace_scale_inv_x - self.log_normalization()) @@ -420,14 +420,14 @@ def _multi_lgamma(self, a, p, name="multi_lgamma"): seq = self._multi_gamma_sequence(a, p) return (0.25 * p * (p - 1.) 
* math.log(math.pi) +
             math_ops.reduce_sum(math_ops.lgamma(seq),
-                                reduction_indices=(-1,)))
+                                axis=[-1]))

   def _multi_digamma(self, a, p, name="multi_digamma"):
     """Computes the multivariate digamma function; Psi_p(a)."""
     with self._name_scope(name, values=[a, p]):
       seq = self._multi_gamma_sequence(a, p)
       return math_ops.reduce_sum(math_ops.digamma(seq),
-                                 reduction_indices=(-1,))
+                                 axis=[-1])


 class WishartCholesky(_WishartOperatorPD):
@@ -469,7 +469,7 @@ class WishartCholesky(_WishartOperatorPD):
   dist = tf.contrib.distributions.WishartCholesky(df=df, scale=chol_scale)

   # Evaluate this on an observation in R^3, returning a scalar.
-  x = ...  # A 3x3 positive definite matrix.
+  x = ...  # A 3x3 positive definite matrix.
   dist.prob(x)  # Shape is [], a scalar.

   # Evaluate this on a two observations, each in R^{3x3}, returning a length two
@@ -506,20 +506,20 @@ def __init__(self,
       or equal to dimension of the scale matrix.
     scale: `float` or `double` `Tensor`. The Cholesky factorization of the
       symmetric positive definite scale matrix of the distribution.
-      cholesky_input_output_matrices: `Boolean`. Any function which whose input
-        or output is a matrix assumes the input is Cholesky and returns a
+      cholesky_input_output_matrices: Python `bool`. Any function whose input
+        or output is a matrix assumes the input is Cholesky and returns a
      Cholesky factored matrix. Example `log_prob` input takes a Cholesky and
      `sample_n` returns a Cholesky when
      `cholesky_input_output_matrices=True`.
-      validate_args: Python `Boolean`, default `False`. When `True` distribution
+      validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
-      allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics
+      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
-        result is undefined. When `False`, an exception is raised if one or
+        result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
-      name: `String` name prefixed to Ops created by this class.
+      name: Python `str` name prefixed to Ops created by this class.
     """
     parameters = locals()
     with ops.name_scope(name, values=[scale]) as ns:
@@ -569,7 +569,7 @@ class WishartFull(_WishartOperatorPD):
   dist = tf.contrib.distributions.WishartFull(df=df, scale=scale)

   # Evaluate this on an observation in R^3, returning a scalar.
-  x = ...  # A 3x3 positive definite matrix.
+  x = ...  # A 3x3 positive definite matrix.
   dist.prob(x)  # Shape is [], a scalar.

   # Evaluate this on a two observations, each in R^{3x3}, returning a length two
@@ -606,20 +606,20 @@ def __init__(self,
       or equal to dimension of the scale matrix.
     scale: `float` or `double` `Tensor`. The symmetric positive definite
       scale matrix of the distribution.
-      cholesky_input_output_matrices: `Boolean`. Any function which whose input
-        or output is a matrix assumes the input is Cholesky and returns a
+      cholesky_input_output_matrices: Python `bool`. Any function whose input
+        or output is a matrix assumes the input is Cholesky and returns a
      Cholesky factored matrix. Example `log_prob` input takes a Cholesky and
      `sample_n` returns a Cholesky when
      `cholesky_input_output_matrices=True`.
-      validate_args: Python `Boolean`, default `False`. When `True` distribution
+      validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
-      allow_nan_stats: Python `Boolean`, default `True`. When `True`, statistics
+      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
-        result is undefined. When `False`, an exception is raised if one or
+        result is undefined. When `False`, an exception is raised if one or
      more of the statistic's batch members are undefined.
-      name: `String` name prefixed to Ops created by this class.
+      name: Python `str` name prefixed to Ops created by this class.
     """
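
As a reading aid (not part of the patch): `WishartFull` and `WishartCholesky` describe the same distribution and differ only in how `scale` is supplied. A sketch under the same TF 1.x `tf.contrib.distributions` assumptions as above, with hypothetical values:

```python
import tensorflow as tf

ds = tf.contrib.distributions

scale = tf.constant([[2., 0.5], [0.5, 1.]])  # symmetric positive definite
x = tf.constant([[1.5, 0.2], [0.2, 0.9]])    # an SPD observation

# WishartFull takes the SPD scale matrix itself; WishartCholesky takes its
# Cholesky factor. Their log_probs agree up to numerical error.
lp_full = ds.WishartFull(df=4., scale=scale).log_prob(x)
lp_chol = ds.WishartCholesky(df=4., scale=tf.cholesky(scale)).log_prob(x)
```
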
    parameters = locals()
    with ops.name_scope(name, values=[scale]) as ns: