Skip to content

Commit

Permalink
MAINT: Implement API changes for randomgen-derived code
Browse files Browse the repository at this point in the history
remove numpy.random.gen, BRNG.generator, pcg*, rand, randn
remove use_mask and Lemire's method, fix benchmarks for PCG removal
convert brng to bitgen (in C) and bit_generator (in python)
convert base R{NG,andom.*} to BitGenerator, fix last commit
randint -> integers, remove rand, randn, random_integers
RandomGenerator -> Generator, more "basic RNG" -> BitGenerator
random_sample -> random, jump -> jumped, resync with randomgen
Remove derived code from entropy
Port over changes accepted in upstream to protect log(0.0) where relevant
fix doctests for jumped, better document choice
Remove Python 2.7 shims
Use NPY_INLINE to simplify
Fix performance.py to work
Rename directory brng to bit_generators
Fix examples with new directory structure
Clarify relationship to historical RandomState
Remove references to .generator
Rename xoshiro256/512starstar
  • Loading branch information
mattip committed May 20, 2019
1 parent dd77ce3 commit 17e0070
Show file tree
Hide file tree
Showing 120 changed files with 3,256 additions and 14,772 deletions.
87 changes: 40 additions & 47 deletions benchmarks/benchmarks/bench_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from numpy.random import RandomState, RandomGenerator
from numpy.random import RandomState, Generator

class Random(Benchmark):
params = ['normal', 'uniform', 'weibull 1', 'binomial 10 0.5',
Expand Down Expand Up @@ -70,14 +70,14 @@ def time_randint_slow(self, name):
class Permutation(Benchmark):
def setup(self):
self.n = 10000
self.a_1d = np.random.random_sample(self.n)
self.a_2d = np.random.random_sample((self.n, 2))
self.a_1d = np.random.random(self.n)
self.a_2d = np.random.random((self.n, 2))

def time_permutation_1d(self):
np.random.permutation(self.a_1d)

def time_permutation_2d(self):
np.random.permutation(self.a_2d)
np.random.permutation(self.a_2d)

def time_permutation_int(self):
np.random.permutation(self.n)
Expand All @@ -86,33 +86,39 @@ def time_permutation_int(self):

class RNG(Benchmark):
param_names = ['rng']
params = ['DSFMT', 'PCG64', 'PCG32', 'MT19937', 'Xoroshiro128',
'Xorshift1024', 'Xoshiro256StarStar', 'Xoshiro512StarStar',
params = ['DSFMT', 'MT19937', 'Xoroshiro128',
'Xorshift1024', 'Xoshiro256', 'Xoshiro512',
'Philox', 'ThreeFry', 'ThreeFry32', 'numpy']

def setup(self, brng):
if brng == 'numpy':
self.rg = np.random.RandomState()
else:
self.rg = RandomGenerator(getattr(np.random, brng)())
self.rg.random_sample()
self.rg = Generator(getattr(np.random, brng)())
self.rg.random()
self.int32info = np.iinfo(np.int32)
self.uint32info = np.iinfo(np.uint32)
self.uint64info = np.iinfo(np.uint64)

def time_raw(self, brng):
if brng == 'numpy':
self.rg.random_integers(self.int32info.max, size=nom_size)
else:
self.rg.random_integers(self.int32info.max, size=nom_size)
self.rg.integers(self.int32info.max, size=nom_size, endpoint=True)

def time_32bit(self, brng):
min, max = self.uint32info.min, self.uint32info.max
self.rg.randint(min, max + 1, nom_size, dtype=np.uint32)
if brng == 'numpy':
self.rg.randint(min, max + 1, nom_size, dtype=np.uint32)
else:
self.rg.integers(min, max + 1, nom_size, dtype=np.uint32)

def time_64bit(self, brng):
min, max = self.uint64info.min, self.uint64info.max
self.rg.randint(min, max + 1, nom_size, dtype=np.uint64)
if brng == 'numpy':
self.rg.randint(min, max + 1, nom_size, dtype=np.uint64)
else:
self.rg.integers(min, max + 1, nom_size, dtype=np.uint64)

def time_normal_zig(self, brng):
self.rg.standard_normal(nom_size)
Expand All @@ -122,34 +128,32 @@ class Bounded(Benchmark):
u16 = np.uint16
u32 = np.uint32
u64 = np.uint64
param_names = ['rng', 'dt_max_masked']
params = [['DSFMT', 'PCG64', 'PCG32', 'MT19937', 'Xoroshiro128',
'Xorshift1024', 'Xoshiro256StarStar', 'Xoshiro512StarStar',
param_names = ['rng', 'dt_max']
params = [['DSFMT', 'MT19937', 'Xoroshiro128',
'Xorshift1024', 'Xoshiro256', 'Xoshiro512',
'Philox', 'ThreeFry', 'ThreeFry32', 'numpy'],
[[u8, 95, True],
[u8, 64, False], # Worst case for legacy
[u8, 95, False], # Typ. avg. case for legacy
[u8, 127, False], # Best case for legacy
[u16, 95, True],
[u16, 1024, False], # Worst case for legacy
[u16, 1535, False], # Typ. avg. case for legacy
[u16, 2047, False], # Best case for legacy
[u32, 95, True],
[u32, 1024, False], # Worst case for legacy
[u32, 1535, False], # Typ. avg. case for legacy
[u32, 2047, False], # Best case for legacy
[u64, 95, True],
[u64, 1024, False], # Worst case for legacy
[u64, 1535, False], # Typ. avg. case for legacy
[u64, 2047, False], # Best case for legacy
[[u8, 95],
[u8, 64], # Worst case for legacy
[u8, 127], # Best case for legacy
[u16, 95],
[u16, 1024], # Worst case for legacy
[u16, 1535], # Typ. avg. case for legacy
[u16, 2047], # Best case for legacy
[u32, 1024], # Worst case for legacy
[u32, 1535], # Typ. avg. case for legacy
[u32, 2047], # Best case for legacy
[u64, 95],
[u64, 1024], # Worst case for legacy
[u64, 1535], # Typ. avg. case for legacy
[u64, 2047], # Best case for legacy
]]

def setup(self, brng, args):
if brng == 'numpy':
self.rg = np.random.RandomState()
else:
self.rg = RandomGenerator(getattr(np.random, brng)())
self.rg.random_sample()
self.rg = Generator(getattr(np.random, brng)())
self.rg.random()

def time_bounded(self, brng, args):
"""
Expand All @@ -161,20 +165,9 @@ def time_bounded(self, brng, args):
output dtype
max : int
Upper bound for range. Lower is always 0. Must be <= 2**bits.
use_masked: bool
If True, masking and rejection sampling is used to generate a random
number in an interval. If False, Lemire's algorithm is used if
available to generate a random number in an interval.
Notes
-----
Lemire's algorithm has improved performance when max+1 is not a
power of two.
"""
dt, max, use_masked = args
dt, max = args
if brng == 'numpy':
self.rg.randint(0, max + 1, nom_size, dtype=dt)
else:
self.rg.randint(0, max + 1, nom_size, dtype=dt,
use_masked=use_masked)

self.rg.integers(0, max + 1, nom_size, dtype=dt)
4 changes: 2 additions & 2 deletions doc/neps/nep-0010-new-iterator-ufunc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1877,8 +1877,8 @@ the new iterator.
Here is one of the original functions, for reference, and some
random image data.::

In [5]: rand1 = np.random.random_sample(1080*1920*4).astype(np.float32)
In [6]: rand2 = np.random.random_sample(1080*1920*4).astype(np.float32)
In [5]: rand1 = np.random.random(1080*1920*4).astype(np.float32)
In [6]: rand2 = np.random.random(1080*1920*4).astype(np.float32)
In [7]: image1 = rand1.reshape(1080,1920,4).swapaxes(0,1)
In [8]: image2 = rand2.reshape(1080,1920,4).swapaxes(0,1)

Expand Down
89 changes: 44 additions & 45 deletions doc/neps/nep-0019-rng-policy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,12 @@ those contributors simply walked away.
Implementation
--------------

Work on a proposed new PRNG subsystem is already underway in the randomgen_
project. The specifics of the new design are out of scope for this NEP and up
for much discussion, but we will discuss general policies that will guide the
evolution of whatever code is adopted. We will also outline just a few of the
requirements that such a new system must have to support the policy proposed in
this NEP.
Work on a proposed new pseudorandom number generator (PRNG) subsystem is
already underway in the randomgen_ project. The specifics of the new design
are out of scope for this NEP and up for much discussion, but we will discuss
general policies that will guide the evolution of whatever code is adopted. We
will also outline just a few of the requirements that such a new system must
have to support the policy proposed in this NEP.

First, we will maintain API source compatibility just as we do with the rest of
``numpy``. If we *must* make a breaking change, we will only do so with an
Expand All @@ -122,35 +122,35 @@ for a small performance improvement.
Any new design for the RNG subsystem will provide a choice of different core
uniform PRNG algorithms. A promising design choice is to make these core
uniform PRNGs their own lightweight objects with a minimal set of methods
(randomgen_ calls them “basic RNGs”). The broader set of non-uniform
distributions will be its own class that holds a reference to one of these core
uniform PRNG objects and simply delegates to the core uniform PRNG object when
it needs uniform random numbers. To borrow an example from randomgen_, the
class ``MT19937`` is a basic RNG that implements the classic Mersenne Twister
algorithm. The class ``RandomGenerator`` wraps around the basic RNG to provide
(randomgen_ calls them “BitGenerators”). The broader set of non-uniform
distributions will be its own class "Generator" that holds a reference to one
of these BitGenerator objects and simply delegates to the BitGenerator object
when it needs uniform random numbers. To borrow an example from randomgen_, the
class ``MT19937`` is a BitGenerator that implements the classic Mersenne Twister
algorithm. The class ``Generator`` wraps around the BitGenerator to provide
all of the non-uniform distribution methods::

# This is not the only way to instantiate this object.
# This is just handy for demonstrating the delegation.
>>> brng = MT19937(seed)
>>> rg = RandomGenerator(brng)
>>> rg = Generator(brng)
>>> x = rg.standard_normal(10)

We will be more strict about a select subset of methods on these basic RNG
We will be more strict about a select subset of methods on these BitGenerator
objects. They MUST guarantee stream-compatibility for a specified set
of methods which are chosen to make it easier to compose them to build other
distributions and which are needed to abstract over the implementation details
of the variety of core PRNG algorithms. Namely,
of the variety of BitGenerator algorithms. Namely,

* ``.bytes()``
* ``.random_uintegers()``
* ``.random_sample()``
* ``.integers()`` (which replaces ``randint`` and ``random_integers``)
* ``.random()`` (which replaces ``.random_sample()``)

The distributions class (``RandomGenerator``) SHOULD have all of the same
The distributions class (``Generator``) SHOULD have all of the same
distribution methods as ``RandomState`` with close-enough function signatures
such that almost all code that currently works with ``RandomState`` instances
will work with ``RandomGenerator`` instances (ignoring the precise stream
values). Some variance will be allowed for integer distributions: in order to
will work with ``Generator`` instances (ignoring the precise stream values).
Some variance will be allowed for integer distributions: in order to
avoid some of the cross-platform problems described above, these SHOULD be
rewritten to work with ``uint64`` numbers on all platforms.

Expand Down Expand Up @@ -183,14 +183,14 @@ reproducible across numpy versions.
This legacy distributions class MUST be accessible under the name
``numpy.random.RandomState`` for backwards compatibility. All current ways of
instantiating ``numpy.random.RandomState`` with a given state should
instantiate the Mersenne Twister basic RNG with the same state. The legacy
distributions class MUST be capable of accepting other basic RNGs. The purpose
here is to ensure that one can write a program with a consistent basic RNG
state with a mixture of libraries that may or may not have upgraded from
``RandomState``. Instances of the legacy distributions class MUST respond
``True`` to ``isinstance(rg, numpy.random.RandomState)`` because there is
current utility code that relies on that check. Similarly, old pickles of
``numpy.random.RandomState`` instances MUST unpickle correctly.
instantiate the Mersenne Twister BitGenerator with the same state. The legacy
distributions class MUST be capable of accepting other BitGenerators. The
purpose here is to ensure that one can write a program with a consistent
BitGenerator state with a mixture of libraries that may or may not have
upgraded from ``RandomState``. Instances of the legacy distributions class
MUST respond ``True`` to ``isinstance(rg, numpy.random.RandomState)`` because
there is current utility code that relies on that check. Similarly, old
pickles of ``numpy.random.RandomState`` instances MUST unpickle correctly.


``numpy.random.*``
Expand All @@ -209,27 +209,26 @@ consistently and usefully, but a very common usage is in unit tests where many
of the problems of global state are less likely.

This NEP does not propose removing these functions or changing them to use the
less-stable ``RandomGenerator`` distribution implementations. Future NEPs
might.
less-stable ``Generator`` distribution implementations. Future NEPs might.

Specifically, the initial release of the new PRNG subsystem SHALL leave these
convenience functions as aliases to the methods on a global ``RandomState``
that is initialized with a Mersenne Twister basic RNG object. A call to
``numpy.random.seed()`` will be forwarded to that basic RNG object. In
that is initialized with a Mersenne Twister BitGenerator object. A call to
``numpy.random.seed()`` will be forwarded to that BitGenerator object. In
addition, the global ``RandomState`` instance MUST be accessible in this
initial release by the name ``numpy.random.mtrand._rand``: Robert Kern long ago
promised ``scikit-learn`` that this name would be stable. Whoops.

In order to allow certain workarounds, it MUST be possible to replace the basic
RNG underneath the global ``RandomState`` with any other basic RNG object (we
leave the precise API details up to the new subsystem). Calling
In order to allow certain workarounds, it MUST be possible to replace the
BitGenerator underneath the global ``RandomState`` with any other BitGenerator
object (we leave the precise API details up to the new subsystem). Calling
``numpy.random.seed()`` thereafter SHOULD just pass the given seed to the
current basic RNG object and not attempt to reset the basic RNG to the Mersenne
Twister. The set of ``numpy.random.*`` convenience functions SHALL remain the
current BitGenerator object and not attempt to reset the BitGenerator to the
Mersenne Twister. The set of ``numpy.random.*`` convenience functions SHALL remain the
same as they currently are. They SHALL be aliases to the ``RandomState``
methods and not the new less-stable distributions class (``RandomGenerator``,
in the examples above). Users who want to get the fastest, best distributions
can follow best practices and instantiate generator objects explicitly.
methods and not the new less-stable distributions class (``Generator``, in the
examples above). Users who want to get the fastest, best distributions can
follow best practices and instantiate Generator objects explicitly.

This NEP does not propose that these requirements remain in perpetuity. After
we have experience with the new PRNG subsystem, we can and should revisit these
Expand Down Expand Up @@ -298,8 +297,8 @@ positive improvement to the downstream project, just avoiding being broken.

Furthermore, under this old proposal, we would have had a quite lengthy
deprecation period where ``RandomState`` existed alongside the new system of
basic RNGs and distribution classes. Leaving the implementation of
``RandomState`` fixed meant that it could not use the new basic RNG state
BitGenerator and Generator classes. Leaving the implementation of
``RandomState`` fixed meant that it could not use the new BitGenerator state
objects. Developing programs that use a mixture of libraries that have and
have not upgraded would require managing two sets of PRNG states. This would
notionally have been time-limited, but we intended the deprecation to be very
Expand All @@ -308,9 +307,9 @@ long.
The current proposal solves all of these problems. All current usages of
``RandomState`` will continue to work in perpetuity, though some may be
discouraged through documentation. Unit tests can continue to use the full
complement of ``RandomState`` methods. Mixed ``RandomState/RandomGenerator``
code can safely share the common basic RNG state. Unmodified ``RandomState``
code can make use of the new features of alternative basic RNGs like settable
complement of ``RandomState`` methods. Mixed ``RandomState/Generator``
code can safely share the common BitGenerator state. Unmodified ``RandomState``
code can make use of the new features of alternative BitGenerators like settable
streams.


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,7 @@ Parallel generation
.. autosummary::
:toctree: generated/

~DSFMT.jump

Random Generator
================
.. autosummary::
:toctree: generated/

~DSFMT.generator
~DSFMT.jumped

Extending
=========
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
.. _brng:
.. _bit_generator:

Basic Random Number Generators
------------------------------
Bit Generators
--------------

.. currentmodule:: numpy.random

The random values produced by :class:`~RandomGenerator`
are produced by a basic RNG. These basic RNGs do not directly provide
The random values produced by :class:`~Generator`
originate in a BitGenerator. The BitGenerators do not directly provide
random numbers and only contain methods used for seeding, getting or
setting the state, jumping or advancing the state, and for accessing
low-level wrappers for consumption by code that can efficiently
Expand All @@ -20,23 +20,20 @@ Stable RNGs

DSFMT <dsfmt>
MT19937 <mt19937>
PCG64 <pcg64>
Philox <philox>
ThreeFry <threefry>
XoroShiro128+ <xoroshiro128>
Xorshift1024*φ <xorshift1024>
Xoshiro256** <xoshiro256starstar>
Xoshiro512** <xoshiro512starstar>
Xoshiro256** <xoshiro256>
Xoshiro512** <xoshiro512>


Experimental RNGs
=================

These RNGs are currently included for testing but are may not be
permanent.
These BitGenerators are currently included but may not be permanent.

.. toctree::
:maxdepth: 1

PCG32 <pcg32>
ThreeFry32 <threefry32>
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,7 @@ Parallel generation
.. autosummary::
:toctree: generated/

~MT19937.jump

Random Generator
================
.. autosummary::
:toctree: generated/

~MT19937.generator
~MT19937.jumped

Extending
=========
Expand Down
Loading

0 comments on commit 17e0070

Please sign in to comment.