Skip to content

Commit

Permalink
ENH: Added Lemire algorithms for generating random numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
bduvenhage authored and mattip committed May 20, 2019
1 parent 707371d commit 15bebed
Show file tree
Hide file tree
Showing 9 changed files with 604 additions and 189 deletions.
1 change: 0 additions & 1 deletion _randomgen/.travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,3 @@ after_success:
cd ${BUILD_DIR}
python benchmark.py;
fi
2 changes: 1 addition & 1 deletion _randomgen/appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ test_script:

on_success:
- cd %GIT_DIR%\
- python benchmark.py
- python benchmark.py
110 changes: 102 additions & 8 deletions _randomgen/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,54 @@ def timer_uniform():
run_timer(dist, command, None, SETUP, 'Uniforms')


def timer_8bit_bounded(max=95, use_masked=True):
min = 0

dist = 'random_uintegers'

# Note on performance of generating random numbers in an interval:
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.

if use_masked:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8, use_masked=True)' # Use masking & rejection.
else:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8, use_masked=False)' # Use Lemire's algo.

command = command.format(min=min, max=max)

command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8)'
command_numpy = command_numpy.format(min=min, max=max)

run_timer(dist, command, command_numpy, SETUP,
'8-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))


def timer_16bit_bounded(max=1535, use_masked=True):
min = 0

dist = 'random_uintegers'

# Note on performance of generating random numbers in an interval:
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.

if use_masked:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16, use_masked=True)' # Use masking & rejection.
else:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16, use_masked=False)' # Use Lemire's algo.

command = command.format(min=min, max=max)

command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16)'
command_numpy = command_numpy.format(min=min, max=max)

run_timer(dist, command, command_numpy, SETUP,
'16-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))


def timer_32bit():
info = np.iinfo(np.uint32)
min, max = info.min, info.max
Expand All @@ -93,10 +141,8 @@ def timer_32bit():
run_timer(dist, command, command_numpy, SETUP, '32-bit unsigned integers')


def timer_32bit_bounded():
# info = np.iinfo(np.uint32)
# min, max = info.min, info.max
min, max = 0, 1024 # Worst case for masking & rejection algorithm!
def timer_32bit_bounded(max=1535, use_masked=True):
min = 0

dist = 'random_uintegers'

Expand All @@ -105,15 +151,18 @@ def timer_32bit_bounded():
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.

# command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=True)' # Use masking & rejection.
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=False)' # Use Lemire's algo.
if use_masked:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=True)' # Use masking & rejection.
else:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=False)' # Use Lemire's algo.

command = command.format(min=min, max=max)

command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32)'
command_numpy = command_numpy.format(min=min, max=max)

run_timer(dist, command, command_numpy, SETUP, '32-bit bounded unsigned integers')
run_timer(dist, command, command_numpy, SETUP,
'32-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))


def timer_64bit():
Expand All @@ -126,6 +175,30 @@ def timer_64bit():
run_timer(dist, command, command_numpy, SETUP, '64-bit unsigned integers')


def timer_64bit_bounded(max=1535, use_masked=True):
min = 0

dist = 'random_uintegers'

# Note on performance of generating random numbers in an interval:
# use_masked=True : masking and rejection sampling is used to generate a random number in an interval.
# use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval.
# Lemire's algorithm has improved performance when {max}+1 is not a power of two.

if use_masked:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64, use_masked=True)' # Use masking & rejection.
else:
command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64, use_masked=False)' # Use Lemire's algo.

command = command.format(min=min, max=max)

command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64)'
command_numpy = command_numpy.format(min=min, max=max)

run_timer(dist, command, command_numpy, SETUP,
'64-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked))


def timer_normal_zig():
dist = 'standard_normal'
command = 'rg.standard_normal(1000000)'
Expand All @@ -143,7 +216,28 @@ def timer_normal_zig():
timer_uniform()
if args.full:
timer_raw()
timer_8bit_bounded(use_masked=True)
timer_8bit_bounded(max=64, use_masked=False) # Worst case for Numpy.
timer_8bit_bounded(max=95, use_masked=False) # Typ. avrg. case for Numpy.
timer_8bit_bounded(max=127, use_masked=False) # Best case for Numpy.

timer_16bit_bounded(use_masked=True)
timer_16bit_bounded(max=1024, use_masked=False) # Worst case for Numpy.
timer_16bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy.
timer_16bit_bounded(max=2047, use_masked=False) # Best case for Numpy.

timer_32bit()
timer_32bit_bounded()

timer_32bit_bounded(use_masked=True)
timer_32bit_bounded(max=1024, use_masked=False) # Worst case for Numpy.
timer_32bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy.
timer_32bit_bounded(max=2047, use_masked=False) # Best case for Numpy.

timer_64bit()

timer_64bit_bounded(use_masked=True)
timer_64bit_bounded(max=1024, use_masked=False) # Worst case for Numpy.
timer_64bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy.
timer_64bit_bounded(max=2047, use_masked=False) # Best case for Numpy.

timer_normal_zig()
4 changes: 1 addition & 3 deletions _randomgen/randomgen/bounded_integers.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ cdef object _rand_{{nptype}}_broadcast(object low, object high, object size,
The internal generator does not have this issue since it generates from
the closes interval [low, high-1] and high-1 is always in range for the
64 bit integer type.
Note: use_masked currently ignored for 64bit generators.
"""

cdef np.ndarray low_arr, high_arr, out_arr, highm1_arr
Expand Down Expand Up @@ -162,7 +160,7 @@ cdef object _rand_{{nptype}}_broadcast(object low, object high, object size,

if rng != last_rng:
mask = _gen_mask(rng)
out_data[i] = random_bounded_uint64(state, off, rng, mask)
out_data[i] = random_bounded_uint64(state, off, rng, mask, use_masked)

np.PyArray_MultiIter_NEXT(it)

Expand Down
4 changes: 3 additions & 1 deletion _randomgen/randomgen/distributions.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,12 @@ cdef extern from "src/distributions/distributions.h":

uint64_t random_interval(brng_t *brng_state, uint64_t max) nogil

# Generate random uint64 numbers in closed interval [off, off + rng].
uint64_t random_bounded_uint64(brng_t *brng_state,
uint64_t off, uint64_t rng,
uint64_t mask) nogil
uint64_t mask, bint use_masked) nogil

# Generate random uint32 numbers in closed interval [off, off + rng].
uint32_t random_buffered_bounded_uint32(brng_t *brng_state,
uint32_t off, uint32_t rng,
uint32_t mask, bint use_masked,
Expand Down
2 changes: 1 addition & 1 deletion _randomgen/randomgen/generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ cdef class RandomGenerator:
use_masked : bool
If True the generator uses rejection sampling with a bit mask to
reject random numbers that are out of bounds. If False the generator
will use Lemire's rejection sampling algorithm when available.
will use Lemire's rejection sampling algorithm.
.. versionadded:: 1.15.1
Expand Down
Loading

0 comments on commit 15bebed

Please sign in to comment.