Skip to content

Commit

Permalink
Merge pull request numpy#12968 from hameerabbasi/sort-benchmarks
Browse files Browse the repository at this point in the history
BENCH: Re-write sorting benchmarks
  • Loading branch information
mattip authored Feb 27, 2019
2 parents a03afed + 19770c7 commit 71aab21
Showing 1 changed file with 130 additions and 32 deletions.
162 changes: 130 additions & 32 deletions benchmarks/benchmarks/bench_function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,47 +95,145 @@ def time_select_larger(self):
np.select(self.cond_large, ([self.d, self.e] * 10))


class SortGenerator(object):
# The size of the unsorted area in the "random unsorted area"
# benchmarks
AREA_SIZE = 100
# The size of the "partially ordered" sub-arrays
BUBBLE_SIZE = 100

@staticmethod
def random(size, dtype):
"""
Returns a randomly-shuffled array.
"""
arr = np.arange(size, dtype=dtype)
np.random.shuffle(arr)
return arr

@staticmethod
def ordered(size, dtype):
"""
Returns an ordered array.
"""
return np.arange(size, dtype=dtype)

@staticmethod
def reversed(size, dtype):
"""
Returns an array that's in descending order.
"""
return np.arange(size-1, -1, -1, dtype=dtype)

@staticmethod
def uniform(size, dtype):
"""
Returns an array that has the same value everywhere.
"""
return np.ones(size, dtype=dtype)

@staticmethod
def swapped_pair(size, dtype, swap_frac):
"""
Returns an ordered array, but one that has ``swap_frac * size``
pairs swapped.
"""
a = np.arange(size, dtype=dtype)
for _ in range(int(size * swap_frac)):
x, y = np.random.randint(0, size, 2)
a[x], a[y] = a[y], a[x]
return a

@staticmethod
def sorted_block(size, dtype, block_size):
"""
Returns an array with blocks that are all sorted.
"""
a = np.arange(size, dtype=dtype)
b = []
if size < block_size:
return a
block_num = size // block_size
for i in range(block_num):
b.extend(a[i::block_num])
return np.array(b)

@classmethod
def random_unsorted_area(cls, size, dtype, frac, area_size=None):
"""
This type of array has random unsorted areas such that they
compose the fraction ``frac`` of the original array.
"""
if area_size is None:
area_size = cls.AREA_SIZE

area_num = int(size * frac / area_size)
a = np.arange(size, dtype=dtype)
for _ in range(area_num):
start = np.random.randint(size-area_size)
end = start + area_size
np.random.shuffle(a[start:end])
return a

@classmethod
def random_bubble(cls, size, dtype, bubble_num, bubble_size=None):
"""
This type of array has ``bubble_num`` random unsorted areas.
"""
if bubble_size is None:
bubble_size = cls.BUBBLE_SIZE
frac = bubble_size * bubble_num / size

return cls.random_unsorted_area(size, dtype, frac, bubble_size)


class Sort(Benchmark):
"""
This benchmark tests sorting performance with several
different types of arrays that are likely to appear in
real-world applications.
"""
params = [
# In NumPy 1.17 and newer, 'merge' can be one of several
# stable sorts, it isn't necessarily merge sort.
['quick', 'merge', 'heap'],
['float32', 'int32', 'uint32']
['float64', 'int64', 'uint64'],
[
('ordered',),
('reversed',),
('uniform',),
('sorted_block', 10),
('sorted_block', 100),
('sorted_block', 1000),
('swapped_pair', 0.01),
('swapped_pair', 0.1),
('swapped_pair', 0.5),
('random_unsorted_area', 0.5),
('random_unsorted_area', 0.1),
('random_unsorted_area', 0.01),
('random_bubble', 1),
('random_bubble', 5),
('random_bubble', 10),
],
]
param_names = ['kind', 'dtype']

def setup(self, kind, dtype):
self.e = np.arange(10000, dtype=dtype)
self.o = np.arange(10001, dtype=dtype)
np.random.seed(25)
np.random.shuffle(self.o)
# quicksort implementations can have issues with equal elements
self.equal = np.ones(10000, dtype=dtype)
self.many_equal = np.sort(np.arange(10000) % 10).astype(dtype)

try:
np.sort(self.e, kind=kind)
except TypeError:
raise NotImplementedError()

def time_sort(self, kind, dtype):
np.sort(self.e, kind=kind)

def time_sort_random(self, kind, dtype):
np.sort(self.o, kind=kind)
param_names = ['kind', 'dtype', 'array_type']

def time_sort_inplace(self, kind, dtype):
self.e.sort(kind=kind)
# The size of the benchmarked arrays.
ARRAY_SIZE = 10000

def time_sort_equal(self, kind, dtype):
self.equal.sort(kind=kind)
def setup(self, kind, dtype, array_type):
np.random.seed(1234)
array_class = array_type[0]
self.arr = getattr(SortGenerator, array_class)(self.ARRAY_SIZE, dtype, *array_type[1:])

def time_sort_many_equal(self, kind, dtype):
self.many_equal.sort(kind=kind)
def time_sort_inplace(self, kind, dtype, array_type):
self.arr.sort(kind=kind)

def time_argsort(self, kind, dtype):
self.e.argsort(kind=kind)
def time_sort(self, kind, dtype, array_type):
np.sort(self.arr, kind=kind)

def time_argsort_random(self, kind, dtype):
self.o.argsort(kind=kind)
def time_argsort(self, kind, dtype, array_type):
np.argsort(self.arr, kind=kind)


class SortWorst(Benchmark):
Expand Down

0 comments on commit 71aab21

Please sign in to comment.