diff --git a/.gitignore b/.gitignore index 24f562af3df15..68299a202b7c7 100644 --- a/.gitignore +++ b/.gitignore @@ -95,5 +95,5 @@ sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx -sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd -sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx +sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd +sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx diff --git a/setup.cfg b/setup.cfg index 81fbbffadb233..3aa89052a4b8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -79,8 +79,8 @@ ignore = sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx - sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd - sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx + sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd + sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx [codespell] diff --git a/setup.py b/setup.py index a22df6e647a8e..cc5b6f11749cb 100755 --- a/setup.py +++ b/setup.py @@ -92,7 +92,7 @@ "sklearn.metrics._pairwise_distances_reduction._gemm_term_computer", "sklearn.metrics._pairwise_distances_reduction._base", "sklearn.metrics._pairwise_distances_reduction._argkmin", - "sklearn.metrics._pairwise_distances_reduction._radius_neighborhood", + "sklearn.metrics._pairwise_distances_reduction._radius_neighbors", "sklearn.metrics._pairwise_fast", "sklearn.neighbors._partition_nodes", "sklearn.tree._splitter", diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py 
index 1aebb8bc4a572..133c854682f0c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -32,7 +32,7 @@ # # Dispatchers are meant to be used in the Python code. Under the hood, a # dispatcher must only define the logic to choose at runtime to the correct -# dtype-specialized :class:`BaseDistanceReductionDispatcher` implementation based +# dtype-specialized :class:`BaseDistancesReductionDispatcher` implementation based # on the dtype of X and of Y. # # @@ -46,7 +46,7 @@ # # # (base dispatcher) -# BaseDistanceReductionDispatcher +# BaseDistancesReductionDispatcher # ∆ # | # | @@ -56,8 +56,8 @@ # ArgKmin RadiusNeighbors # | | # | | -# | (64bit implem.) | -# | BaseDistanceReducer{32,64} | +# | (float{32,64} implem.) | +# | BaseDistancesReduction{32,64} | # | ∆ | # | | | # | | | @@ -74,9 +74,9 @@ # x | | x # EuclideanArgKmin{32,64} EuclideanRadiusNeighbors{32,64} # -# For instance :class:`ArgKmin`, dispatches to both :class:`ArgKmin64` -# and :class:`ArgKmin32` if X and Y are both dense NumPy arrays with a `float64` -# or `float32` dtype respectively. +# For instance :class:`ArgKmin` dispatches to: +# - :class:`ArgKmin64` if X and Y are two `float64` array-likes +# - :class:`ArgKmin32` if X and Y are two `float32` array-likes # # In addition, if the metric parameter is set to "euclidean" or "sqeuclidean", # then `ArgKmin{32,64}` further dispatches to `EuclideanArgKmin{32,64}`. 
For @@ -87,14 +87,14 @@ from ._dispatcher import ( - BaseDistanceReductionDispatcher, + BaseDistancesReductionDispatcher, ArgKmin, RadiusNeighbors, sqeuclidean_row_norms, ) __all__ = [ - "BaseDistanceReductionDispatcher", + "BaseDistancesReductionDispatcher", "ArgKmin", "RadiusNeighbors", "sqeuclidean_row_norms", diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp index 7bcfab9ac4767..b738cda119c11 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp @@ -5,11 +5,11 @@ cnp.import_array() {{for name_suffix in ['64', '32']}} -from ._base cimport BaseDistanceReducer{{name_suffix}} +from ._base cimport BaseDistancesReduction{{name_suffix}} from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): - """{{name_suffix}}bit implementation of BaseDistanceReducer{{name_suffix}} for the `ArgKmin` reduction.""" +cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): + """float{{name_suffix}} implementation of the ArgKmin.""" cdef: ITYPE_t k @@ -23,7 +23,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): - """EuclideanDistance-specialized {{name_suffix}}bit implementation of ArgKmin{{name_suffix}}.""" + """EuclideanDistance-specialisation of ArgKmin{{name_suffix}}.""" cdef: GEMMTermComputer{{name_suffix}} gemm_term_computer const DTYPE_t[::1] X_norm_squared diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp index 03c4d187fd9c2..a2ecb7c2266b6 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp @@ -24,7 +24,7 @@ cnp.import_array() {{for 
name_suffix in ['64', '32']}} from ._base cimport ( - BaseDistanceReducer{{name_suffix}}, + BaseDistancesReduction{{name_suffix}}, _sqeuclidean_row_norms{{name_suffix}}, ) @@ -36,8 +36,8 @@ from ._datasets_pair cimport ( from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): - """{{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistanceReducer{{name_suffix}}.""" +cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): + """float{{name_suffix}} implementation of the ArgKmin.""" @classmethod def compute( @@ -311,7 +311,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}): - """EuclideanDistance-specialized implementation for ArgKmin{{name_suffix}}.""" + """EuclideanDistance-specialisation of ArgKmin{{name_suffix}}.""" @classmethod def is_usable_for(cls, X, Y, metric) -> bool: diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp index 7e4b639e3803b..44f48f5bf1558 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp @@ -21,10 +21,10 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( from ._datasets_pair cimport DatasetsPair{{name_suffix}} -cdef class BaseDistanceReducer{{name_suffix}}: +cdef class BaseDistancesReduction{{name_suffix}}: """ - Base {{name_suffix}}bit implementation template of the pairwise-distances reduction - backend. + Base float{{name_suffix}} implementation template of the pairwise-distances + reduction backends. 
Implementations inherit from this template and may override the several defined hooks as needed in order to easily extend functionality with diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp index d3303511b64d9..d03c7e5fa0b2a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp @@ -67,7 +67,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( ITYPE_t d = X.shape[1] DTYPE_t[::1] squared_row_norms = np.empty(n, dtype=DTYPE) - # To upcast the i-th row of X from 32bit to 64bit + # To upcast the i-th row of X from float32 to float64 vector[vector[DTYPE_t]] X_i_upcast = vector[vector[DTYPE_t]]( num_threads, vector[DTYPE_t](d) ) @@ -75,7 +75,7 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( with nogil, parallel(num_threads=num_threads): thread_num = openmp.omp_get_thread_num() for i in prange(n, schedule='static'): - # Upcasting the i-th row of X from 32bit to 64bit + # Upcasting the i-th row of X from float32 to float64 for j in range(d): X_i_upcast[thread_num][j] = deref(X_ptr + i * d + j) @@ -90,10 +90,10 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms32( from ._datasets_pair cimport DatasetsPair{{name_suffix}} -cdef class BaseDistanceReducer{{name_suffix}}: +cdef class BaseDistancesReduction{{name_suffix}}: """ - Base {{name_suffix}}bit implementation template of the pairwise-distances reduction - backend. + Base float{{name_suffix}} implementation template of the pairwise-distances + reduction backends. 
Implementations inherit from this template and may override the several defined hooks as needed in order to easily extend functionality with @@ -209,7 +209,6 @@ cdef class BaseDistanceReducer{{name_suffix}}: X_end = X_start + self.X_n_samples_chunk # Reinitializing thread datastructures for the new X chunk - # If necessary, upcast X[X_start:X_end] to 64bit self._parallel_on_X_init_chunk(thread_num, X_start, X_end) for Y_chunk_idx in range(self.Y_n_chunks): @@ -219,7 +218,6 @@ cdef class BaseDistanceReducer{{name_suffix}}: else: Y_end = Y_start + self.Y_n_samples_chunk - # If necessary, upcast Y[Y_start:Y_end] to 64bit self._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, @@ -280,7 +278,6 @@ cdef class BaseDistanceReducer{{name_suffix}}: thread_num = _openmp_thread_num() # Initializing datastructures used in this thread - # If necessary, upcast X[X_start:X_end] to 64bit self._parallel_on_Y_parallel_init(thread_num, X_start, X_end) for Y_chunk_idx in prange(self.Y_n_chunks, schedule='static'): @@ -290,7 +287,6 @@ cdef class BaseDistanceReducer{{name_suffix}}: else: Y_end = Y_start + self.Y_n_samples_chunk - # If necessary, upcast Y[Y_start:Y_end] to 64bit self._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( X_start, X_end, Y_start, Y_end, @@ -326,7 +322,7 @@ cdef class BaseDistanceReducer{{name_suffix}}: ) nogil: """Compute the pairwise distances on two chunks of X and Y and reduce them. - This is THE core computational method of BaseDistanceReducer{{name_suffix}}. + This is THE core computational method of BaseDistancesReduction{{name_suffix}}. This must be implemented in subclasses agnostically from the parallelization strategies. """ @@ -358,7 +354,19 @@ cdef class BaseDistanceReducer{{name_suffix}}: ITYPE_t X_start, ITYPE_t X_end, ) nogil: - """Initialize datastructures used in a thread given its number.""" + """Initialize datastructures used in a thread given its number. 
+ + In this method, EuclideanDistance specialisations of subclasses of + BaseDistancesReduction _must_ call: + + self.gemm_term_computer._parallel_on_X_init_chunk( + thread_num, X_start, X_end, + ) + + to ensure the proper upcast of X[X_start:X_end] to float64 prior + to the reduction with float64 accumulator buffers when X.dtype is + float32. + """ return cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( @@ -371,7 +379,16 @@ cdef class BaseDistanceReducer{{name_suffix}}: ) nogil: """Initialize datastructures just before the _compute_and_reduce_distances_on_chunks. - This is eventually used to upcast X[X_start:X_end] to 64bit. + In this method, EuclideanDistance specialisations of subclasses of + BaseDistancesReduction _must_ call: + + self.gemm_term_computer._parallel_on_X_pre_compute_and_reduce_distances_on_chunks( + X_start, X_end, Y_start, Y_end, thread_num, + ) + + to ensure the proper upcast of Y[Y_start:Y_end] to float64 prior + to the reduction with float64 accumulator buffers when Y.dtype is + float32. """ return @@ -403,7 +420,19 @@ cdef class BaseDistanceReducer{{name_suffix}}: ITYPE_t X_start, ITYPE_t X_end, ) nogil: - """Initialize datastructures used in a thread given its number.""" + """Initialize datastructures used in a thread given its number. + + In this method, EuclideanDistance specialisations of subclasses of + BaseDistancesReduction _must_ call: + + self.gemm_term_computer._parallel_on_Y_parallel_init( + thread_num, X_start, X_end, + ) + + to ensure the proper upcast of X[X_start:X_end] to float64 prior + to the reduction with float64 accumulator buffers when X.dtype is + float32. + """ return cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( @@ -416,7 +445,16 @@ cdef class BaseDistanceReducer{{name_suffix}}: ) nogil: """Initialize datastructures just before the _compute_and_reduce_distances_on_chunks. - This is eventually used to upcast Y[Y_start:Y_end] to 64bit. 
+ In this method, EuclideanDistance specialisations of subclasses of + BaseDistancesReduction _must_ call: + + self.gemm_term_computer._parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( + X_start, X_end, Y_start, Y_end, thread_num, + ) + + to ensure the proper upcast of Y[Y_start:Y_end] to float64 prior + to the reduction with float64 accumulator buffers when Y.dtype is + float32. """ return diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp index cfa37a004f17a..78857341f9c97 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp @@ -35,7 +35,7 @@ cdef class DatasetsPair{{name_suffix}}: The handling of parallelization over chunks to compute the distances and aggregation for several rows at a time is done in dedicated - subclasses of :class:`BaseDistanceReductionDispatcher` that in-turn rely on + subclasses of :class:`BaseDistancesReductionDispatcher` that in-turn rely on subclasses of :class:`DatasetsPair` for each pair of rows in the data. The goal is to make it possible to decouple the generic parallelization and aggregation logic from metric-specific computation as much as possible. diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index d028d7e0b5189..cd693f352dbc5 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -16,7 +16,7 @@ ArgKmin64, ArgKmin32, ) -from ._radius_neighborhood import ( +from ._radius_neighbors import ( RadiusNeighbors64, RadiusNeighbors32, ) @@ -51,10 +51,10 @@ def sqeuclidean_row_norms(X, num_threads): ) -class BaseDistanceReductionDispatcher: +class BaseDistancesReductionDispatcher: """Abstract base dispatcher for pairwise distance computation & reduction. 
- Each dispatcher extending the base :class:`BaseDistanceReductionDispatcher` + Each dispatcher extending the base :class:`BaseDistancesReductionDispatcher` dispatcher must implement the :meth:`compute` classmethod. """ @@ -168,7 +168,7 @@ def compute( """ -class ArgKmin(BaseDistanceReductionDispatcher): +class ArgKmin(BaseDistancesReductionDispatcher): """Compute the argkmin of row vectors of X on the ones of Y. For each row vector of X, computes the indices of k first the rows @@ -304,7 +304,7 @@ def compute( ) -class RadiusNeighbors(BaseDistanceReductionDispatcher): +class RadiusNeighbors(BaseDistancesReductionDispatcher): """Compute radius-based neighbors for two sets of vectors. For each row-vector X[i] of the queries X, find all the indices j of diff --git a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp index 90a74374244ba..2f4f040b91ad8 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pxd.tp @@ -35,7 +35,7 @@ cdef class GEMMTermComputer{{name_suffix}}: vector[vector[DTYPE_t]] dist_middle_terms_chunks {{if upcast_to_float64}} - # Buffers for upcasting chunks of X and Y from 32bit to 64bit + # Buffers for upcasting chunks of X and Y from float32 to float64 vector[vector[DTYPE_t]] X_c_upcast vector[vector[DTYPE_t]] Y_c_upcast {{endif}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp index 2646d7b7f2e53..e69d1c3df9f7d 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_gemm_term_computer.pyx.tp @@ -65,7 +65,7 @@ cdef class GEMMTermComputer{{name_suffix}}: self.dist_middle_terms_chunks = vector[vector[DTYPE_t]](self.effective_n_threads) {{if 
upcast_to_float64}} - # We populate the buffer for upcasting chunks of X and Y from 32bit to 64bit. + # We populate the buffer for upcasting chunks of X and Y from float32 to float64. self.X_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) self.Y_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp similarity index 88% rename from sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp rename to sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp index 506b02e4216a8..80fd2775b5acd 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp @@ -28,14 +28,11 @@ cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( ##################### {{for name_suffix in ['64', '32']}} -from ._base cimport BaseDistanceReducer{{name_suffix}} +from ._base cimport BaseDistancesReduction{{name_suffix}} from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): - """ - {{name_suffix}}bit implementation of BaseDistanceReducer{{name_suffix}} for the - `RadiusNeighbors` reduction. 
- """ +cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): + """float{{name_suffix}} implementation of the RadiusNeighbors.""" cdef: DTYPE_t radius @@ -82,7 +79,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix}}): - """EuclideanDistance-specialized {{name_suffix}}bit implementation for RadiusNeighbors{{name_suffix}}.""" + """EuclideanDistance-specialisation of RadiusNeighbors{{name_suffix}}.""" cdef: GEMMTermComputer{{name_suffix}} gemm_term_computer const DTYPE_t[::1] X_norm_squared diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp similarity index 98% rename from sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp rename to sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index e9a16de288299..80747fd1c1a12 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -44,7 +44,7 @@ cdef cnp.ndarray[object, ndim=1] coerce_vectors_to_nd_arrays( {{for name_suffix in ['64', '32']}} from ._base cimport ( - BaseDistanceReducer{{name_suffix}}, + BaseDistancesReduction{{name_suffix}}, _sqeuclidean_row_norms{{name_suffix}} ) @@ -56,11 +56,8 @@ from ._datasets_pair cimport ( from ._gemm_term_computer cimport GEMMTermComputer{{name_suffix}} -cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): - """ - {{name_suffix}}bit implementation of the pairwise-distance reduction BaseDistanceReducer{{name_suffix}} for the - `RadiusNeighbors` reduction. 
- """ +cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}): + """float{{name_suffix}} implementation of the RadiusNeighbors.""" @classmethod def compute( @@ -320,7 +317,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistanceReducer{{name_suffix}}): cdef class EuclideanRadiusNeighbors{{name_suffix}}(RadiusNeighbors{{name_suffix}}): - """EuclideanDistance-specialized implementation for RadiusNeighbors{{name_suffix}}.""" + """EuclideanDistance-specialisation of RadiusNeighbors{{name_suffix}}.""" @classmethod def is_usable_for(cls, X, Y, metric) -> bool: diff --git a/sklearn/metrics/_pairwise_distances_reduction/setup.py b/sklearn/metrics/_pairwise_distances_reduction/setup.py index f55ec659b5821..e1fbbceea7eb8 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/setup.py +++ b/sklearn/metrics/_pairwise_distances_reduction/setup.py @@ -21,8 +21,8 @@ def configuration(parent_package="", top_path=None): "sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp", "sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp", "sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd.tp", - "sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pyx.tp", - "sklearn/metrics/_pairwise_distances_reduction/_radius_neighborhood.pxd.tp", + "sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp", + "sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd.tp", ] gen_from_templates(templates) @@ -32,7 +32,7 @@ def configuration(parent_package="", top_path=None): "_gemm_term_computer.pyx", "_base.pyx", "_argkmin.pyx", - "_radius_neighborhood.pyx", + "_radius_neighbors.pyx", ] for source_file in cython_sources: diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index 52f5f3e73948c..f929a55105509 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ 
b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -10,7 +10,7 @@ from scipy.spatial.distance import cdist from sklearn.metrics._pairwise_distances_reduction import ( - BaseDistanceReductionDispatcher, + BaseDistancesReductionDispatcher, ArgKmin, RadiusNeighbors, sqeuclidean_row_norms, @@ -185,7 +185,7 @@ def assert_argkmin_results_quasi_equality( ), msg -def assert_radius_neighborhood_results_equality( +def assert_radius_neighbors_results_equality( ref_dist, dist, ref_indices, indices, radius ): # We get arrays of arrays and we need to check for individual pairs @@ -204,7 +204,7 @@ def assert_radius_neighborhood_results_equality( ) -def assert_radius_neighborhood_results_quasi_equality( +def assert_radius_neighbors_results_quasi_equality( ref_dist, dist, ref_indices, @@ -308,7 +308,7 @@ def assert_radius_neighborhood_results_quasi_equality( ( RadiusNeighbors, np.float64, - ): assert_radius_neighborhood_results_equality, + ): assert_radius_neighbors_results_equality, # In the case of 32bit, indices can be permuted due to small difference # in the computations of their associated distances, hence we test equality of # results up to valid permutations. @@ -316,7 +316,7 @@ def assert_radius_neighborhood_results_quasi_equality( ( RadiusNeighbors, np.float32, - ): assert_radius_neighborhood_results_quasi_equality, + ): assert_radius_neighbors_results_quasi_equality, } @@ -404,7 +404,7 @@ def test_assert_argkmin_results_quasi_equality(): ) -def test_assert_radius_neighborhood_results_quasi_equality(): +def test_assert_radius_neighbors_results_quasi_equality(): rtol = 1e-7 eps = 1e-7 @@ -425,7 +425,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): ] # Sanity check: compare the reference results to themselves. 
- assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( ref_dist, ref_dist, ref_indices, @@ -435,7 +435,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): ) # Apply valid permutation on indices - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -443,7 +443,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): radius=6.1, rtol=rtol, ) - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([_1m, _1m, 1, _1p, _1p])]), np.array([np.array([_1m, _1m, 1, _1p, _1p])]), np.array([np.array([6, 7, 8, 9, 10])]), @@ -455,7 +455,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): # Apply invalid permutation on indices msg = "Neighbors indices for query 0 are not matching" with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -465,7 +465,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): ) # Having extra last elements is valid if they are in: [radius ± rtol] - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -479,7 +479,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): "The last extra elements ([6.]) aren't in [radius ± rtol]=[6.1 ± 1e-07]" ) with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + 
assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, 6])]), np.array([np.array([1.2, 2.5])]), np.array([np.array([1, 2, 3])]), @@ -491,7 +491,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): # Indices aren't properly sorted w.r.t their distances msg = "Neighbors indices for query 0 are not matching" with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -503,7 +503,7 @@ def test_assert_radius_neighborhood_results_quasi_equality(): # Distances aren't properly sorted msg = "Distances aren't sorted on row 0" with pytest.raises(AssertionError, match=msg): - assert_radius_neighborhood_results_quasi_equality( + assert_radius_neighbors_results_quasi_equality( np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]), np.array([np.array([2.5, 1.2, _6_1m, 6.1, _6_1p])]), np.array([np.array([1, 2, 3, 4, 5])]), @@ -522,33 +522,33 @@ def test_pairwise_distances_reduction_is_usable_for(): metric = "manhattan" # Must be usable for all possible pair of {dense, sparse} datasets - assert BaseDistanceReductionDispatcher.is_usable_for(X, Y, metric) - assert BaseDistanceReductionDispatcher.is_usable_for(X_csr, Y_csr, metric) - assert BaseDistanceReductionDispatcher.is_usable_for(X_csr, Y, metric) - assert BaseDistanceReductionDispatcher.is_usable_for(X, Y_csr, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X, Y, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X_csr, Y_csr, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X_csr, Y, metric) + assert BaseDistancesReductionDispatcher.is_usable_for(X, Y_csr, metric) - assert BaseDistanceReductionDispatcher.is_usable_for( + assert BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.float64), 
Y.astype(np.float64), metric ) - assert BaseDistanceReductionDispatcher.is_usable_for( + assert BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.float32), Y.astype(np.float32), metric ) - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.int64), Y.astype(np.int64), metric ) - assert not BaseDistanceReductionDispatcher.is_usable_for(X, Y, metric="pyfunc") - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for(X, Y, metric="pyfunc") + assert not BaseDistancesReductionDispatcher.is_usable_for( X.astype(np.float32), Y, metric ) - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X, Y.astype(np.int32), metric ) # F-ordered arrays are not supported - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( np.asfortranarray(X), Y, metric ) @@ -558,17 +558,17 @@ def test_pairwise_distances_reduction_is_usable_for(): # See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669 # noqa # TODO: implement specialisation for (sq)euclidean on fused sparse-dense # using sparse-dense routines for matrix-vector multiplications. 
- assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X_csr, Y, metric="euclidean" ) - assert not BaseDistanceReductionDispatcher.is_usable_for( + assert not BaseDistancesReductionDispatcher.is_usable_for( X_csr, Y_csr, metric="sqeuclidean" ) # CSR matrices without non-zeros elements aren't currently supported # TODO: support CSR matrices without non-zeros elements X_csr_0_nnz = csr_matrix(X * 0) - assert not BaseDistanceReductionDispatcher.is_usable_for(X_csr_0_nnz, Y, metric) + assert not BaseDistancesReductionDispatcher.is_usable_for(X_csr_0_nnz, Y, metric) # CSR matrices with int64 indices and indptr (e.g. large nnz, or large n_features) # aren't supported as of now. @@ -576,7 +576,7 @@ def test_pairwise_distances_reduction_is_usable_for(): # TODO: support CSR matrices with int64 indices and indptr X_csr_int64 = csr_matrix(X) X_csr_int64.indices = X_csr_int64.indices.astype(np.int64) - assert not BaseDistanceReductionDispatcher.is_usable_for(X_csr_int64, Y, metric) + assert not BaseDistancesReductionDispatcher.is_usable_for(X_csr_int64, Y, metric) def test_argkmin_factory_method_wrong_usages(): @@ -631,7 +631,7 @@ def test_argkmin_factory_method_wrong_usages(): ) -def test_radius_neighborhood_factory_method_wrong_usages(): +def test_radius_neighbors_factory_method_wrong_usages(): rng = np.random.RandomState(1) X = rng.rand(100, 10) Y = rng.rand(100, 10) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 0a75b57cc60ae..4cea0bbb00a5f 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -29,7 +29,7 @@ from sklearn.metrics.pairwise import pairwise_distances from sklearn.metrics.tests.test_dist_metrics import BOOL_METRICS from sklearn.metrics.tests.test_pairwise_distances_reduction import ( - assert_radius_neighborhood_results_equality, + assert_radius_neighbors_results_equality, ) from 
sklearn.model_selection import cross_val_score from sklearn.model_selection import train_test_split @@ -2174,7 +2174,7 @@ def test_radius_neighbors_brute_backend( X_test, return_distance=True ) - assert_radius_neighborhood_results_equality( + assert_radius_neighbors_results_equality( legacy_brute_dst, pdr_brute_dst, legacy_brute_idx,