From ccd7c11fb2cbcd781e894e8ee12bde9745e48aad Mon Sep 17 00:00:00 2001 From: Yashovardhan Thevalil Date: Fri, 27 Sep 2024 09:38:23 +0200 Subject: [PATCH] PR feedback --- .github/workflows/push-pull.yml | 5 ++++- sliceline/slicefinder.py | 19 ++++++++++++++++--- tests/test_slicefinder.py | 12 +++++++++--- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/.github/workflows/push-pull.yml b/.github/workflows/push-pull.yml index ac8aa1d..d720409 100644 --- a/.github/workflows/push-pull.yml +++ b/.github/workflows/push-pull.yml @@ -1,6 +1,9 @@ name: Test -on: [push, pull_request] +on: + push: + branches: [ master ] + pull_request: jobs: test: diff --git a/sliceline/slicefinder.py b/sliceline/slicefinder.py index 2ddfcc4..9aa6656 100644 --- a/sliceline/slicefinder.py +++ b/sliceline/slicefinder.py @@ -378,7 +378,11 @@ def _eval_slice( slice_candidates = x_encoded @ slices.T == level slice_sizes = slice_candidates.sum(axis=0).A[0] slice_errors = errors @ slice_candidates - max_slice_errors = slice_candidates.T.multiply(errors).max(axis=1).toarray() + # Here we can't use the .A shorthand because it is not + # implemented in all scipy versions for coo_matrix objects + max_slice_errors = ( + slice_candidates.T.multiply(errors).max(axis=1).toarray() + ) # score of relative error and relative size slice_scores = self._score( @@ -397,7 +401,11 @@ def _create_and_score_basic_slices( """Initialise 1-slices, i.e. slices with one predicate.""" slice_sizes = x_encoded.sum(axis=0).A[0] slice_errors = errors @ x_encoded - max_slice_errors = x_encoded.T.multiply(errors).max(axis=1).toarray()[:, 0] + # Here we can't use the .A shorthand because it is not + # implemented in all scipy versions for coo_matrix objects + max_slice_errors = ( + x_encoded.T.multiply(errors).max(axis=1).toarray()[:, 0] + ) # working set of active slices (#attr x #slices) and top-k valid_slices_mask = (slice_sizes >= self.min_sup) & (slice_errors > 0) @@ -440,6 +448,8 @@ def _join_compatible_slices( ) -> np.ndarray: """Join compatible slices according to `level`.""" slices_int = slices.astype(int) + # Here we can't use the .A shorthand because it is not + # implemented in all scipy versions for coo_matrix objects join = (slices_int @ slices_int.T).toarray() == level - 2 return np.triu(join, 1) * join @@ -503,7 +513,10 @@ def _prepare_deduplication_and_pruning( sub_pair_candidates = pair_candidates[:, start:end] # sub_p should not contain multiple True on the same line i = sub_pair_candidates.argmax(axis=1).T + np.any( - sub_pair_candidates.toarray(), axis=1 + # Here we can't use the .A shorthand because it is not + # implemented in all scipy versions for coo_matrix objects + sub_pair_candidates.toarray(), + axis=1, ) ids = ids + i.A * np.prod(dom[(j + 1) : dom.shape[0]]) return ids diff --git a/tests/test_slicefinder.py b/tests/test_slicefinder.py index d3ba5b6..72402d8 100644 --- a/tests/test_slicefinder.py +++ b/tests/test_slicefinder.py @@ -19,7 +19,9 @@ def test_dummify(benchmark, basic_test_data): basic_test_data["n_col_x_encoded"], ) - assert np.array_equal(computed.toarray(), basic_test_data["slices"].toarray()) + assert np.array_equal( + computed.toarray(), basic_test_data["slices"].toarray() + ) def test_maintain_top_k(benchmark, basic_test_data): @@ -278,7 +280,9 @@ def test_create_and_score_basic_slices(benchmark, basic_test_data): [0.42499999999999993, 3, 1, 4], ] ) - assert np.array_equal(computed_slices.toarray(), basic_test_data["slices"].toarray()) + assert np.array_equal( + computed_slices.toarray(), basic_test_data["slices"].toarray() + ) assert np.array_equal(computed_statistics, expected_r) @@ -308,7 +312,9 @@ def test_get_pair_candidates(benchmark, basic_test_data): basic_test_data["feature_offset_start"], basic_test_data["feature_offset_end"], ) - assert np.array_equal(computed.toarray(), basic_test_data["candidates"].toarray()) + assert np.array_equal( + computed.toarray(), basic_test_data["candidates"].toarray() + ) def test_get_pair_candidates_with_missing_parents_pruning(