Skip to content

Commit

Permalink
fix: invalid predicate optimization (pola-rs#13313)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Dec 29, 2023
1 parent f14ac23 commit e27039f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 5 deletions.
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/physical_plan/expressions/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ impl ColumnExpr {
&& _state.ext_contexts.is_empty()
&& std::env::var("POLARS_NO_SCHEMA_CHECK").is_err()
{
panic!("invalid schema")
panic!("invalid schema: df {:?}; column: {}", df, &self.name)
}
}
// in release we fallback to linear search
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,24 +117,28 @@ pub(super) fn process_join(

debug_assert_aexpr_allows_predicate_pushdown(predicate, expr_arena);

if check_input_node(predicate, &schema_left, expr_arena) && !block_pushdown_left {
if !block_pushdown_left && check_input_node(predicate, &schema_left, expr_arena) {
insert_and_combine_predicate(&mut pushdown_left, predicate, expr_arena);
filter_left = true;
// If we push down to the left and all predicate columns are also
// join columns, we also push down right for inner, left or semi join
if all_pred_cols_in_left_on(predicate, expr_arena, &left_on) {
filter_right = match &options.args.how {
JoinType::Inner | JoinType::Left => true,
// TODO! if join_on right has a different name
// we can set this to `true` IFF we rename the predicate
JoinType::Inner | JoinType::Left => {
check_input_node(predicate, &schema_right, expr_arena)
},
#[cfg(feature = "semi_anti_join")]
JoinType::Semi => true,
JoinType::Semi => check_input_node(predicate, &schema_right, expr_arena),
_ => false,
}
}
// this is `else if` because if the predicate is in the left hand side
// the right hand side should be renamed with the suffix.
// in that case we should not push down as the user wants to filter on `x`
// not on `x_rhs`.
} else if check_input_node(predicate, &schema_right, expr_arena) && !block_pushdown_right {
} else if !block_pushdown_right && check_input_node(predicate, &schema_right, expr_arena) {
filter_right = true
}
if filter_right {
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/unit/test_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,12 @@ def test_hconcat_predicate() -> None:

result = query.collect(predicate_pushdown=True)
assert_frame_equal(result, expected)


def test_predicate_pd_join_13300() -> None:
lf = pl.LazyFrame({"col3": range(10, 14), "new_col": range(11, 15)})
lf_other = pl.LazyFrame({"col4": [0, 11, 2, 13]})

lf = lf.join(lf_other, left_on="new_col", right_on="col4", how="left")
lf = lf.filter(pl.col("new_col") < 12)
assert lf.collect().to_dict(as_series=False) == {"col3": [10], "new_col": [11]}

0 comments on commit e27039f

Please sign in to comment.