Skip to content

Commit

Permalink
Enhance the query planner cost estimation for index scans to take int…
Browse files Browse the repository at this point in the history
…o account

WHERE clause terms that can be computed using only the index and that do not
require looking up rows in the original table.  This fixes an obscure
performance regression that arose when the ORDER BY LIMIT optimization was
added by check-in [bf46179d44843].
  • Loading branch information
D. Richard Hipp committed Jul 27, 2016
2 parents dc17d97 + ee33eb0 commit 81a2e69
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 7 deletions.
55 changes: 55 additions & 0 deletions src/expr.c
Original file line number Diff line number Diff line change
Expand Up @@ -3965,6 +3965,61 @@ int sqlite3ExprImpliesExpr(Expr *pE1, Expr *pE2, int iTab){
return 0;
}

/*
** An instance of the following structure is used by the tree walker
** to determine if an expression can be evaluated by reference to the
** index only, without having to do a search for the corresponding
** table entry.
**
** The structure is handed to the walker callback through the
** Walker.u.pIdxCover pointer. IdxCover.pIdx is the index being tested
** and IdxCover.iCur is the cursor number of the table that the index
** belongs to. Only column references whose Expr.iTable equals iCur are
** checked against the index.
*/
struct IdxCover {
Index *pIdx; /* The index to be tested for coverage */
int iCur; /* Cursor number for the table corresponding to the index */
};

/*
** Walker expression callback used by sqlite3ExprCoveredByIndex().
**
** Check whether pExpr is a reference to a column of the table open on
** cursor pWalker->u.pIdxCover->iCur that cannot be satisfied using the
** index pWalker->u.pIdxCover->pIdx. A column cannot be satisfied when
** sqlite3ColumnOfIndex() returns a negative value for it.
**
** When such a reference is found, set pWalker->eCode to 1 and return
** WRC_Abort so that the walk stops. In every other case (pExpr is not a
** TK_COLUMN node, it refers to a different cursor, or the column is
** present in the index) return WRC_Continue and leave pWalker->eCode
** unchanged.
*/
static int exprIdxCover(Walker *pWalker, Expr *pExpr){
  /* Only column references can require a table lookup */
  if( pExpr->op!=TK_COLUMN ){
    return WRC_Continue;
  }
  /* Columns that belong to some other cursor are not our concern */
  if( pExpr->iTable!=pWalker->u.pIdxCover->iCur ){
    return WRC_Continue;
  }
  /* The column is available directly from the index */
  if( sqlite3ColumnOfIndex(pWalker->u.pIdxCover->pIdx, pExpr->iColumn)>=0 ){
    return WRC_Continue;
  }
  /* Column of the target table that is missing from the index */
  pWalker->eCode = 1;
  return WRC_Abort;
}

/*
** Determine if the index pIdx on the table with cursor iCur covers
** the expression pExpr. Return true if the index does cover the
** expression and false if pExpr references columns of table iCur
** that are not found in the index pIdx.
**
** An index covering an expression means that the expression can be
** evaluated using only the index and without having to lookup the
** corresponding table entry.
**
** The test is done by walking pExpr with exprIdxCover() as the
** expression callback. The Walker is zeroed first, so Walker.eCode
** starts at 0 and becomes non-zero only if the callback finds a column
** that is missing from the index.
*/
int sqlite3ExprCoveredByIndex(
  Expr *pExpr,        /* The expression to be tested */
  int iCur,           /* The cursor number for the corresponding table */
  Index *pIdx         /* The index that might be used for coverage */
){
  struct IdxCover cover;   /* Context passed to the walker callback */
  Walker walker;           /* Tree walker that visits each node of pExpr */

  cover.pIdx = pIdx;
  cover.iCur = iCur;

  memset(&walker, 0, sizeof(walker));
  walker.u.pIdxCover = &cover;
  walker.xExprCallback = exprIdxCover;
  sqlite3WalkExpr(&walker, pExpr);

  /* eCode is still zero if no uncovered column reference was seen */
  return walker.eCode==0;
}


/*
** An instance of the following structure is used by the tree walker
** to count references to table columns in the arguments of an
Expand Down
2 changes: 2 additions & 0 deletions src/sqliteInt.h
Original file line number Diff line number Diff line change
Expand Up @@ -3257,6 +3257,7 @@ struct Walker {
struct SrcCount *pSrcCount; /* Counting column references */
struct CCurHint *pCCurHint; /* Used by codeCursorHint() */
int *aiCol; /* array of column indexes */
struct IdxCover *pIdxCover; /* Check for index coverage */
} u;
};

Expand Down Expand Up @@ -3700,6 +3701,7 @@ int sqlite3ExprListCompare(ExprList*, ExprList*, int);
int sqlite3ExprImpliesExpr(Expr*, Expr*, int);
void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*);
void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*);
int sqlite3ExprCoveredByIndex(Expr*, int iCur, Index *pIdx);
int sqlite3FunctionUsesThisSrc(Expr*, SrcList*);
Vdbe *sqlite3GetVdbe(Parse*);
#ifndef SQLITE_OMIT_BUILTIN_TEST
Expand Down
33 changes: 28 additions & 5 deletions src/where.c
Original file line number Diff line number Diff line change
Expand Up @@ -2478,11 +2478,11 @@ static int whereLoopAddBtreeIndex(
pNew->nSkip++;
pNew->aLTerm[pNew->nLTerm++] = 0;
pNew->wsFlags |= WHERE_SKIPSCAN;
nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
nIter = pProbe->aiRowLogEst[saved_nEq]+1 - pProbe->aiRowLogEst[saved_nEq+1];
pNew->nOut -= nIter;
/* TUNING: Because of uncertainties in the estimates for skip-scan queries,
** add a 1.375 fudge factor to make skip-scan slightly less likely. */
nIter += 5;
nIter += 4;
whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
pNew->nOut = saved_nOut;
pNew->u.btree.nEq = saved_nEq;
Expand Down Expand Up @@ -2775,11 +2775,34 @@ static int whereLoopAddBtree(

/* The cost of visiting the index rows is N*K, where K is
** between 1.1 and 3.0, depending on the relative sizes of the
** index and table rows. If this is a non-covering index scan,
** also add the cost of visiting table rows (N*3.0). */
** index and table rows. */
pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
if( m!=0 ){
pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
/* If this is a non-covering index scan, add in the cost of
** doing table lookups. The cost will be 3x the number of
** lookups. Take into account WHERE clause terms that can be
** satisfied using just the index, and that do not require a
** table lookup. */
LogEst nLookup = rSize + 16; /* Base cost: N*3 */
int ii;
int iCur = pSrc->iCursor;
WhereClause *pWC = &pWInfo->sWC;
for(ii=0; ii<pWC->nTerm; ii++){
WhereTerm *pTerm = &pWC->a[ii];
if( !sqlite3ExprCoveredByIndex(pTerm->pExpr, iCur, pProbe) ){
break;
}
/* pTerm can be evaluated using just the index. So reduce
** the expected number of table lookups accordingly */
if( pTerm->truthProb<=0 ){
nLookup += pTerm->truthProb;
}else{
nLookup--;
if( pTerm->eOperator & (WO_EQ|WO_IS) ) nLookup -= 19;
}
}

pNew->rRun = sqlite3LogEstAdd(pNew->rRun, nLookup);
}
ApplyCostMultiplier(pNew->rRun, pTab->costMult);
whereLoopOutputAdjust(pWC, pNew, rSize);
Expand Down
60 changes: 60 additions & 0 deletions test/index8.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# 2016-07-27
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test cases for ORDER BY and LIMIT on an index scan.
#


set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Performance regression reported at
# http://www.mail-archive.com/sqlite-users@mailinglists.sqlite.org/msg98615.html
#
# Caused by the ORDER BY LIMIT optimization from check-in
# https://sqlite.org/src/info/bf46179d44843769
#
# Fixed on approximately 2016-07-27 by changes that compute a better score
# for index scans by taking into account WHERE clause constraints that can
# be handled by the index and do not require a table lookup.
#
# Setup: t1 is filled with 101 rows (x = 0 through 100) and the index
# t1abc contains columns a, b and c, so the constraint on c in the query
# below only involves columns that are present in the index.
#
do_execsql_test 1.0 {
CREATE TABLE t1(a,b,c,d);
WITH RECURSIVE c(x) AS (VALUES(0) UNION ALL SELECT x+1 FROM c WHERE x<100)
INSERT INTO t1(a,b,c,d)
SELECT x/10, x%10, x%19, x FROM c;
CREATE INDEX t1abc ON t1(a,b,c);
SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {0 4 4 4 2 3 4 23}

# Prior to the fix, the following EQP would show a table scan and a sort
# rather than an index scan.
#
do_execsql_test 1.0eqp {
EXPLAIN QUERY PLAN
SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {/SCAN TABLE t1 USING INDEX t1abc/}

# If we change the index so that it no longer covers the WHERE clause,
# then we should (correctly) revert to using a table scan.
#
# The replacement index t1abd holds a, b and d but not c, so the
# constraint on c can no longer be evaluated from the index alone.
# The query result itself must be unchanged.
#
do_execsql_test 1.1 {
DROP INDEX t1abc;
CREATE INDEX t1abd ON t1(a,b,d);
SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {0 4 4 4 2 3 4 23}
# The query plan is expected not to mention any index.
do_execsql_test 1.1eqp {
EXPLAIN QUERY PLAN
SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {~/USING INDEX/}


finish_test
4 changes: 2 additions & 2 deletions test/scanstatus.test
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ do_execsql_test 5.3.2 {
SELECT count(*) FROM t2 WHERE y = 'j';
} {19}
do_scanstatus_test 5.3.3 {
nLoop 1 nVisit 19 nEst 56.0 zName t2xy zExplain
nLoop 1 nVisit 19 nEst 52.0 zName t2xy zExplain
{SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}

Expand All @@ -349,7 +349,7 @@ do_execsql_test 5.4.2 {
do_scanstatus_test 5.4.3 {
nLoop 1 nVisit 10 nEst 10.0 zName t1bc
zExplain {SCAN TABLE t1 USING COVERING INDEX t1bc}
nLoop 10 nVisit 200 nEst 56.0 zName t2xy
nLoop 10 nVisit 200 nEst 52.0 zName t2xy
zExplain {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}

Expand Down

0 comments on commit 81a2e69

Please sign in to comment.