Skip to content

Commit

Permalink
*: AnalyzeV2 supports processing special global indexes (pingcap#56115)
Browse files Browse the repository at this point in the history
  • Loading branch information
Defined2014 authored Sep 25, 2024
1 parent a5e07a2 commit 75d9830
Show file tree
Hide file tree
Showing 21 changed files with 518 additions and 67 deletions.
5 changes: 5 additions & 0 deletions errors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1561,6 +1561,11 @@ error = '''
Global Index is needed for index '%-.192s', since the unique index is not including all partitioning columns, and GLOBAL is not given as IndexOption
'''

["ddl:8265"]
error = '''
Auto analyze is not effective for index '%-.192s', need analyze manually
'''

["domain:8027"]
error = '''
Information schema is out of date: schema failed to update in 1 lease, please make sure TiDB can connect to TiKV
Expand Down
6 changes: 3 additions & 3 deletions pkg/ddl/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ func buildTableInfoWithCheck(ctx sessionctx.Context, s *ast.CreateTableStmt, dbC
if err = checkTableInfoValidWithStmt(ctx, tbInfo, s); err != nil {
return nil, err
}
if err = checkTableInfoValidExtra(tbInfo); err != nil {
if err = checkTableInfoValidExtra(ctx, tbInfo); err != nil {
return nil, err
}
return tbInfo, nil
Expand Down Expand Up @@ -507,7 +507,7 @@ func checkGeneratedColumn(ctx sessionctx.Context, schemaName pmodel.CIStr, table
// name length and column count.
// (checkTableInfoValid is also used in repairing objects which don't perform
// these checks. Perhaps the two functions should be merged together regardless?)
func checkTableInfoValidExtra(tbInfo *model.TableInfo) error {
func checkTableInfoValidExtra(ctx sessionctx.Context, tbInfo *model.TableInfo) error {
if err := checkTooLongTable(tbInfo.Name); err != nil {
return err
}
Expand All @@ -527,7 +527,7 @@ func checkTableInfoValidExtra(tbInfo *model.TableInfo) error {
if err := checkColumnsAttributes(tbInfo.Columns); err != nil {
return errors.Trace(err)
}
if err := checkGlobalIndexes(tbInfo); err != nil {
if err := checkGlobalIndexes(ctx, tbInfo); err != nil {
return errors.Trace(err)
}

Expand Down
23 changes: 19 additions & 4 deletions pkg/ddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ func checkInvisibleIndexOnPK(tblInfo *model.TableInfo) error {
}

// checkGlobalIndex checks whether the index is allowed to be a global index.
func checkGlobalIndex(tblInfo *model.TableInfo, indexInfo *model.IndexInfo) error {
func checkGlobalIndex(ctx sessionctx.Context, tblInfo *model.TableInfo, indexInfo *model.IndexInfo) error {
pi := tblInfo.GetPartitionInfo()
isPartitioned := pi != nil && pi.Type != pmodel.PartitionTypeNone
if indexInfo.Global {
Expand All @@ -974,14 +974,15 @@ func checkGlobalIndex(tblInfo *model.TableInfo, indexInfo *model.IndexInfo) erro
if inAllPartitionColumns {
return dbterror.ErrGeneralUnsupportedDDL.GenWithStackByArgs("Global Index including all columns in the partitioning expression")
}
validateGlobalIndexWithGeneratedColumns(ctx.GetSessionVars().StmtCtx.ErrCtx(), tblInfo, indexInfo.Name.O, indexInfo.Columns)
}
return nil
}

// checkGlobalIndexes checks whether the global indexes of the table are supported.
func checkGlobalIndexes(tblInfo *model.TableInfo) error {
func checkGlobalIndexes(ctx sessionctx.Context, tblInfo *model.TableInfo) error {
for _, indexInfo := range tblInfo.Indices {
err := checkGlobalIndex(tblInfo, indexInfo)
err := checkGlobalIndex(ctx, tblInfo, indexInfo)
if err != nil {
return err
}
Expand Down Expand Up @@ -1090,7 +1091,7 @@ func (e *executor) createTableWithInfoJob(
}
}

if err := checkTableInfoValidExtra(tbInfo); err != nil {
if err := checkTableInfoValidExtra(ctx, tbInfo); err != nil {
return nil, err
}

Expand Down Expand Up @@ -4557,6 +4558,7 @@ func (e *executor) CreatePrimaryKey(ctx sessionctx.Context, ti ast.Ident, indexN
if indexOption == nil || !indexOption.Global {
return dbterror.ErrGlobalIndexNotExplicitlySet.GenWithStackByArgs("PRIMARY")
}
validateGlobalIndexWithGeneratedColumns(ctx.GetSessionVars().StmtCtx.ErrCtx(), tblInfo, indexName.O, indexColumns)
}
}

Expand Down Expand Up @@ -4716,6 +4718,7 @@ func (e *executor) createIndex(ctx sessionctx.Context, ti ast.Ident, keyType ast
if !globalIndex {
return dbterror.ErrGlobalIndexNotExplicitlySet.GenWithStackByArgs(indexName.O)
}
validateGlobalIndexWithGeneratedColumns(ctx.GetSessionVars().StmtCtx.ErrCtx(), tblInfo, indexName.O, indexColumns)
} else if globalIndex {
// TODO: remove this restriction
return dbterror.ErrGeneralUnsupportedDDL.GenWithStackByArgs("Global IndexOption on index including all columns in the partitioning expression")
Expand Down Expand Up @@ -5165,6 +5168,18 @@ func validateCommentLength(ec errctx.Context, sqlMode mysql.SQLMode, name string
return *comment, nil
}

// validateGlobalIndexWithGeneratedColumns appends an
// ErrWarnGlobalIndexNeedManuallyAnalyze warning for indexName to ec when at
// least one of indexColumns is a prefix column or refers to a virtual
// generated column of tblInfo. Auto analyze is not effective for such a
// global index, so the user has to analyze it manually.
//
// At most one warning is appended per call: the scan stops at the first
// matching column. The function never returns an error.
func validateGlobalIndexWithGeneratedColumns(ec errctx.Context, tblInfo *model.TableInfo, indexName string, indexColumns []*model.IndexColumn) {
	for _, idxCol := range indexColumns {
		// Resolve the table column first so the lookup happens for every index column.
		column := tblInfo.Columns[idxCol.Offset]
		if !column.IsVirtualGenerated() && idxCol.Length == types.UnspecifiedLength {
			// Plain, full-length column: nothing to warn about.
			continue
		}
		ec.AppendWarning(dbterror.ErrWarnGlobalIndexNeedManuallyAnalyze.FastGenByArgs(indexName))
		return
	}
}

// BuildAddedPartitionInfo build alter table add partition info
func BuildAddedPartitionInfo(ctx expression.BuildContext, meta *model.TableInfo, spec *ast.AlterTableSpec) (*model.PartitionInfo, error) {
numParts := uint64(0)
Expand Down
2 changes: 2 additions & 0 deletions pkg/errno/errcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,8 @@ const (

ErrGlobalIndexNotExplicitlySet = 8264

ErrWarnGlobalIndexNeedManuallyAnalyze = 8265

// Resource group errors.
ErrResourceGroupExists = 8248
ErrResourceGroupNotExists = 8249
Expand Down
2 changes: 2 additions & 0 deletions pkg/errno/errname.go
Original file line number Diff line number Diff line change
Expand Up @@ -1174,4 +1174,6 @@ var MySQLErrName = map[uint16]*mysql.ErrMessage{
ErrBDRRestrictedDDL: mysql.Message("The operation is not allowed while the bdr role of this cluster is set to %s.", nil),

ErrGlobalIndexNotExplicitlySet: mysql.Message("Global Index is needed for index '%-.192s', since the unique index is not including all partitioning columns, and GLOBAL is not given as IndexOption", nil),

ErrWarnGlobalIndexNeedManuallyAnalyze: mysql.Message("Auto analyze is not effective for index '%-.192s', need analyze manually", nil),
}
3 changes: 1 addition & 2 deletions pkg/executor/analyze_col_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,8 @@ func (e *AnalyzeColumnsExecV2) analyzeColumnsPushDownV2(gp *gp.Pool) *statistics
isSpecial := false
for _, col := range idx.Columns {
colInfo := e.colsInfo[col.Offset]
isVirtualCol := colInfo.IsGenerated() && !colInfo.GeneratedStored
isPrefixCol := col.Length != types.UnspecifiedLength
if isVirtualCol || isPrefixCol {
if colInfo.IsVirtualGenerated() || isPrefixCol {
isSpecial = true
break
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/executor/analyze_idx.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ func analyzeIndexPushdown(idxExec *AnalyzeIndexExec) *statistics.AnalyzeResults
Count: cnt,
Snapshot: idxExec.snapshot,
}
if idxExec.idxInfo.MVIndex {
result.ForMVIndex = true
if idxExec.idxInfo.MVIndex || (idxExec.idxInfo.Global && statsVer == statistics.Version2) {
result.ForMVIndexOrGlobalIndex = true
}
return result
}
Expand Down
170 changes: 123 additions & 47 deletions pkg/planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2304,18 +2304,24 @@ func getColOffsetForAnalyze(colsInfo []*model.ColumnInfo, colID int64) int {
// in the execution phase of ANALYZE, we need to modify index.Columns[i].Offset according to colInfos.
// TODO: find a better way to find indexed columns in ANALYZE rather than use IndexColumn.Offset
// For multi-valued index, we need to collect it separately here and analyze it as independent index analyze task.
// For a special global index, we also need to analyze it as independent index analyze task.
// See comments for AnalyzeResults.ForMVIndexOrGlobalIndex for more details.
func getModifiedIndexesInfoForAnalyze(
tblInfo *model.TableInfo,
allColumns bool,
colsInfo []*model.ColumnInfo,
) ([]*model.IndexInfo, []*model.IndexInfo) {
) ([]*model.IndexInfo, []*model.IndexInfo, []*model.IndexInfo) {
idxsInfo := make([]*model.IndexInfo, 0, len(tblInfo.Indices))
independentIdxsInfo := make([]*model.IndexInfo, 0)
specialGlobalIdxsInfo := make([]*model.IndexInfo, 0)
for _, originIdx := range tblInfo.Indices {
if originIdx.State != model.StatePublic {
continue
}
if handleutil.IsSpecialGlobalIndex(originIdx, tblInfo) {
specialGlobalIdxsInfo = append(specialGlobalIdxsInfo, originIdx)
continue
}
if originIdx.MVIndex {
independentIdxsInfo = append(independentIdxsInfo, originIdx)
continue
Expand All @@ -2333,7 +2339,7 @@ func getModifiedIndexesInfoForAnalyze(
}
idxsInfo = append(idxsInfo, idx)
}
return idxsInfo, independentIdxsInfo
return idxsInfo, independentIdxsInfo, specialGlobalIdxsInfo
}

// filterSkipColumnTypes filters out columns whose types are in the skipTypes list.
Expand Down Expand Up @@ -2370,6 +2376,43 @@ func (b *PlanBuilder) filterSkipColumnTypes(origin []*model.ColumnInfo, tbl *res
return
}

// checkIsAllSpecialGlobalIndex reports whether every index targeted by the
// ANALYZE statement is a special global index.
// A special global index is an index that is both a global index and an
// expression index or a prefix index.
//
// The targeted indexes are:
//   - every public index of the table, for `ANALYZE TABLE t INDEX` (IndexFlag
//     set and no index names given);
//   - the indexes listed in as.IndexNames otherwise.
//
// Indexes are examined in order and the first decisive one wins:
//   - a named index that does not exist or is not public yields
//     ErrAnalyzeMissIndex;
//   - an index that is not a special global index yields (false, nil);
//   - a special global index combined with explicit partition names yields an
//     error, because a global index cannot be analyzed per partition.
//
// If no index is decisive (including when there is nothing to examine) the
// result is (true, nil).
func checkIsAllSpecialGlobalIndex(as *ast.AnalyzeTableStmt, tbl *resolve.TableNameW) (bool, error) {
	withPartitions := len(as.PartitionNames) != 0
	tblInfo := tbl.TableInfo

	// For `Analyze table t index`
	if as.IndexFlag && len(as.IndexNames) == 0 {
		for _, index := range tblInfo.Indices {
			switch {
			case index.State != model.StatePublic:
				// Non-public indexes are not analyzed; ignore them.
				continue
			case !handleutil.IsSpecialGlobalIndex(index, tblInfo):
				return false, nil
			case withPartitions:
				// For `Analyze table t partition p0 index`
				return false, errors.NewNoStackErrorf("Analyze global index '%s' can't work with analyze specified partitions", index.Name.O)
			}
		}
		return true, nil
	}

	for _, name := range as.IndexNames {
		index := tblInfo.FindIndexByName(name.L)
		if index == nil || index.State != model.StatePublic {
			return false, plannererrors.ErrAnalyzeMissIndex.GenWithStackByArgs(name.O, tbl.Name.O)
		}
		if !handleutil.IsSpecialGlobalIndex(index, tblInfo) {
			return false, nil
		}
		// For `Analyze table t partition p0 index idx0`
		if withPartitions {
			return false, errors.NewNoStackErrorf("Analyze global index '%s' can't work with analyze specified partitions", index.Name.O)
		}
	}
	return true, nil
}

func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
as *ast.AnalyzeTableStmt,
analyzePlan *Analyze,
Expand All @@ -2385,6 +2428,13 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackError("The version 2 stats would ignore the INCREMENTAL keyword and do full sampling"))
}

isAnalyzeTable := len(as.PartitionNames) == 0

allSpecialGlobalIndex, err := checkIsAllSpecialGlobalIndex(as, tbl)
if err != nil {
return err
}

astOpts, err := handleAnalyzeOptionsV2(as.AnalyzeOpts)
if err != nil {
return err
Expand All @@ -2405,7 +2455,7 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
if err != nil {
return err
}
isAnalyzeTable := len(as.PartitionNames) == 0

optionsMap, colsInfoMap, err := b.genV2AnalyzeOptions(persistOpts, tbl, isAnalyzeTable, physicalIDs, astOpts, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns)
if err != nil {
return err
Expand All @@ -2414,51 +2464,79 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask(
analyzePlan.OptionsMap[physicalID] = opts
}

// Build tasks for each partition.
for i, id := range physicalIDs {
physicalID := id
if id == tbl.TableInfo.ID {
id = statistics.NonPartitionTableID
}
info := AnalyzeInfo{
DBName: tbl.Schema.O,
TableName: tbl.Name.O,
PartitionName: partitionNames[i],
TableID: statistics.AnalyzeTableID{TableID: tbl.TableInfo.ID, PartitionID: id},
StatsVersion: version,
var indexes, independentIndexes, specialGlobalIndexes []*model.IndexInfo

needAnalyzeCols := !(as.IndexFlag && allSpecialGlobalIndex)

if needAnalyzeCols {
if as.IndexFlag {
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackErrorf("The version 2 would collect all statistics not only the selected indexes"))
}
if optsV2, ok := optionsMap[physicalID]; ok {
info.V2Options = &optsV2
}
execColsInfo := astColsInfo
if colsInfo, ok := colsInfoMap[physicalID]; ok {
execColsInfo = colsInfo
}
execColsInfo = b.filterSkipColumnTypes(execColsInfo, tbl, &mustAnalyzedCols)
allColumns := len(tbl.TableInfo.Columns) == len(execColsInfo)
indexes, independentIndexes := getModifiedIndexesInfoForAnalyze(tbl.TableInfo, allColumns, execColsInfo)
handleCols := BuildHandleColsForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
newTask := AnalyzeColumnsTask{
HandleCols: handleCols,
ColsInfo: execColsInfo,
AnalyzeInfo: info,
TblInfo: tbl.TableInfo,
Indexes: indexes,
}
if newTask.HandleCols == nil {
extraCol := model.NewExtraHandleColInfo()
// Always place _tidb_rowid at the end of colsInfo, this is corresponding to logics in `analyzeColumnsPushdown`.
newTask.ColsInfo = append(newTask.ColsInfo, extraCol)
newTask.HandleCols = util.NewIntHandleCols(colInfoToColumn(extraCol, len(newTask.ColsInfo)-1))
}
analyzePlan.ColTasks = append(analyzePlan.ColTasks, newTask)
for _, indexInfo := range independentIndexes {
newIdxTask := AnalyzeIndexTask{
IndexInfo: indexInfo,
TblInfo: tbl.TableInfo,
// Build tasks for each partition.
for i, id := range physicalIDs {
physicalID := id
if id == tbl.TableInfo.ID {
id = statistics.NonPartitionTableID
}
info := AnalyzeInfo{
DBName: tbl.Schema.O,
TableName: tbl.Name.O,
PartitionName: partitionNames[i],
TableID: statistics.AnalyzeTableID{TableID: tbl.TableInfo.ID, PartitionID: id},
StatsVersion: version,
}
if optsV2, ok := optionsMap[physicalID]; ok {
info.V2Options = &optsV2
}
execColsInfo := astColsInfo
if colsInfo, ok := colsInfoMap[physicalID]; ok {
execColsInfo = colsInfo
}
execColsInfo = b.filterSkipColumnTypes(execColsInfo, tbl, &mustAnalyzedCols)
allColumns := len(tbl.TableInfo.Columns) == len(execColsInfo)
indexes, independentIndexes, specialGlobalIndexes = getModifiedIndexesInfoForAnalyze(tbl.TableInfo, allColumns, execColsInfo)
handleCols := BuildHandleColsForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo)
newTask := AnalyzeColumnsTask{
HandleCols: handleCols,
ColsInfo: execColsInfo,
AnalyzeInfo: info,
TblInfo: tbl.TableInfo,
Indexes: indexes,
}
if newTask.HandleCols == nil {
extraCol := model.NewExtraHandleColInfo()
// Always place _tidb_rowid at the end of colsInfo, this is corresponding to logics in `analyzeColumnsPushdown`.
newTask.ColsInfo = append(newTask.ColsInfo, extraCol)
newTask.HandleCols = util.NewIntHandleCols(colInfoToColumn(extraCol, len(newTask.ColsInfo)-1))
}
analyzePlan.ColTasks = append(analyzePlan.ColTasks, newTask)
for _, indexInfo := range independentIndexes {
newIdxTask := AnalyzeIndexTask{
IndexInfo: indexInfo,
TblInfo: tbl.TableInfo,
AnalyzeInfo: info,
}
analyzePlan.IdxTasks = append(analyzePlan.IdxTasks, newIdxTask)
}
}
}

if isAnalyzeTable {
if needAnalyzeCols {
// When `needAnalyzeCols == true`, non-global indexes already covered by previous loop,
// deal with global index here.
for _, indexInfo := range specialGlobalIndexes {
analyzePlan.IdxTasks = append(analyzePlan.IdxTasks, generateIndexTasks(indexInfo, as, tbl.TableInfo, nil, nil, version)...)
}
} else {
// For `analyze table t index idx1[, idx2]` and all indexes are global index.
for _, idxName := range as.IndexNames {
idx := tbl.TableInfo.FindIndexByName(idxName.L)
if idx == nil || !handleutil.IsSpecialGlobalIndex(idx, tbl.TableInfo) {
continue
}
analyzePlan.IdxTasks = append(analyzePlan.IdxTasks, generateIndexTasks(idx, as, tbl.TableInfo, nil, nil, version)...)
}
analyzePlan.IdxTasks = append(analyzePlan.IdxTasks, newIdxTask)
}
}

Expand Down Expand Up @@ -2740,7 +2818,6 @@ func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt, opts map[ast.A
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackError("The analyze version from the session is not compatible with the existing statistics of the table. Use the existing version instead"))
}
if version == statistics.Version2 {
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackError("The version 2 would collect all statistics not only the selected indexes"))
return b.buildAnalyzeTable(as, opts, version)
}
for _, idxName := range as.IndexNames {
Expand Down Expand Up @@ -2793,7 +2870,6 @@ func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt, opts map[as
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackErrorf("The analyze version from the session is not compatible with the existing statistics of the table. Use the existing version instead"))
}
if version == statistics.Version2 {
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackErrorf("The version 2 would collect all statistics not only the selected indexes"))
return b.buildAnalyzeTable(as, opts, version)
}
for _, idx := range tblInfo.Indices {
Expand Down
5 changes: 4 additions & 1 deletion pkg/statistics/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ type AnalyzeResults struct {
// take care of those table-level fields.
// In conclusion, when saving the analyze result for mv index, we need to store the index stats, as for the
// table-level fields, we only need to update the version.
ForMVIndex bool
//
// The global index has only one key range, so an independent task is used to process it.
// Global index needs to update only the version at the table-level fields, just like mv index.
ForMVIndexOrGlobalIndex bool
}

// DestroyAndPutToPool destroys the result and put it to the pool.
Expand Down
Loading

0 comments on commit 75d9830

Please sign in to comment.