Skip to content

Commit

Permalink
Support partition pruning based on partition statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
shixuan-fan committed Jan 28, 2021
1 parent fc0c4d9 commit 90935e8
Show file tree
Hide file tree
Showing 4 changed files with 268 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ public class HiveClientConfig
private int partitionStatisticsSampleSize = 100;
private boolean ignoreCorruptedStatistics;
private boolean collectColumnStatisticsOnWrite;
// Backing field for the "hive.partition-statistics-based-optimization-enabled" config property.
// It has no explicit initializer, so it starts out as false until the setter is called.
private boolean partitionStatisticsBasedOptimizationEnabled;

private boolean s3SelectPushdownEnabled;
private int s3SelectPushdownMaxConnections = 500;
Expand Down Expand Up @@ -1281,6 +1282,19 @@ public HiveClientConfig setCollectColumnStatisticsOnWrite(boolean collectColumnS
return this;
}

/**
 * Reports whether partition statistics based optimization is switched on in this config.
 *
 * @return the value last stored by
 *         {@link #setPartitionStatisticsBasedOptimizationEnabled(boolean)}, or {@code false}
 *         if the setter was never called
 */
public boolean isPartitionStatisticsBasedOptimizationEnabled()
{
    return this.partitionStatisticsBasedOptimizationEnabled;
}

/**
 * Stores the flag bound to the {@code hive.partition-statistics-based-optimization-enabled}
 * config key.
 *
 * @param partitionStatisticsBasedOptimizationEnabled the new value of the flag
 * @return this config instance, so that setter calls can be chained
 */
@Config("hive.partition-statistics-based-optimization-enabled")
@ConfigDescription("Enables partition statistics based optimization, including partition pruning and predicate stripping")
public HiveClientConfig setPartitionStatisticsBasedOptimizationEnabled(boolean partitionStatisticsBasedOptimizationEnabled)
{
    this.partitionStatisticsBasedOptimizationEnabled = partitionStatisticsBasedOptimizationEnabled;

    return this;
}

public boolean isS3SelectPushdownEnabled()
{
return s3SelectPushdownEnabled;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ public final class HiveSessionProperties
private static final String PARTITION_STATISTICS_SAMPLE_SIZE = "partition_statistics_sample_size";
private static final String IGNORE_CORRUPTED_STATISTICS = "ignore_corrupted_statistics";
public static final String COLLECT_COLUMN_STATISTICS_ON_WRITE = "collect_column_statistics_on_write";
// Name of the session property read by isPartitionStatisticsBasedOptimizationEnabled(ConnectorSession).
// Note: the property name abbreviates "statistics" to "stats", while the constant name spells it out.
public static final String PARTITION_STATISTICS_BASED_OPTIMIZATION_ENABLED = "partition_stats_based_optimization_enabled";
private static final String OPTIMIZE_MISMATCHED_BUCKET_COUNT = "optimize_mismatched_bucket_count";
private static final String S3_SELECT_PUSHDOWN_ENABLED = "s3_select_pushdown_enabled";
public static final String SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE = "shuffle_partitioned_columns_for_table_write";
Expand Down Expand Up @@ -376,6 +377,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
"Experimental: Enables automatic column level statistics collection on write",
hiveClientConfig.isCollectColumnStatisticsOnWrite(),
false),
booleanProperty(
PARTITION_STATISTICS_BASED_OPTIMIZATION_ENABLED,
"Enables partition stats based optimization, including partition pruning and predicate stripping",
hiveClientConfig.isPartitionStatisticsBasedOptimizationEnabled(),
false),
booleanProperty(
OPTIMIZE_MISMATCHED_BUCKET_COUNT,
"Experimental: Enable optimization to avoid shuffle when bucket count is compatible but not the same",
Expand Down Expand Up @@ -776,6 +782,11 @@ public static boolean isCollectColumnStatisticsOnWrite(ConnectorSession session)
return session.getProperty(COLLECT_COLUMN_STATISTICS_ON_WRITE, Boolean.class);
}

/**
 * Looks up the {@code PARTITION_STATISTICS_BASED_OPTIMIZATION_ENABLED} session property.
 *
 * @param session the connector session to read the property from
 * @return the property value, unboxed from the {@link Boolean} returned by the session
 */
public static boolean isPartitionStatisticsBasedOptimizationEnabled(ConnectorSession session)
{
    Boolean enabled = session.getProperty(PARTITION_STATISTICS_BASED_OPTIMIZATION_ENABLED, Boolean.class);
    return enabled;
}

@Deprecated
public static boolean isOptimizedMismatchedBucketCount(ConnectorSession session)
{
Expand Down
Loading

0 comments on commit 90935e8

Please sign in to comment.