Skip to content

Commit

Permalink
Capture column level dependencies for CTEs and sub-queries
Browse files Browse the repository at this point in the history
Previously they weren't captured if some transformation is applied on
columns referenced from CTEs or Aliased relation.
  • Loading branch information
Praveen2112 committed Dec 20, 2021
1 parent c13f952 commit 087dc9f
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1640,14 +1640,16 @@ private Scope createScopeForCommonTableExpression(Table table, Optional<Scope> s
for (int i = 0; i < queryDescriptor.getAllFieldCount(); i++) {
Field inputField = queryDescriptor.getFieldByIndex(i);
if (!inputField.isHidden()) {
fieldBuilder.add(Field.newQualified(
Field field = Field.newQualified(
QualifiedName.of(table.getName().getSuffix()),
Optional.of(aliases.next().getValue()),
inputField.getType(),
false,
inputField.getOriginTable(),
inputField.getOriginColumnName(),
inputField.isAliased()));
inputField.isAliased());
fieldBuilder.add(field);
analysis.addSourceColumns(field, analysis.getSourceColumns(inputField));
}
}
fields = fieldBuilder.build();
Expand All @@ -1657,14 +1659,16 @@ private Scope createScopeForCommonTableExpression(Table table, Optional<Scope> s
for (int i = 0; i < queryDescriptor.getAllFieldCount(); i++) {
Field inputField = queryDescriptor.getFieldByIndex(i);
if (!inputField.isHidden()) {
fieldBuilder.add(Field.newQualified(
Field field = Field.newQualified(
QualifiedName.of(table.getName().getSuffix()),
inputField.getName(),
inputField.getType(),
false,
inputField.getOriginTable(),
inputField.getOriginColumnName(),
inputField.isAliased()));
inputField.isAliased());
fieldBuilder.add(field);
analysis.addSourceColumns(field, analysis.getSourceColumns(inputField));
}
}
fields = fieldBuilder.build();
Expand Down Expand Up @@ -2050,14 +2054,27 @@ protected Scope visitAliasedRelation(AliasedRelation relation, Optional<Scope> s
}

List<String> aliases = null;
Collection<Field> inputFields = relationType.getAllFields();
if (relation.getColumnNames() != null) {
aliases = relation.getColumnNames().stream()
.map(Identifier::getValue)
.collect(Collectors.toList());
// hidden fields are not exposed when there are column aliases
inputFields = relationType.getVisibleFields();
}

RelationType descriptor = relationType.withAlias(relation.getAlias().getValue(), aliases);

checkArgument(inputFields.size() == descriptor.getAllFieldCount(),
"Expected %s fields, got %s",
descriptor.getAllFieldCount(),
inputFields.size());

Streams.forEachPair(
descriptor.getAllFields().stream(),
inputFields.stream(),
(newField, field) -> analysis.addSourceColumns(newField, analysis.getSourceColumns(field)));

return createAndAssignScope(relation, scope, descriptor);
}

Expand Down Expand Up @@ -3970,6 +3987,11 @@ private Scope setAliases(Scope scope, Identifier tableName, List<Identifier> col
RelationType oldDescriptor = scope.getRelationType();
validateColumnAliases(columnNames, oldDescriptor.getVisibleFieldCount());
RelationType newDescriptor = oldDescriptor.withAlias(tableName.getValue(), columnNames.stream().map(Identifier::getValue).collect(toImmutableList()));

Streams.forEachPair(
oldDescriptor.getAllFields().stream(),
newDescriptor.getAllFields().stream(),
(newField, field) -> analysis.addSourceColumns(newField, analysis.getSourceColumns(field)));
return scope.withRelationType(newDescriptor);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,40 @@ public void testOutputColumnsWithClause()
throws Exception
{
assertLineage(
"WITH w AS (SELECT * FROM orders) SELECT clerk AS test_varchar, orderkey AS test_bigint FROM w",
"WITH w AS (SELECT * FROM orders) SELECT lower(clerk) AS test_varchar, orderkey AS test_bigint FROM w",
ImmutableSet.of("tpch.tiny.orders"),
new OutputColumnMetadata("test_varchar", VARCHAR_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "clerk"))),
new OutputColumnMetadata("test_bigint", BIGINT_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "orderkey"))));
}

@Test
public void testOutputColumnsColumnAliasInWithClause()
throws Exception
{
assertLineage(
"WITH w(aliased_clerk, aliased_orderkey) AS (SELECT clerk, orderkey FROM orders) SELECT lower(aliased_clerk) AS test_varchar, aliased_orderkey AS test_bigint FROM w",
ImmutableSet.of("tpch.tiny.orders"),
new OutputColumnMetadata("test_varchar", VARCHAR_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "clerk"))),
new OutputColumnMetadata("test_bigint", BIGINT_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "orderkey"))));
}

@Test
public void testOutputColumnsWithAliasedRelation()
throws Exception
{
assertLineage(
"SELECT lower(clerk) AS test_varchar, orderkey AS test_bigint FROM (SELECT * FROM orders) w",
ImmutableSet.of("tpch.tiny.orders"),
new OutputColumnMetadata("test_varchar", VARCHAR_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "clerk"))),
new OutputColumnMetadata("test_bigint", BIGINT_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "orderkey"))));
}

@Test
public void testOutputColumnsWithColumnAliasInAliasedRelation()
throws Exception
{
assertLineage(
"SELECT lower(aliased_clerk) AS test_varchar, aliased_orderkey AS test_bigint FROM (SELECT clerk, orderkey FROM orders) w(aliased_clerk, aliased_orderkey)",
ImmutableSet.of("tpch.tiny.orders"),
new OutputColumnMetadata("test_varchar", VARCHAR_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "clerk"))),
new OutputColumnMetadata("test_bigint", BIGINT_TYPE, ImmutableSet.of(new ColumnDetail("tpch", "tiny", "orders", "orderkey"))));
Expand Down

0 comments on commit 087dc9f

Please sign in to comment.