Skip to content

Commit

Permalink
NIFI-1706: Extend QueryDatabaseTable to support arbitrary queries
Browse files Browse the repository at this point in the history
- Only include Maximum Value columns in the type map.
- Squashed commits in the previous PR
- Rebased against the latest master
- Added stop method to GenerateTableFetch so that it refreshes the
column type map when it gets restarted
- Fixed whitespace around if/for statements
- Updated expressionLanguageSupported value since it was not auto-merged
correctly

This closes apache#2618.

Signed-off-by: Koji Kawamura <[email protected]>
  • Loading branch information
patricker authored and ijokarumawak committed Apr 13, 2018
1 parent b7272e3 commit 82ac815
Show file tree
Hide file tree
Showing 4 changed files with 264 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
Expand Down Expand Up @@ -152,12 +153,22 @@ public abstract class AbstractDatabaseFetchProcessor extends AbstractSessionFact
/**
 * Optional property carrying an extra predicate to add to the WHERE condition of the SQL
 * built by the processor. It is not required, must be non-empty when set, and supports
 * Expression Language evaluated against FlowFile attributes.
 */
public static final PropertyDescriptor WHERE_CLAUSE = new PropertyDescriptor.Builder()
        .name("db-fetch-where-clause")
        .displayName("Additional WHERE clause")
        // Exactly one description is set; a builder keeps only the last value it is given.
        .description("A custom clause to be added in the WHERE condition when building SQL queries.")
        .required(false)
        .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .build();

/**
 * Optional property holding a user-supplied SQL query. When set, the query is wrapped as a
 * sub-query rather than having the SELECT built from the other properties. It must be
 * non-empty when set, and its Expression Language scope is the variable registry.
 */
public static final PropertyDescriptor SQL_QUERY = new PropertyDescriptor.Builder()
        .name("db-fetch-sql-query")
        .displayName("Custom Query")
        .description("A custom SQL query used to retrieve data. "
                + "Instead of building a SQL query from other properties, "
                + "this query will be wrapped as a sub-query. "
                + "Query must have no ORDER BY statement.")
        .required(false)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

protected List<PropertyDescriptor> propDescriptors;

// The delimiter to use when referencing qualified names (such as table@!@column in the state map)
Expand Down Expand Up @@ -246,6 +257,7 @@ public void setup(final ProcessContext context, boolean shouldCleanCache, FlowFi
// Try to fill the columnTypeMap with the types of the desired max-value columns
final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String sqlQuery = context.getProperty(SQL_QUERY).evaluateAttributeExpressions().getValue();

final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
try (final Connection con = dbcpService.getConnection();
Expand All @@ -254,20 +266,52 @@ public void setup(final ProcessContext context, boolean shouldCleanCache, FlowFi
// Try a query that returns no rows, for the purposes of getting metadata about the columns. It is possible
// to use DatabaseMetaData.getColumns(), but not all drivers support this, notably the schema-on-read
// approach as in Apache Drill
String query = dbAdapter.getSelectStatement(tableName, maxValueColumnNames, "1 = 0", null, null, null);
String query;

if (StringUtils.isEmpty(sqlQuery)) {
query = dbAdapter.getSelectStatement(tableName, maxValueColumnNames, "1 = 0", null, null, null);
} else {
StringBuilder sbQuery = getWrappedQuery(sqlQuery, tableName);
sbQuery.append(" WHERE 1=0");

query = sbQuery.toString();
}

ResultSet resultSet = st.executeQuery(query);
ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
int numCols = resultSetMetaData.getColumnCount();
if (numCols > 0) {
if (shouldCleanCache) {
columnTypeMap.clear();
}

final List<String> maxValueColumnNameList = Arrays.asList(maxValueColumnNames.toLowerCase().split(","));
final List<String> maxValueQualifiedColumnNameList = new ArrayList<>();

for (String maxValueColumn:maxValueColumnNameList) {
String colKey = getStateKey(tableName, maxValueColumn.trim());
maxValueQualifiedColumnNameList.add(colKey);
}

for (int i = 1; i <= numCols; i++) {
String colName = resultSetMetaData.getColumnName(i).toLowerCase();
String colKey = getStateKey(tableName, colName);

//only include columns that are part of the maximum value tracking column list
if (!maxValueQualifiedColumnNameList.contains(colKey)) {
continue;
}

int colType = resultSetMetaData.getColumnType(i);
columnTypeMap.putIfAbsent(colKey, colType);
}

for (String maxValueColumn:maxValueColumnNameList) {
String colKey = getStateKey(tableName, maxValueColumn.trim().toLowerCase());
if (!columnTypeMap.containsKey(colKey)) {
throw new ProcessException("Column not found in the table/query specified: " + maxValueColumn);
}
}
} else {
throw new ProcessException("No columns found in table from those specified: " + maxValueColumnNames);
}
Expand All @@ -279,6 +323,10 @@ public void setup(final ProcessContext context, boolean shouldCleanCache, FlowFi
}
}

/**
 * Wraps a custom query as a sub-query of {@code SELECT *}, aliased with the given table name.
 * <p>
 * The result has the form {@code SELECT * FROM (<sqlQuery>) AS <tableName>}. A mutable
 * {@link StringBuilder} is returned so that callers can append further clauses to it.
 * <p>
 * NOTE(review): the alias is introduced with the {@code AS} keyword. Some databases (Oracle,
 * for one) do not accept {@code AS} before a table alias - confirm the targeted database
 * types tolerate this form.
 *
 * @param sqlQuery  the query to embed, inserted verbatim between the parentheses
 * @param tableName the alias given to the wrapped sub-query, inserted verbatim
 * @return a new builder containing the wrapping SELECT statement
 */
protected static StringBuilder getWrappedQuery(String sqlQuery, String tableName) {
    final StringBuilder wrapped = new StringBuilder("SELECT * FROM (");
    wrapped.append(sqlQuery);
    wrapped.append(") AS ");
    wrapped.append(tableName);
    return wrapped;
}

protected static String getMaxValueFromRow(ResultSet resultSet,
int columnIndex,
Integer type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
Expand Down Expand Up @@ -162,6 +163,12 @@ public void setup(final ProcessContext context) {
}
}

/**
 * Lifecycle callback annotated with {@code @OnStopped}. Clears the {@code setupComplete} flag.
 */
@OnStopped
public void stop() {
// Only the flag is cleared here; the column type map itself is not touched in this method.
// Presumably the next trigger sees setup as incomplete and rebuilds the map, picking up any
// property changes made while stopped - TODO confirm against onTrigger/setup.
setupComplete.set(false);
}

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
// Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@
@InputRequirement(Requirement.INPUT_FORBIDDEN)
@Tags({"sql", "select", "jdbc", "query", "database"})
@SeeAlso({GenerateTableFetch.class, ExecuteSQL.class})
@CapabilityDescription("Generates and executes a SQL select query to fetch all rows whose values in the specified Maximum Value column(s) are larger than the "
@CapabilityDescription("Generates a SQL select query, or uses a provided statement, and executes it to fetch all rows whose values in the specified "
+ "Maximum Value column(s) are larger than the "
+ "previously-seen maxima. Query result will be converted to Avro format. Expression Language is supported for several properties, but no incoming "
+ "connections are permitted. The Variable Registry may be used to provide values for any property containing Expression Language. If it is desired to "
+ "leverage flow file attributes to perform these queries, the GenerateTableFetch and/or ExecuteSQL processors can be used for this purpose. "
Expand Down Expand Up @@ -168,8 +169,13 @@ public QueryDatabaseTable() {
final List<PropertyDescriptor> pds = new ArrayList<>();
pds.add(DBCP_SERVICE);
pds.add(DB_TYPE);
pds.add(TABLE_NAME);
pds.add(new PropertyDescriptor.Builder()
.fromPropertyDescriptor(TABLE_NAME)
.description("The name of the database table to be queried. When a custom query is used, this property is used to alias the query and appears as an attribute on the FlowFile.")
.build());
pds.add(COLUMN_NAMES);
pds.add(WHERE_CLAUSE);
pds.add(SQL_QUERY);
pds.add(MAX_VALUE_COLUMN_NAMES);
pds.add(QUERY_TIMEOUT);
pds.add(FETCH_SIZE);
Expand All @@ -180,7 +186,7 @@ public QueryDatabaseTable() {
pds.add(USE_AVRO_LOGICAL_TYPES);
pds.add(DEFAULT_PRECISION);
pds.add(DEFAULT_SCALE);
pds.add(WHERE_CLAUSE);

propDescriptors = Collections.unmodifiableList(pds);
}

Expand Down Expand Up @@ -220,6 +226,7 @@ public void onTrigger(final ProcessContext context, final ProcessSessionFactory
final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue();
final String sqlQuery = context.getProperty(SQL_QUERY).evaluateAttributeExpressions().getValue();
final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions().getValue();
final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions().getValue();
final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger();
Expand Down Expand Up @@ -275,7 +282,7 @@ public void onTrigger(final ProcessContext context, final ProcessSessionFactory
List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames)
? null
: Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
final String selectQuery = getQuery(dbAdapter, tableName, sqlQuery, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
final StopWatch stopWatch = new StopWatch(true);
final String fragmentIdentifier = UUID.randomUUID().toString();

Expand Down Expand Up @@ -404,10 +411,22 @@ public void onTrigger(final ProcessContext context, final ProcessSessionFactory

/**
 * Builds the SELECT statement for a fetch that has no custom SQL query.
 * <p>
 * Convenience overload that delegates to the seven-argument {@code getQuery}, passing
 * {@code null} for its {@code sqlQuery} parameter.
 *
 * @param dbAdapter         the database adapter, passed through to the delegate
 * @param tableName         the table to query
 * @param columnNames       the columns to select, passed through unchanged
 * @param maxValColumnNames the maximum-value column names, passed through unchanged
 * @param customWhereClause an additional WHERE clause, passed through unchanged
 * @param stateMap          the state map, passed through unchanged
 * @return the query produced by the delegate overload
 */
protected String getQuery(DatabaseAdapter dbAdapter, String tableName, String columnNames, List<String> maxValColumnNames,
                          String customWhereClause, Map<String, String> stateMap) {
    // This overload never carries a custom query.
    final String noCustomQuery = null;
    return getQuery(dbAdapter, tableName, noCustomQuery, columnNames, maxValColumnNames, customWhereClause, stateMap);
}

protected String getQuery(DatabaseAdapter dbAdapter, String tableName, String sqlQuery, String columnNames, List<String> maxValColumnNames,
String customWhereClause, Map<String, String> stateMap) {
if (StringUtils.isEmpty(tableName)) {
throw new IllegalArgumentException("Table name must be specified");
}
final StringBuilder query = new StringBuilder(dbAdapter.getSelectStatement(tableName, columnNames, null, null, null, null));
final StringBuilder query;

if (StringUtils.isEmpty(sqlQuery)) {
query = new StringBuilder(dbAdapter.getSelectStatement(tableName, columnNames, null, null, null, null));
} else {
query = getWrappedQuery(sqlQuery, tableName);
}

List<String> whereClauses = new ArrayList<>();
// Check state map for last max values
Expand Down
Loading

0 comments on commit 82ac815

Please sign in to comment.