Skip to content

Commit

Permalink
IMPALA-8338 : Check CREATION_TIME while processing DROP_DATABASE events.
Browse files Browse the repository at this point in the history
Process the drop database event only if the CREATION_TIME of the
catalog's database object is lesser than or equal to that of the
database object present in the notification event. If the
CREATION_TIME in the notification event object is lesser than
the catalog's DB object, it means that the Database object
present in the catalog is the latest and we can skip the event
instead.

Testing :
 - Added unit tests in MetastoreEventsProcessorTest.
 - Enabled testCreateDropCreateDatabaseFromImpala as
   we now have CREATION_TIME in the notification events.

Change-Id: I8fd3685cf7261e4953f4f884850489a47c5bbd6c
Reviewed-on: http://gerrit.cloudera.org:8080/12938
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-on: https://gerrit.sjc.cloudera.com/c/cdh/impala/+/49579
Tested-by: Jenkins User <[email protected]>
Reviewed-by: Laszlo Gaal <[email protected]>
CDH-Build: Laszlo Gaal <[email protected]>
Quasar-L0: Laszlo Gaal <[email protected]>
Reviewed-by: Vihang Karajgaonkar <[email protected]>
  • Loading branch information
Bharath Krishna authored and gaallas committed Jun 21, 2019
1 parent 6fdeaf9 commit 325b5fe
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,39 @@ public Db removeDb(String dbName) {
}
}

/**
* @param msDb Metastore Database used to remove Db from Catalog
* @param dbFound Set to true if Database is found in Catalog
* @param dbMatched Set to true if Database is found in Catalog and it's CREATION_TIME
* is equal to the metastore DB
* @return the DB object removed. Return null if DB does not exist or was not removed
* because CREATION_TIME does not match.
*/
public Db removeDbIfExists(org.apache.hadoop.hive.metastore.api.Database msDb,
Reference<Boolean> dbFound, Reference<Boolean> dbMatched) {
dbFound.setRef(false);
dbMatched.setRef(false);
versionLock_.writeLock().lock();
try {
String dbName = msDb.getName();
Db catalogDb = getDb(dbName);
if (catalogDb == null) return null;

dbFound.setRef(true);
// Remove the DB only if the CREATION_TIME matches with the metastore DB from event.
if (msDb.getCreateTime() == catalogDb.getMetaStoreDb().getCreateTime()) {
Db removedDb = removeDb(dbName);
if (removedDb != null) {
dbMatched.setRef(true);
return removedDb;
}
}
return null;
} finally {
versionLock_.writeLock().unlock();
}
}

/**
* Helper function to clean up the state associated with a removed database. It creates
* the entries in the delete log for 'db' as well as for its tables and functions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.hadoop.hive.metastore.messaging.DropPartitionMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterTableMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateDatabaseMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONDropDatabaseMessage;
import org.apache.hadoop.hive.metastore.messaging.json.JSONDropTableMessage;
import org.apache.impala.analysis.TableName;
import org.apache.impala.catalog.CatalogException;
Expand Down Expand Up @@ -204,6 +205,8 @@ List<MetastoreEvent> getFilteredEvents(List<NotificationEvent> events)
}
LOG.info(String.format("Total number of events received: %d Total number of events "
+ "filtered out: %d", sizeBefore, numFilteredEvents));
metrics_.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC)
.inc(numFilteredEvents);
return metastoreEvents;
}
}
Expand Down Expand Up @@ -824,10 +827,7 @@ private CreateDatabaseEvent(CatalogServiceCatalog catalog, Metrics metrics,

/**
* Processes the create database event by adding the Db object from the event if it
* does not exist in the catalog already. TODO we should compare the creationTime of
* the Database in catalog with the Database in the event to make sure we are ignoring
* only catalog has the latest Database object. This will be added after HIVE-21077 is
* fixed and available
* does not exist in the catalog already.
*/
@Override
public void process() {
Expand Down Expand Up @@ -865,33 +865,55 @@ public boolean isRemovedAfter(List<MetastoreEvent> events) {
*/
public static class DropDatabaseEvent extends MetastoreDatabaseEvent {

// Metastore database object as parsed from NotificationEvent message
private final Database droppedDatabase_;

/**
* Prevent instantiation from outside should use MetastoreEventFactory instead
*/
private DropDatabaseEvent(
CatalogServiceCatalog catalog, Metrics metrics, NotificationEvent event) {
CatalogServiceCatalog catalog, Metrics metrics, NotificationEvent event)
throws MetastoreNotificationException {
super(catalog, metrics, event);
Preconditions.checkArgument(MetastoreEventType.DROP_DATABASE.equals(eventType_));
JSONDropDatabaseMessage dropDatabaseMessage =
(JSONDropDatabaseMessage) MetastoreEventsProcessor.getMessageFactory()
.getDeserializer().getDropDatabaseMessage(event.getMessage());
try {
droppedDatabase_ =
Preconditions.checkNotNull(dropDatabaseMessage.getDatabaseObject());
} catch (Exception e) {
throw new MetastoreNotificationException(debugString(
"Database object is null in the event. "
+ "This could be a metastore configuration problem. "
+ "Check if %s is set to true in metastore configuration",
MetastoreEventsProcessor.HMS_ADD_THRIFT_OBJECTS_IN_EVENTS_CONFIG_KEY), e);
}
}

/**
* Process the drop database event. Currently, this handler removes the db object from
* catalog. TODO Once we have HIVE-21077 we should compare creationTime to make sure
* that catalog's Db matches with the database object in the event
* Process the drop database event. This handler removes the db object from catalog
* only if the CREATION_TIME of the catalog's database object is lesser than or equal
* to that of the database object present in the notification event. If the
* CREATION_TIME of the catalog's DB object is greater than that of the notification
* event's DB object, it means that the Database object present in the catalog is a
* later version and we can skip the event. (For instance, when user does a create db,
* drop db and create db again with the same dbName.)
*/
@Override
public void process() {
// TODO this does not currently handle the case where the was a new instance
// of database with the same name created in catalog after this database instance
// was removed. For instance, user does a CREATE db, drop db and create db again
// with the same dbName. In this case, the drop database event will remove the
// database instance which is created after this create event. We should add a
// check to compare the creation time of the database with the creation time in
// the event to make sure we are removing the right databases object. Unfortunately,
// database do not have creation time currently. This would be fixed in HIVE-21077
Db removedDb = catalog_.removeDb(dbName_);
// if database did not exist in the cache there was nothing to do
Reference<Boolean> dbFound = new Reference<>();
Reference<Boolean> dbMatched = new Reference<>();
Db removedDb = catalog_.removeDbIfExists(droppedDatabase_, dbFound, dbMatched);
if (removedDb != null) {
infoLog("Successfully removed database {}", dbName_);
infoLog("Removed Database {} ", dbName_);
} else if (!dbFound.getRef()) {
debugLog("Database {} was not removed since it " +
"did not exist in catalog.", dbName_);
} else if (!dbMatched.getRef()) {
infoLog(debugString("Database %s was not removed from catalog since "
+ "the creation time of the Database did not match", dbName_));
metrics_.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).inc();
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.impala.catalog.events;

import static java.lang.Thread.sleep;
import static org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType.ALTER_TABLE;
import static org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType.CREATE_DATABASE;
import static org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType.CREATE_TABLE;
Expand Down Expand Up @@ -268,6 +269,60 @@ public void testdropDatabaseEvent() throws TException, ImpalaException {
}
}

/**
* DROP_DATABASE uses CREATION_TIME to filter events that try to drop an earlier version
* of DB, hence this test is to verify that the sequence of operations CREATE_DB,
* DROP_DB, CREATE_DB from Hive will produce expected result in Impala's Catalog.
*/
@Test
public void testCreateDropCreateDatabase() throws TException {
createDatabase(TEST_DB_NAME, null);
dropDatabaseCascade(TEST_DB_NAME);
createDatabase(TEST_DB_NAME, null);
eventsProcessor_.processEvents();
// Check that the database exists in Catalog
assertNotNull(catalog_.getDb(TEST_DB_NAME));
}

/**
* Test to verify that DROP_DATABASE event is processed such that it removes the DB from
* Catalog only if the CREATION_TIME of the Catalog's DB object is less than or equal to
* that in the event.
*/
@Test
public void testDropDatabaseCreationTime()
throws ImpalaException, InterruptedException {
long filteredCount = eventsProcessor_.getMetrics()
.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).getCount();
createDatabaseFromImpala(TEST_DB_NAME, "Test DB for CREATION_TIME");
// now drop the database with cascade option
dropDatabaseCascadeFromImpala(TEST_DB_NAME);

// Adding sleep here to make sure that the CREATION_TIME is not same
// as the previous CREATE_DB operation, so as to trigger the filtering logic
// based on CREATION_TIME in DROP_DB event processing. This is currently a
// limitation : the DROP_DB event filtering expects that while processing events,
// the CREATION_TIME of two DB's with same name won't have the same
// creation timestamp.
sleep(2000);
// Create database again with same name
createDatabaseFromImpala(TEST_DB_NAME, "Test DB for CREATION_TIME");
eventsProcessor_.processEvents();

// Here, we expect the events CREATE_DB, DROP_DB, CREATE_DB for the
// same Database name. Hence, the DROP_DB event should not be processed,
// as the CREATION_TIME of the catalog's Database object should be greater
// than that in the DROP_DB notification event. Two events are filtered here,
// 1 : first CREATE_DATABASE as it is followed by another create of the same name.
// 2 : DROP_DATABASE as it is trying to drop a database which is again created.
assertEquals(2, eventsProcessor_.getMetrics()
.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC)
.getCount() - filteredCount);

// Teardown step - Drop the created DB
dropDatabaseCascadeFromImpala(TEST_DB_NAME);
}

@Ignore("Disabled until we fix Hive bug to deserialize alter_database event messages")
@Test
public void testAlterDatabaseEvents() throws TException, ImpalaException {
Expand Down Expand Up @@ -779,10 +834,8 @@ private void verifyFilterEvents(int total, int numFiltered,
/**
* Similar to create,drop,create sequence table as in
* <code>testCreateDropCreateTableFromImpala</code> but operates on Database instead
* of Table. Makes sure that the database creationTime is checked before processing
* create and drop database events
* of Table.
*/
@Ignore("Ignored since database createTime is unavailable until we have HIVE-21077")
@Test
public void testCreateDropCreateDatabaseFromImpala() throws ImpalaException {
assertEquals(EventProcessorStatus.ACTIVE, eventsProcessor_.getStatus());
Expand Down

0 comments on commit 325b5fe

Please sign in to comment.