Skip to content

Commit

Permalink
Experimental patch that (for some unknown reason) fixes the memory leak in IndexAll. (IQSS#4463)
Browse files Browse the repository at this point in the history
  • Loading branch information
landreev committed Mar 15, 2018
1 parent 5ed5edf commit 9ca73d5
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ public List<Dataset> findAll() {
/**
 * Returns the ids of all non-harvested ("local") datasets, ordered by id
 * ascending. Only the ids are fetched, not the full Dataset entities.
 */
public List<Long> findAllLocalDatasetIds() {
    String jpql = "SELECT o.id FROM Dataset o WHERE o.harvestedFrom IS null ORDER BY o.id";
    return em.createQuery(jpql, Long.class).getResultList();
}

/**
 * Returns the ids of all datasets whose indexTime is null (i.e., datasets
 * that have not yet been indexed), ordered by id descending.
 * NOTE(review): only ids are returned, not entities — presumably to keep the
 * persistence context small during bulk reindexing; TODO confirm. Also note
 * the DESC ordering here differs from the ascending order used by the
 * equivalent Dataverse query — verify this asymmetry is intentional.
 */
public List<Long> findAllUnindexed() {
    String jpql = "SELECT o.id FROM Dataset o WHERE o.indexTime IS null ORDER BY o.id DESC";
    return em.createQuery(jpql, Long.class).getResultList();
}

/**
* For docs, see the equivalent method on the DataverseServiceBean.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ public List<Dataverse> findAllOrSubset(long numPartitions, long partitionId, boo
typedQuery.setParameter("partitionId", partitionId);
return typedQuery.getResultList();
}

/**
 * Returns the ids of all dataverses whose indexTime is null (i.e., dataverses
 * that have not yet been indexed), ordered by id ascending.
 * Only the ids are fetched, not the full Dataverse entities.
 */
public List<Long> findAllUnindexed() {
    List<Long> unindexedIds = em.createQuery(
            "SELECT o.id FROM Dataverse o WHERE o.indexTime IS null ORDER BY o.id",
            Long.class).getResultList();
    return unindexedIds;
}

public List<Dataverse> findByOwnerId(Long ownerId) {
String qr = "select object(o) from Dataverse as o where o.owner.id =:ownerId order by o.name";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import javax.json.JsonArrayBuilder;
import javax.json.JsonObject;
import javax.json.JsonObjectBuilder;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import org.apache.solr.client.solrj.SolrServerException;

@Named
Expand All @@ -27,6 +29,9 @@ public class IndexAllServiceBean {

private static final Logger logger = Logger.getLogger(IndexAllServiceBean.class.getCanonicalName());

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

@EJB
IndexServiceBean indexService;
@EJB
Expand All @@ -53,21 +58,29 @@ public JsonObjectBuilder indexAllOrSubsetPreview(long numPartitions, long partit
JsonObjectBuilder response = Json.createObjectBuilder();
JsonObjectBuilder previewOfWorkload = Json.createObjectBuilder();
JsonObjectBuilder dvContainerIds = Json.createObjectBuilder();
JsonArrayBuilder dataverseIds = Json.createArrayBuilder();
List<Dataverse> dataverses = dataverseService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
for (Dataverse dataverse : dataverses) {
dataverseIds.add(dataverse.getId());

List<Long> dataverseIds = dataverseService.findAllUnindexed();

JsonArrayBuilder dataverseIdsJson = Json.createArrayBuilder();
//List<Dataverse> dataverses = dataverseService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
for (Long id : dataverseIds) {
dataverseIdsJson.add(id);
}
JsonArrayBuilder datasetIds = Json.createArrayBuilder();
List<Dataset> datasets = datasetService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
for (Dataset dataset : datasets) {
datasetIds.add(dataset.getId());

// List<Dataset> datasets = datasetService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
// Note: no support for "partitions" in this experimental branch.
// The method below returns the ids of all the unindexed datasets.
List<Long> datasetIds = datasetService.findAllUnindexed();

JsonArrayBuilder datasetIdsJson = Json.createArrayBuilder();
for (Long id : datasetIds) {
datasetIdsJson.add(id);
}
dvContainerIds.add("dataverses", dataverseIds);
dvContainerIds.add("datasets", datasetIds);
dvContainerIds.add("dataverses", dataverseIdsJson);
dvContainerIds.add("datasets", datasetIdsJson);
previewOfWorkload.add("dvContainerIds", dvContainerIds);
previewOfWorkload.add("dataverseCount", dataverses.size());
previewOfWorkload.add("datasetCount", datasets.size());
previewOfWorkload.add("dataverseCount", dataverseIds.size());
previewOfWorkload.add("datasetCount", datasetIds.size());
previewOfWorkload.add("partitionId", partitionId);
response.add("previewOfPartitionWorkload", previewOfWorkload);
return response;
Expand Down Expand Up @@ -105,38 +118,46 @@ public Future<String> indexAllOrSubset(long numPartitions, long partitionId, boo
resultOfClearingIndexTimes = "Solr index was not cleared before indexing.";
}

List<Dataverse> dataverses = dataverseService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
// List<Dataverse> dataverses = dataverseService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
// Note: no support for "partitions" in this experimental branch.
// The method below returns the ids of all the unindexed dataverses.
List<Long> dataverseIds = dataverseService.findAllUnindexed();
int dataverseIndexCount = 0;
int dataverseFailureCount = 0;
for (Dataverse dataverse : dataverses) {
//for (Dataverse dataverse : dataverses) {
for (Long id : dataverseIds) {
try {
dataverseIndexCount++;
logger.info("indexing dataverse " + dataverseIndexCount + " of " + dataverses.size() + " (id=" + dataverse.getId() + ", persistentId=" + dataverse.getAlias() + ")");
Dataverse dataverse = dataverseService.find(id);
logger.info("indexing dataverse " + dataverseIndexCount + " of " + dataverseIds.size() + " (id=" + id + ", persistentId=" + dataverse.getAlias() + ")");
Future<String> result = indexService.indexDataverseInNewTransaction(dataverse);
dataverse = null;
} catch (Exception e) {
//We want to keep running even after an exception so throw some more info into the log
dataverseFailureCount++;
logger.info("FAILURE indexing dataverse " + dataverseIndexCount + " of " + dataverses.size() + " (id=" + dataverse.getId() + ", persistentId=" + dataverse.getAlias() + ") Exception info: " + e.getMessage());
logger.info("FAILURE indexing dataverse " + dataverseIndexCount + " of " + dataverseIds.size() + " (id=" + id + ") Exception info: " + e.getMessage());
}
}

int datasetIndexCount = 0;
int datasetFailureCount = 0;
List<Dataset> datasets = datasetService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
for (Dataset dataset : datasets) {
// List<Dataset> datasets = datasetService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
// Note: no support for "partitions" in this experimental branch.
// The method below returns the ids of all the unindexed datasets.
List<Long> datasetIds = datasetService.findAllUnindexed();
for (Long id : datasetIds) {
try {
datasetIndexCount++;
logger.info("indexing dataset " + datasetIndexCount + " of " + datasets.size() + " (id=" + dataset.getId() + ", persistentId=" + dataset.getGlobalId() + ")");
Future<String> result = indexService.indexDatasetInNewTransaction(dataset);
//Dataset dataset = datasetService.find(id);
logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")");
//Future<String> result = indexService.indexDatasetInNewTransaction(dataset);
Future<String> result = indexService.indexDatasetInNewTransaction(id);
} catch (Exception e) {
//We want to keep running even after an exception so throw some more info into the log
datasetFailureCount++;
logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasets.size() + " (id=" + dataset.getId() + ", identifier = " + dataset.getIdentifier() + ") Exception info: " + e.getMessage());
logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ") Exception info: " + e.getMessage());
}

}
// logger.info("advanced search fields: " + advancedSearchFields);
// logger.info("not advanced search fields: " + notAdvancedSearchFields);
logger.info("done iterating through all datasets");

long indexAllTimeEnd = System.currentTimeMillis();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
import javax.ejb.TransactionAttribute;
import static javax.ejb.TransactionAttributeType.REQUIRES_NEW;
import javax.inject.Named;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
Expand All @@ -67,6 +69,9 @@ public class IndexServiceBean {

private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName());

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

@EJB
DvObjectServiceBean dvObjectService;
@EJB
Expand Down Expand Up @@ -253,9 +258,13 @@ public Future<String> indexDataverse(Dataverse dataverse) {
}

@TransactionAttribute(REQUIRES_NEW)
public Future<String> indexDatasetInNewTransaction(Dataset dataset) {
public Future<String> indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) {
boolean doNormalSolrDocCleanUp = false;
return indexDataset(dataset, doNormalSolrDocCleanUp);
Dataset dataset = em.find(Dataset.class, datasetId);
//return indexDataset(dataset, doNormalSolrDocCleanUp);
Future<String> ret = indexDataset(dataset, doNormalSolrDocCleanUp);
dataset = null;
return ret;
}

public Future<String> indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) {
Expand Down Expand Up @@ -1021,10 +1030,18 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) {
return ex.toString();
}

dvObjectService.updateContentIndexTime(dataset);
Long dsId = dataset.getId();
///Dataset updatedDataset = (Dataset)dvObjectService.updateContentIndexTime(dataset);
///updatedDataset = null;
// instead of making a call to dvObjectService, let's try and
// modify the index time stamp using the local EntityManager:
DvObject dvObjectToModify = em.find(DvObject.class, dsId);
dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
dvObjectToModify = em.merge(dvObjectToModify);
dvObjectToModify = null;

// return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
return "indexed dataset " + dataset.getId() + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}

/**
Expand Down

0 comments on commit 9ca73d5

Please sign in to comment.