Skip to content

Commit

Permalink
SAK-28202 Add indexed field to indicate the state of whether a document
Browse files Browse the repository at this point in the history
was indexed. Previously it depended on whether the content field had valid
data, but some ECPs didn't produce the appropriate content.
  • Loading branch information
ern committed Dec 31, 2014
1 parent b51cb26 commit 4167e28
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -429,9 +429,12 @@ protected XContentBuilder buildIndexRequest(EntityContentProducer ecp, String re
if (includeContent) {
String content = ecp.getContent(resourceName);
// some of the ECP impls produce content with nothing but whitespace; it's a waste of time to index those
// add the trim check to remove those
if (StringUtils.isNotEmpty(content) && StringUtils.isNotEmpty(content.trim())) {
xContentBuilder.field(SearchService.FIELD_CONTENTS, content);
if (StringUtils.isNotBlank(content)) {
xContentBuilder
// cannot rely on the ecp to provide content that reliably reflects index state,
// so the indexed field explicitly records whether the document was indexed
.field(SearchService.FIELD_INDEXED, true)
.field(SearchService.FIELD_CONTENTS, content);
} else {
throw new NoContentException(ecp.getId(resourceName), resourceName, ecp.getSiteId(resourceName));
}
Expand Down Expand Up @@ -464,7 +467,7 @@ protected void rebuildSiteIndex(String siteId) {
if (bulkRequest.numberOfActions() < bulkRequestSize) {
String reference = i.next();

if (StringUtils.isNotEmpty(ecp.getContent(reference))) {
if (StringUtils.isNotBlank(ecp.getContent(reference))) {
//updating was causing issues without a _source, so doing delete and re-add
try {
deleteDocument(ecp.getId(reference), ecp.getSiteId(reference));
Expand Down Expand Up @@ -605,7 +608,7 @@ public void run() {
}

/**
* Searches for any docs in the search index that do not have content yet,
* Searches for any docs in the search index that have not been indexed yet,
* digests the content and loads it into the index. Any docs with empty content will be removed from
* the index.
*/
Expand All @@ -624,7 +627,9 @@ public void processContentQueue() {
SearchResponse response = client.prepareSearch(indexName)
.setQuery(matchAllQuery())
.setTypes(ElasticSearchService.SAKAI_DOC_TYPE)
.setPostFilter(missingFilter(SearchService.FIELD_CONTENTS))
.setPostFilter( orFilter(
missingFilter(SearchService.FIELD_INDEXED),
termFilter(SearchService.FIELD_INDEXED, false)))
.setSize(contentIndexBatchSize)
.addFields(SearchService.FIELD_REFERENCE, SearchService.FIELD_SITEID)
.execute().actionGet();
Expand Down Expand Up @@ -804,7 +809,9 @@ public void destroy() {
public int getPendingDocuments() {
try {
CountResponse response = client.prepareCount(indexName)
.setQuery(filteredQuery(matchAllQuery(), missingFilter(SearchService.FIELD_CONTENTS)))
.setQuery(filteredQuery(matchAllQuery(), orFilter(
missingFilter(SearchService.FIELD_INDEXED),
termFilter(SearchService.FIELD_INDEXED, false))))
.execute()
.actionGet();
return (int) response.getCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ public String getStatus() {
public int getNDocs() {
indexBuilder.assureIndex();
CountResponse response = client.prepareCount(indexName)
.setQuery(filteredQuery(matchAllQuery(),existsFilter(SearchService.FIELD_CONTENTS)))
.setQuery(filteredQuery(matchAllQuery(),termFilter(SearchService.FIELD_INDEXED, true)))
.execute()
.actionGet();
return (int) response.getCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@
"index": "not_analyzed",
"store": "yes"
},
"indexed": {
"type": "boolean",
"index": "not_analyzed",
"null_value": "false",
"store": "no"
},
"contents": {
"type": "string",
"analyzer": "standard",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ public void testRebuildSiteIndex() {

elasticSearchIndexBuilder.refreshIndex();

System.out.println(elasticSearchService.getNDocs());
System.out.println("testRebuildSiteIndex: " + elasticSearchService.getNDocs());
assertTrue(elasticSearchService.getNDocs() == 106);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,11 @@ public interface SearchService extends Diagnosable


public static final String FIELD_DIGEST_COUNT = "digestCount";

public static final String DATE_STAMP = "indexdate";

public static final String FIELD_INDEXED = "indexed";

/**
* Perform a search, return results in a list.
*
Expand Down

0 comments on commit 4167e28

Please sign in to comment.