Skip to content

Commit

Permalink
NIFI-5533: Be more efficient with heap utilization
Browse files Browse the repository at this point in the history
 - Updated FlowFile Repo / Write Ahead Log so that any update that writes more than 1 MB of data is written to a file inside the FlowFile Repo rather than being buffered in memory
 - Update SplitText so that it does not hold FlowFiles that are not the latest version in heap. Doing so prevents them from being garbage collected, so while the Process Session is holding the latest version of the FlowFile, SplitText is holding an older version, and this results in two copies of the same FlowFile object

NIFI-5533: Checkpoint

NIFI-5533: Bug Fixes

Signed-off-by: Matthew Burgess <[email protected]>

This closes apache#2974
  • Loading branch information
markap14 authored and mattyb149 committed Oct 9, 2018
1 parent c87d791 commit c425bd2
Show file tree
Hide file tree
Showing 21 changed files with 828 additions and 226 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
*/
package org.apache.nifi.attribute.expression.language;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.nifi.attribute.expression.language.evaluation.Evaluator;
import org.apache.nifi.attribute.expression.language.evaluation.literals.StringLiteralEvaluator;
import org.apache.nifi.attribute.expression.language.evaluation.selection.AllAttributesEvaluator;
Expand All @@ -34,7 +28,14 @@
import org.apache.nifi.expression.AttributeValueDecorator;
import org.apache.nifi.processor.exception.ProcessException;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class StandardPreparedQuery implements PreparedQuery {
private static final String EMPTY_STRING = "";

private final List<Expression> expressions;
private volatile VariableImpact variableImpact;
Expand All @@ -45,6 +46,14 @@ public StandardPreparedQuery(final List<Expression> expressions) {

@Override
public String evaluateExpressions(final Map<String, String> valMap, final AttributeValueDecorator decorator, final Map<String, String> stateVariables) throws ProcessException {
if (expressions.isEmpty()) {
return EMPTY_STRING;
}
if (expressions.size() == 1) {
final String evaluated = expressions.get(0).evaluate(valMap, decorator, stateVariables);
return evaluated == null ? EMPTY_STRING : evaluated;
}

final StringBuilder sb = new StringBuilder();

for (final Expression expression : expressions) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.repository.schema;

import java.io.Closeable;
import java.io.IOException;

/**
 * A closeable, forward-only source of {@link Record} objects whose operations may perform I/O.
 * Callers should {@link #close()} the iterator when finished so that any underlying resource
 * held by the implementation can be released.
 */
public interface RecordIterator extends Closeable {

/**
 * Returns the next Record.
 *
 * @return the next Record; the implementations visible alongside this interface return
 *         <code>null</code> once no further records are available
 * @throws IOException if the next Record cannot be read
 */
Record next() throws IOException;

/**
 * Indicates whether another Record is available from this iterator.
 *
 * @return <code>true</code> if there is at least one more Record to read, <code>false</code> otherwise
 * @throws IOException if the underlying source cannot be checked
 */
boolean isNext() throws IOException;

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@

package org.apache.nifi.repository.schema;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
Expand All @@ -30,7 +33,6 @@
import java.util.Map;
import java.util.Optional;


public class SchemaRecordReader {
private final RecordSchema schema;

Expand All @@ -56,15 +58,24 @@ private static void fillBuffer(final InputStream in, final byte[] destination) t
}

public Record readRecord(final InputStream in) throws IOException {
final int sentinelByte = in.read();
if (sentinelByte < 0) {
final int recordIndicator = in.read();
if (recordIndicator < 0) {
return null;
}

if (sentinelByte != 1) {
throw new IOException("Expected to read a Sentinel Byte of '1' but got a value of '" + sentinelByte + "' instead");
if (recordIndicator == SchemaRecordWriter.EXTERNAL_FILE_INDICATOR) {
throw new IOException("Expected to read a Sentinel Byte of '1' indicating that the next record is inline but the Sentinel value was '" + SchemaRecordWriter.EXTERNAL_FILE_INDICATOR
+ ", indicating that data was written to an External File. This data cannot be recovered via calls to #readRecord(InputStream) but must be recovered via #readRecords(InputStream)");
}

if (recordIndicator != 1) {
throw new IOException("Expected to read a Sentinel Byte of '1' but got a value of '" + recordIndicator + "' instead");
}

return readInlineRecord(in);
}

private Record readInlineRecord(final InputStream in) throws IOException {
final List<RecordField> schemaFields = schema.getFields();
final Map<RecordField, Object> fields = new HashMap<>(schemaFields.size());

Expand All @@ -76,6 +87,53 @@ public Record readRecord(final InputStream in) throws IOException {
return new FieldMapRecord(fields, schema);
}

/**
 * Reads the next entry from the given stream and exposes the Record(s) it denotes as a
 * {@link RecordIterator}.
 *
 * The first byte read is a record indicator. For an inline record, that single Record is read from
 * <code>in</code> and wrapped in a {@link SingleRecordIterator}. For an external file reference, the
 * file name is read from <code>in</code> as a UTF string and the returned iterator reads its Records
 * from that file; closing the iterator closes the stream over that file, not <code>in</code>.
 *
 * @param in the stream to read from
 * @return an iterator over the Record(s), or <code>null</code> if the end of the stream was reached
 *         before any indicator byte could be read
 * @throws IOException if the indicator is neither the inline nor the external-file indicator, or if
 *             the stream or the external file cannot be read
 */
public RecordIterator readRecords(final InputStream in) throws IOException {
    final int recordIndicator = in.read();
    if (recordIndicator < 0) {
        return null;
    }

    final boolean inline = recordIndicator == SchemaRecordWriter.INLINE_RECORD_INDICATOR;
    if (inline) {
        return new SingleRecordIterator(readInlineRecord(in));
    }

    final boolean external = recordIndicator == SchemaRecordWriter.EXTERNAL_FILE_INDICATOR;
    if (!external) {
        throw new IOException("Expected to read a Sentinel Byte of '" + SchemaRecordWriter.INLINE_RECORD_INDICATOR + "' or '" + SchemaRecordWriter.EXTERNAL_FILE_INDICATOR
            + "' but encountered a value of '" + recordIndicator + "' instead");
    }

    // The indicator is followed by the name of the file that holds the actual records.
    final String filename = new DataInputStream(in).readUTF();
    final InputStream fileIn = new BufferedInputStream(new FileInputStream(new File(filename)));

    return new RecordIterator() {
        @Override
        public boolean isNext() throws IOException {
            // Peek at a single byte and rewind, so that the stream position is left untouched.
            fileIn.mark(1);
            final int peeked = fileIn.read();
            fileIn.reset();

            return peeked >= 0;
        }

        @Override
        public Record next() throws IOException {
            return readRecord(fileIn);
        }

        @Override
        public void close() throws IOException {
            fileIn.close();
        }
    };
}



private Object readField(final InputStream in, final RecordField field) throws IOException {
switch (field.getRepetition()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.slf4j.LoggerFactory;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UTFDataFormatException;
Expand All @@ -30,6 +31,8 @@
import java.util.Map;

public class SchemaRecordWriter {
// Indicator byte written by writeRecord ahead of a record whose content follows inline in the same stream.
static final int INLINE_RECORD_INDICATOR = 1;
// Indicator byte written by writeExternalFileReference ahead of the UTF-encoded absolute path of an external file.
static final int EXTERNAL_FILE_INDICATOR = 8;

public static final int MAX_ALLOWED_UTF_LENGTH = 65_535;

Expand All @@ -41,7 +44,7 @@ public void writeRecord(final Record record, final OutputStream out) throws IOEx
// write sentinel value to indicate that there is a record. This allows the reader to then read one
// byte and check if -1. If so, the reader knows there are no more records. If not, then the reader
// knows that it should be able to continue reading.
out.write(1);
out.write(INLINE_RECORD_INDICATOR);

final byte[] buffer = byteArrayCache.checkOut();
try {
Expand Down Expand Up @@ -226,4 +229,8 @@ static int getCharsInUTF8Limit(final String str, final int utf8Limit) {
return charsInOriginal;
}

/**
 * Writes a reference to an external file in place of an inline record: the external-file
 * indicator byte, followed by the file's absolute path encoded with
 * {@link DataOutputStream#writeUTF(String)}.
 *
 * @param out the stream to write the reference to
 * @param externalFile the file being referenced
 * @throws IOException if the reference cannot be written
 */
public void writeExternalFileReference(final DataOutputStream out, final File externalFile) throws IOException {
    out.write(EXTERNAL_FILE_INDICATOR);

    final String absolutePath = externalFile.getAbsolutePath();
    out.writeUTF(absolutePath);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.repository.schema;

/**
 * A {@link RecordIterator} over exactly one Record. The Record is handed out by the first call to
 * {@link #next()}; every later call returns <code>null</code>.
 */
public class SingleRecordIterator implements RecordIterator {
    private final Record record;
    private boolean exhausted = false;

    /**
     * @param record the single Record that this iterator will return
     */
    public SingleRecordIterator(final Record record) {
        this.record = record;
    }

    /**
     * @return <code>true</code> until the Record has been returned by {@link #next()}
     */
    @Override
    public boolean isNext() {
        return !exhausted;
    }

    /**
     * @return the wrapped Record on the first call, <code>null</code> on every call after that
     */
    @Override
    public Record next() {
        final Record result = exhausted ? null : record;
        exhausted = true;
        return result;
    }

    /**
     * Does nothing; this iterator holds no resources.
     */
    @Override
    public void close() {
    }
}
Loading

0 comments on commit c425bd2

Please sign in to comment.