Skip to content

Commit

Permalink
Streamline Fingerprint implementation
Browse files Browse the repository at this point in the history
Thread all updates through a CodedOutputStream. This has the benefit of
potentially hashing less data, as many int values can be represented more
compactly, and reducing the churn of hashing strings (generally), since
CodedOutputStream is already heavily optimized for this. While the buffer
size is a little generous, it winds up paying off.

--
PiperOrigin-RevId: 142151062
MOS_MIGRATED_REVID=142151062
  • Loading branch information
michajlo authored and katre committed Dec 15, 2016
1 parent 4975760 commit e3db95e
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 151 deletions.
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ java_library(
"//src/main/java/com/google/devtools/common/options",
"//third_party:guava",
"//third_party:jsr305",
"//third_party/protobuf",
],
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,15 +163,15 @@ public static Md5Digest fromEnv(Map<String, String> env) {
byte[] result = new byte[Md5Digest.MD5_SIZE];
Fingerprint fp = new Fingerprint();
for (Map.Entry<String, String> entry : env.entrySet()) {
fp.addStringLatin1(entry.getKey());
fp.addStringLatin1(entry.getValue());
fp.addString(entry.getKey());
fp.addString(entry.getValue());
xorWith(result, fp.digestAndReset());
}
return new Md5Digest(result);
}

private static byte[] getDigest(Fingerprint fp, String execPath, Metadata md) {
fp.addStringLatin1(execPath);
fp.addString(execPath);

if (md == null) {
// Move along, nothing to see here.
Expand Down
221 changes: 77 additions & 144 deletions src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@

package com.google.devtools.build.lib.util;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.Iterables;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Map;
Expand All @@ -35,9 +35,6 @@
*/
public final class Fingerprint {

private static final byte[] TRUE_BYTES = new byte[] { 1 };
private static final byte[] FALSE_BYTES = new byte[] { 0 };

private static final MessageDigest MD5_PROTOTYPE;
private static final boolean MD5_PROTOTYPE_SUPPORTS_CLONE;

Expand All @@ -46,198 +43,152 @@ public final class Fingerprint {
MD5_PROTOTYPE_SUPPORTS_CLONE = supportsClone(MD5_PROTOTYPE);
}

private final ByteBuffer scratch;
// Make novel use of a CodedOutputStream, which is good at efficiently serializing data. By
// flushing at the end of each digest we can continue to use the stream.
private final CodedOutputStream codedOut;
private final MessageDigest md5;

/**
* Creates and initializes a new instance.
*/
/** Creates and initializes a new instance. */
public Fingerprint() {
scratch = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
md5 = cloneOrCreateMd5();
// This is a lot of indirection, but CodedOutputStream does a reasonable job of converting
// strings to bytes without creating a whole bunch of garbage, which pays off.
codedOut = CodedOutputStream.newInstance(
new DigestOutputStream(ByteStreams.nullOutputStream(), md5),
/*bufferSize=*/ 1024);
}

/**
* Completes the hash computation by doing final operations, e.g., padding.
*
* <p>This method has the side-effect of resetting the underlying digest computer.
* Completes the hash computation by doing final operations and resets the underlying state,
* allowing this instance to be used again.
*
* @return the MD5 digest as a 16-byte array
* @see java.security.MessageDigest#digest()
*/
public byte[] digestAndReset() {
// Reset is implicit.
try {
codedOut.flush();
} catch (IOException e) {
throw new IllegalStateException("failed to flush", e);
}
return md5.digest();
}

/**
* Completes the hash computation and returns the digest as a string.
*
* <p>This method has the side-effect of resetting the underlying digest computer.
*
* @return the MD5 digest as a 32-character string of hexadecimal digits
*/
/** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */
public String hexDigestAndReset() {
return hexDigest(digestAndReset());
}

/**
* Updates the digest with 0 or more bytes.
*
* @param input the array of bytes with which to update the digest
* @see java.security.MessageDigest#update(byte[])
*/
/** Updates the digest with 0 or more bytes. */
public Fingerprint addBytes(byte[] input) {
md5.update(input);
addBytes(input, 0, input.length);
return this;
}

/**
* Updates the digest with the specified number of bytes starting at offset.
*
* @param input the array of bytes with which to update the digest
* @param offset the offset into the array
* @param len the number of bytes to use
* @see java.security.MessageDigest#update(byte[], int, int)
*/
/** Updates the digest with the specified number of bytes starting at offset. */
public Fingerprint addBytes(byte[] input, int offset, int len) {
md5.update(input, offset, len);
try {
codedOut.write(input, offset, len);
} catch (IOException e) {
throw new IllegalStateException("failed to write bytes", e);
}
return this;
}

/**
* Updates the digest with a boolean value.
*/
/** Updates the digest with a boolean value. */
public Fingerprint addBoolean(boolean input) {
md5.update(input ? TRUE_BYTES : FALSE_BYTES);
try {
codedOut.writeBoolNoTag(input);
} catch (IOException e) {
throw new IllegalStateException();
}
return this;
}

/**
* Updates the digest with a boolean value, correctly handling null.
*/
/** Same as {@link #addBoolean(boolean)}, except considers nullability. */
public Fingerprint addNullableBoolean(Boolean input) {
return addInt(input == null ? -1 : (input.booleanValue() ? 1 : 0));
if (input == null) {
addBoolean(false);
} else {
addBoolean(true);
addBoolean(input);
}
return this;
}

/**
* Updates the digest with the little-endian bytes of a given int value.
*
* @param input the integer with which to update the digest
*/
/** Updates the digest with the varint representation of input. */
public Fingerprint addInt(int input) {
scratch.putInt(input);
updateFromScratch(4);
try {
codedOut.writeInt32NoTag(input);
} catch (IOException e) {
throw new IllegalStateException(e);
}
return this;
}

/**
* Updates the digest with the little-endian bytes of a given long value.
*
* @param input the long with which to update the digest
*/
/** Updates the digest with the varint representation of a long value. */
public Fingerprint addLong(long input) {
scratch.putLong(input);
updateFromScratch(8);
try {
codedOut.writeInt64NoTag(input);
} catch (IOException e) {
throw new IllegalStateException("failed to write long", e);
}
return this;
}

/**
* Updates the digest with the little-endian bytes of a given int value, correctly distinguishing
* between null and non-null values.
*
* @param input the integer with which to update the digest
*/
/** Same as {@link #addInt(int)}, except considers nullability. */
public Fingerprint addNullableInt(@Nullable Integer input) {
if (input == null) {
addInt(0);
addBoolean(false);
} else {
addInt(1);
addBoolean(true);
addInt(input);
}
return this;
}

/**
* Updates the digest with a UUID.
*
* @param uuid the UUID with which to update the digest. Must not be null.
*/
/** Updates the digest with a UUID. */
public Fingerprint addUUID(UUID uuid) {
addLong(uuid.getLeastSignificantBits());
addLong(uuid.getMostSignificantBits());
return this;
}

/**
* Updates the digest with a String using its length plus its UTF8 encoded bytes.
*
* @param input the String with which to update the digest
* @see java.security.MessageDigest#update(byte[])
*/
/** Updates the digest with a String using UTF8 encoding. */
public Fingerprint addString(String input) {
byte[] bytes = input.getBytes(UTF_8);
addInt(bytes.length);
md5.update(bytes);
try {
codedOut.writeStringNoTag(input);
} catch (IOException e) {
throw new IllegalStateException("failed to write string", e);
}
return this;
}

/**
* Updates the digest with a String using its length plus its UTF8 encoded bytes; if the string
* is null, then it uses -1 as the length.
*
* @param input the String with which to update the digest
* @see java.security.MessageDigest#update(byte[])
*/
/** Same as {@link #addString(String)}, except considers nullability. */
public Fingerprint addNullableString(@Nullable String input) {
if (input == null) {
addInt(-1);
addBoolean(false);
} else {
addBoolean(true);
addString(input);
}
return this;
}

/**
* Updates the digest with a String using its length and content.
*
* @param input the String with which to update the digest
* @see java.security.MessageDigest#update(byte[])
*/
public Fingerprint addStringLatin1(String input) {
addInt(input.length());
byte[] bytes = new byte[input.length()];
for (int i = 0; i < input.length(); i++) {
bytes[i] = (byte) input.charAt(i);
}
md5.update(bytes);
return this;
}

/**
* Updates the digest with a Path.
*
* @param input the Path with which to update the digest.
*/
/** Updates the digest with a {@link Path}. */
public Fingerprint addPath(Path input) {
addStringLatin1(input.getPathString());
addString(input.getPathString());
return this;
}

/**
* Updates the digest with a Path.
*
* @param input the Path with which to update the digest.
*/
/** Updates the digest with a {@link PathFragment}. */
public Fingerprint addPath(PathFragment input) {
return addStringLatin1(input.getPathString());
return addString(input.getPathString());
}

/**
* Updates the digest with inputs by iterating over them and invoking
* {@code #addString(String)} on each element.
*
* @param inputs the inputs with which to update the digest
* Add the supplied sequence of {@link String}s to the digest as an atomic unit, that is this is
* different from adding them each individually.
*/
public Fingerprint addStrings(Iterable<String> inputs) {
addInt(Iterables.size(inputs));
Expand All @@ -248,13 +199,7 @@ public Fingerprint addStrings(Iterable<String> inputs) {
return this;
}

/**
* Updates the digest with inputs which are pairs in a map, by iterating over
* the map entries and invoking {@code #addString(String)} on each key and
* value.
*
* @param inputs the inputs in a map with which to update the digest
*/
/** Updates the digest with the supplied map. */
public Fingerprint addStringMap(Map<String, String> inputs) {
addInt(inputs.size());
for (Map.Entry<String, String> entry : inputs.entrySet()) {
Expand All @@ -266,8 +211,8 @@ public Fingerprint addStringMap(Map<String, String> inputs) {
}

/**
* Updates the digest with a list of paths by iterating over them and
* invoking {@link #addPath(PathFragment)} on each element.
* Add the supplied sequence of {@link PathFragment}s to the digest as an atomic unit, that is
* this is different from adding each item individually.
*
* @param inputs the paths with which to update the digest
*/
Expand All @@ -280,18 +225,6 @@ public Fingerprint addPaths(Iterable<PathFragment> inputs) {
return this;
}

/**
* Reset the Fingerprint for additional use as though previous digesting had not been done.
*/
public void reset() {
md5.reset();
}

private void updateFromScratch(int numBytes) {
md5.update(scratch.array(), 0, numBytes);
scratch.clear();
}

private static MessageDigest cloneOrCreateMd5() {
if (MD5_PROTOTYPE_SUPPORTS_CLONE) {
try {
Expand Down
Loading

0 comments on commit e3db95e

Please sign in to comment.