Skip to content

Commit

Permalink
Fixed a collision handling issue in IdMapper
Browse files Browse the repository at this point in the history
where encoded values where compared before clearing an internal collision
flag on each value. Better testing was added around collision handling to
assert correct behaviour there as well.
  • Loading branch information
tinwelint committed Dec 10, 2014
1 parent 63afc67 commit f57da0d
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import static org.neo4j.collection.primitive.PrimitiveIntCollections.alwaysTrue;
import static org.neo4j.graphdb.DynamicLabel.label;
import static org.neo4j.helpers.ArrayUtil.join;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,15 @@
import org.neo4j.collection.primitive.PrimitiveIntIterator;
import org.neo4j.collection.primitive.PrimitiveLongIterator;
import org.neo4j.graphdb.Resource;
import org.neo4j.graphdb.ResourceIterable;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.helpers.CloneableInPublic;
import org.neo4j.helpers.Function;
import org.neo4j.helpers.Pair;
import org.neo4j.kernel.impl.util.PrimitiveLongResourceIterator;

import static java.util.EnumSet.allOf;

import static org.neo4j.helpers.collection.Iterables.map;

/**
Expand Down Expand Up @@ -1101,6 +1103,18 @@ protected T fetchNextOrNull()
};
}

public static <T> ResourceIterable<T> resourceIterable( final Iterable<T> iterable )
{
return new ResourceIterable<T>()
{
@Override
public ResourceIterator<T> iterator()
{
return resourceIterator( iterable.iterator(), Resource.EMPTY );
}
};
}

@SafeVarargs
public static <T> T[] array( T... items )
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.neo4j.graphdb.ResourceIterable;
import org.neo4j.graphdb.ResourceIterator;
Expand Down Expand Up @@ -200,9 +202,7 @@ private int detectAndMarkCollisions()

if ( trackerCache.get( i ) > trackerCache.get( i + 1 ) )
{ // swap
int temp = trackerCache.get( i );
trackerCache.set( i, trackerCache.get( i + 1 ) );
trackerCache.set( i + 1, temp );
trackerCache.swap( i, i+1, 1 );
}
long value = dataCache.get( trackerCache.get( i ) );
value = setCollision( value );
Expand All @@ -219,12 +219,19 @@ private int detectAndMarkCollisions()
private void buildCollisionInfo( Iterator<Object> ids )
{
int collisionIndex = 0;
Set<Object> collidedIds = new HashSet<>();
for ( long i = 0; ids.hasNext(); i++ )
{
Object id = ids.next();
long value = dataCache.get( i );
if ( isCollision( value ) )
{
if ( !collidedIds.add( id ) )
{
throw new IllegalStateException( "Duplicate input ids. '" +
id + "' existed in input more than once" );
}

long val = encoder.encode( id );
assert val == clearCollision( value );
int valueIndex = collisionValues.size();
Expand Down Expand Up @@ -311,21 +318,17 @@ private int findFromCollisions( long index, Object inputId )
return -1;
}

long val = dataCache.get( trackerCache.get( index ) );

// This assertion doesn't work since an Encoder can be stateful, and so to code a certain value
// into a certain long, any previous encoded values would have to be encoded as well, from a reset state.
// An assertion like this would be nice to have, although it would require an added Encoder#reset()
// and also making sure the algorithm would end up here through the same "encode path" as last time
// we encoded this exact value.
// assert val == encoder.encode( inputId );
long val = clearCollision( dataCache.get( trackerCache.get( index ) ) );
assert val == encoder.encode( inputId );

while ( unsignedCompare( val, dataCache.get( trackerCache.get( index - 1 ) ), CompareType.EQ ) )
while ( index > 0 &&
unsignedCompare( val, clearCollision( dataCache.get( trackerCache.get( index - 1 ) ) ), CompareType.EQ ) )
{
index--;
}
long fromIndex = index;
while ( unsignedCompare( val, dataCache.get( trackerCache.get( index + 1 ) ), CompareType.EQ ) )
while ( index < trackerCache.highestSetIndex() &&
unsignedCompare( val, clearCollision( dataCache.get( trackerCache.get( index + 1 ) ) ), CompareType.EQ ) )
{
index++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.neo4j.unsafe.impl.batchimport.cache.idmapping.string;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
Expand All @@ -32,6 +33,7 @@
import org.neo4j.graphdb.ResourceIterable;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.helpers.Exceptions;
import org.neo4j.helpers.collection.IteratorUtil;
import org.neo4j.helpers.collection.PrefetchingIterator;
import org.neo4j.helpers.collection.PrefetchingResourceIterator;
import org.neo4j.test.RepeatRule;
Expand All @@ -45,7 +47,11 @@
import static java.lang.System.currentTimeMillis;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import static org.neo4j.graphdb.Resource.EMPTY;
import static org.neo4j.helpers.collection.IteratorUtil.resourceIterator;
Expand Down Expand Up @@ -171,6 +177,59 @@ public void shouldEncodeSmallSetOfRandomData() throws Throwable
}
}

@Test
public void shouldRefuseCollisionsBasedOnSameInputId() throws Exception
{
// GIVEN
IdMapper mapper = new EncodingIdMapper( NumberArrayFactory.HEAP, new StringEncoder(), new Radix.String() );
ResourceIterable<Object> ids =
IteratorUtil.<Object>resourceIterable( Arrays.<Object>asList( "10", "9", "10" ) );
try ( ResourceIterator<Object> iterator = ids.iterator() )
{
for ( int i = 0; iterator.hasNext(); i++ )
{
mapper.put( iterator.next(), i );
}
}

// WHEN
try
{
mapper.prepare( ids );
fail( "Should have failed" );
}
catch ( IllegalStateException e )
{
// THEN
assertTrue( e.getMessage().contains( "10" ) );
}
}

@Test
public void shouldCopyWithCollisionsBasedOnDifferentInputIds() throws Exception
{
// GIVEN
Encoder encoder = mock( Encoder.class );
when( encoder.encode( any() ) ).thenReturn( 12345L );
IdMapper mapper = new EncodingIdMapper( NumberArrayFactory.HEAP, encoder, new Radix.String() );
ResourceIterable<Object> ids =
IteratorUtil.<Object>resourceIterable( Arrays.<Object>asList( "10", "9" ) );
try ( ResourceIterator<Object> iterator = ids.iterator() )
{
for ( int i = 0; iterator.hasNext(); i++ )
{
mapper.put( iterator.next(), i );
}
}

// WHEN
mapper.prepare( ids );

// THEN
assertEquals( 0L, mapper.get( "10" ) );
assertEquals( 1L, mapper.get( "9" ) );
}

private class ValueGenerator implements ResourceIterable<Object>
{
private final int size;
Expand Down

0 comments on commit f57da0d

Please sign in to comment.