Skip to content

Commit

Permalink
Merge branch 'master' into revisit-redux
Browse files Browse the repository at this point in the history
Conflicts:
	modules/src/main/java/org/archive/modules/CrawlURI.java
	modules/src/main/java/org/archive/modules/writer/WARCWriterProcessor.java
  • Loading branch information
nlevitt committed Jul 10, 2014
2 parents cc18521 + f101bba commit d9fd668
Show file tree
Hide file tree
Showing 81 changed files with 7,513 additions and 1,361 deletions.
19 changes: 4 additions & 15 deletions commons/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.3.1</version>
<version>4.3.3</version>
</dependency>
<dependency>
<groupId>com.sleepycat</groupId>
Expand Down Expand Up @@ -162,11 +162,6 @@
<artifactId>spring-expression</artifactId>
<version>3.0.5.RELEASE</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20090211</version>
</dependency>

<dependency>
<groupId>com.esotericsoftware</groupId>
Expand All @@ -187,22 +182,16 @@
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>r08</version>
</dependency>

<dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<version>3.2.3</version>
</dependency>

<dependency>
<groupId>org.archive</groupId>
<artifactId>ia-web-commons</artifactId>
<version>1.1.1-SNAPSHOT</version>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
<version>1.1.1-IA-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@

import org.archive.bdb.KryoBinding;

import com.google.common.collect.MapEvictionListener;
import com.google.common.collect.MapMaker;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.bind.tuple.TupleBinding;
Expand Down Expand Up @@ -64,8 +65,9 @@
* @author paul baclace (conversion to ConcurrentMap)
*
*/
public class ObjectIdentityBdbManualCache<V extends IdentityCacheable>
implements ObjectIdentityCache<V>, Closeable, Serializable, MapEvictionListener<String, V> {
@SuppressWarnings("ALL")
public class ObjectIdentityBdbManualCache<V extends IdentityCacheable>
implements ObjectIdentityCache<V>, Closeable, Serializable {
private static final long serialVersionUID = 1L;
private static final Logger logger =
Logger.getLogger(ObjectIdentityBdbManualCache.class.getName());
Expand Down Expand Up @@ -110,6 +112,18 @@ public class ObjectIdentityBdbManualCache<V extends IdentityCacheable>
*/
public ObjectIdentityBdbManualCache() {
    super();
    // Record of items that must still be persisted. Entries are pushed to
    // diskMap automatically once they have gone 5 minutes without being
    // rewritten, or when more than 10000 entries would accumulate.
    dirtyItems = CacheBuilder.newBuilder()
            .maximumSize(10000)
            .expireAfterWrite(5, TimeUnit.MINUTES)
            .removalListener(new RemovalListener<String, V>() {
                @Override
                public void onRemoval(RemovalNotification<String, V> notification) {
                    // Unlike the MapMaker eviction listener this replaces, a
                    // Guava RemovalListener is notified of EVERY removal,
                    // including explicit remove() calls and values replaced
                    // by put(). Only automatic evictions (size/expiry) should
                    // be counted and written through to disk here.
                    if (!notification.wasEvicted()) {
                        return;
                    }
                    evictions.incrementAndGet();
                    // diskMap is assigned in initialize(); evictions can only
                    // occur after entries have been added to dirtyItems.
                    diskMap.put(notification.getKey(), notification.getValue());
                }
            })
            .<String, V>build()
            .asMap();
}

/**
Expand All @@ -127,16 +141,18 @@ public ObjectIdentityBdbManualCache() {
public void initialize(final Environment env, String dbName,
final Class valueClass, final StoredClassCatalog classCatalog)
throws DatabaseException {
// TODO: tune capacity for actual threads, expected size of key caches?
this.memMap = new MapMaker().concurrencyLevel(64).initialCapacity(8192).softValues().makeMap();
// TODO: tune capacity for actual threads, expected size of key caches?
this.memMap = CacheBuilder.newBuilder()
.concurrencyLevel(64)
.initialCapacity(8192)
.softValues()
.<String, V>build()
.asMap();
this.db = openDatabase(env, dbName);
this.diskMap = createDiskMap(this.db, classCatalog, valueClass);
// keep a record of items that must be persisted; auto-persist an item if
// it is unchanged after 5 minutes, or if more than 10K items would accumulate
this.dirtyItems = new MapMaker().concurrencyLevel(64)
.maximumSize(10000).expireAfterWrite(5,TimeUnit.MINUTES)
.evictionListener(this).makeMap();


this.count = new AtomicLong(diskMap.size());
}

Expand Down Expand Up @@ -363,9 +379,9 @@ public void dirtyKey(String key) {
dirtyItems.put(key,val);
}

@Override
public void onEviction(String key, V val) {
/*@Override
public void onRemoval(RemovalNotification<String, V> stringVRemovalNotification) {
evictions.incrementAndGet();
diskMap.put(key, val);
}
diskMap.put(stringVRemovalNotification.getKey(), stringVRemovalNotification.getValue());
}*/
}
2 changes: 1 addition & 1 deletion commons/src/main/java/org/archive/util/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ private static void scanSuite(TestSuite suite, File start, File dir)
}
cname = cname.replace(File.separatorChar, '.');
cname = cname.substring(0, cname.length() - 5);
suite.addTestSuite(Class.forName(cname));
suite.addTestSuite((Class<? extends TestCase>) Class.forName(cname));
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion commons/src/main/java/org/archive/util/UriUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ public static String speculativeFixup(String candidate, UURI base) {
"tt", "u", "ul", "var", "video", "wbr"));
}

protected static final String QNV = "[a-zA-Z_]+=(?:[\\w-/.]|%[0-9a-fA-F]{2})+"; // name=value for query strings
protected static final String QNV = "[a-zA-Z_]+=(?:[\\w-/.]|%[0-9a-fA-F]{2})*"; // name=value for query strings
protected static final String VERY_LIKELY_RELATIVE_URI_PATTERN =
"(?:\\.?/)?" // may start with "/" or "./"
+ "(?:(?:[\\w-]+|\\.\\.)/)*" // may have path/segments/
Expand Down
5 changes: 5 additions & 0 deletions contrib/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@
<artifactId>amqp-client</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.0</version>
</dependency>
</dependencies>
<repositories>
<repository>
Expand Down
Loading

0 comments on commit d9fd668

Please sign in to comment.