Skip to content

Commit

Permalink
Patch a bug introduced by Hadoop 0.4.0, which requires specified input directories to exist.
Browse files Browse the repository at this point in the history


git-svn-id: https://svn.apache.org/repos/asf/lucene/nutch/trunk@421185 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
cutting committed Jul 12, 2006
1 parent bfda7b0 commit 3367ea7
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/java/org/apache/nutch/crawl/CrawlDb.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,20 @@ public void update(Path crawlDb, Path segment) throws IOException {
if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); }
}

public static JobConf createJob(Configuration config, Path crawlDb) {
public static JobConf createJob(Configuration config, Path crawlDb)
throws IOException {
Path newCrawlDb =
new Path(crawlDb,
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

JobConf job = new NutchJob(config);
job.setJobName("crawldb " + crawlDb);

job.addInputPath(new Path(crawlDb, CrawlDatum.DB_DIR_NAME));

Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
if (FileSystem.get(job).exists(current)) {
job.addInputPath(current);
}
job.setInputFormat(SequenceFileInputFormat.class);
job.setInputKeyClass(UTF8.class);
job.setInputValueClass(CrawlDatum.class);
Expand Down

0 comments on commit 3367ea7

Please sign in to comment.