From 3367ea7f2602189d7620aac4ec34be3c098627c1 Mon Sep 17 00:00:00 2001
From: Douglass Cutting <cutting@apache.org>
Date: Wed, 12 Jul 2006 08:16:37 +0000
Subject: [PATCH] Patch a bug introduced by Hadoop 0.4.0, which requires
 specified input directories to exist.

git-svn-id: https://svn.apache.org/repos/asf/lucene/nutch/trunk@421185 13f79535-47bb-0310-9956-ffa450edef68
---
 src/java/org/apache/nutch/crawl/CrawlDb.java | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/java/org/apache/nutch/crawl/CrawlDb.java b/src/java/org/apache/nutch/crawl/CrawlDb.java
index 7042274259..20482f1a62 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDb.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDb.java
@@ -65,7 +65,8 @@ public void update(Path crawlDb, Path segment) throws IOException {
     if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); }
   }
 
-  public static JobConf createJob(Configuration config, Path crawlDb) {
+  public static JobConf createJob(Configuration config, Path crawlDb)
+    throws IOException {
     Path newCrawlDb =
       new Path(crawlDb,
                Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
@@ -73,7 +74,11 @@ public static JobConf createJob(Configuration config, Path crawlDb) {
     JobConf job = new NutchJob(config);
     job.setJobName("crawldb " + crawlDb);
 
-    job.addInputPath(new Path(crawlDb, CrawlDatum.DB_DIR_NAME));
+
+    Path current = new Path(crawlDb, CrawlDatum.DB_DIR_NAME);
+    if (FileSystem.get(job).exists(current)) {
+      job.addInputPath(current);
+    }
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setInputKeyClass(UTF8.class);
     job.setInputValueClass(CrawlDatum.class);