36
36
*/
37
37
38
38
public class DocIDServer extends Configurable {
39
+ private static final Logger logger = LoggerFactory .getLogger (DocIDServer .class );
39
40
41
+ private final Database docIDsDB ;
40
42
private static final String DATABASE_NAME = "DocIDs" ;
41
- protected static final Logger logger = LoggerFactory .getLogger (DocIDServer .class );
42
43
43
- protected Database docIDsDB = null ;
44
+ private final Object mutex = new Object () ;
44
45
45
- protected final Object mutex = new Object ();
46
-
47
- protected int lastDocID ;
46
+ private int lastDocID ;
48
47
49
48
public DocIDServer (Environment env , CrawlConfig config ) {
50
49
super (config );
51
50
DatabaseConfig dbConfig = new DatabaseConfig ();
52
51
dbConfig .setAllowCreate (true );
53
52
dbConfig .setTransactional (config .isResumableCrawling ());
54
53
dbConfig .setDeferredWrite (!config .isResumableCrawling ());
54
+ lastDocID = 0 ;
55
55
docIDsDB = env .openDatabase (null , DATABASE_NAME , dbConfig );
56
56
if (config .isResumableCrawling ()) {
57
57
int docCount = getDocCount ();
58
58
if (docCount > 0 ) {
59
59
logger .info ("Loaded {} URLs that had been detected in previous crawl." , docCount );
60
60
lastDocID = docCount ;
61
61
}
62
- } else {
63
- lastDocID = 0 ;
64
62
}
65
63
}
66
64
@@ -72,46 +70,41 @@ public DocIDServer(Environment env, CrawlConfig config) {
72
70
*/
73
71
public int getDocId (String url ) {
74
72
synchronized (mutex ) {
75
- int docID = -1 ;
76
-
77
- if (docIDsDB != null ) {
78
- OperationStatus result = null ;
79
- DatabaseEntry value = new DatabaseEntry ();
80
- try {
81
- DatabaseEntry key = new DatabaseEntry (url .getBytes ());
82
- result = docIDsDB .get (null , key , value , null );
73
+ OperationStatus result = null ;
74
+ DatabaseEntry value = new DatabaseEntry ();
75
+ try {
76
+ DatabaseEntry key = new DatabaseEntry (url .getBytes ());
77
+ result = docIDsDB .get (null , key , value , null );
83
78
84
- } catch (Exception e ) {
85
- logger .error ("Exception thrown while getting DocID" , e );
86
- }
79
+ } catch (Exception e ) {
80
+ logger .error ("Exception thrown while getting DocID" , e );
81
+ return -1 ;
82
+ }
87
83
88
- if ((result != null ) && (result == OperationStatus .SUCCESS ) && (value .getData ().length > 0 )) {
89
- docID = Util .byteArray2Int (value .getData ());
90
- }
84
+ if ((result == OperationStatus .SUCCESS ) && (value .getData ().length > 0 )) {
85
+ return Util .byteArray2Int (value .getData ());
91
86
}
92
87
93
- return docID ;
88
+ return - 1 ;
94
89
}
95
90
}
96
91
97
92
public int getNewDocID (String url ) {
98
-
99
93
synchronized (mutex ) {
100
- int docID = -1 ;
101
94
try {
102
95
// Make sure that we have not already assigned a docid for this URL
103
- docID = getDocId (url );
104
-
105
- if (docID <= 0 ) {
106
- lastDocID ++;
107
- docIDsDB .put (null , new DatabaseEntry (url .getBytes ()), new DatabaseEntry (Util .int2ByteArray (lastDocID )));
108
- docID = lastDocID ;
96
+ int docID = getDocId (url );
97
+ if (docID > 0 ) {
98
+ return docID ;
109
99
}
100
+
101
+ ++lastDocID ;
102
+ docIDsDB .put (null , new DatabaseEntry (url .getBytes ()), new DatabaseEntry (Util .int2ByteArray (lastDocID )));
103
+ return lastDocID ;
110
104
} catch (Exception e ) {
111
105
logger .error ("Exception thrown while getting new DocID" , e );
106
+ return -1 ;
112
107
}
113
-
114
- return docID ;
115
108
}
116
109
}
117
110
@@ -140,14 +133,12 @@ public boolean isSeenBefore(String url) {
140
133
}
141
134
142
135
public final int getDocCount () {
143
- int count = -1 ;
144
-
145
136
try {
146
- count = (int ) docIDsDB .count ();
137
+ return (int ) docIDsDB .count ();
147
138
} catch (DatabaseException e ) {
148
139
logger .error ("Exception thrown while getting DOC Count" , e );
140
+ return -1 ;
149
141
}
150
- return count ;
151
142
}
152
143
153
144
public void close () {
0 commit comments