Skip to content

Commit

Permalink
NUTCH-2695: fix some alerts raised by LGTM
Browse files Browse the repository at this point in the history
- redundant null checks in OutlinkExtractor, protocol-http,
  ParsePluginsReader, parse-html, parse-tika
- LOG.equals(...) was called instead of LOG.error(...) in ArcRecordReader
- constant loop condition in protocol-http
- implicit conversion from array to string in CrawlDbReducer
- `Missing format argument` in log methods (format string lacked a `{}`
  placeholder for its last argument)
- spurious Javadoc @param tags
  • Loading branch information
sebastian-nagel committed Feb 22, 2019
1 parent 3abe7db commit 31ecf64
Show file tree
Hide file tree
Showing 20 changed files with 84 additions and 102 deletions.
4 changes: 3 additions & 1 deletion src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.StringUtil;

/** Merge new page entries with existing entries. */
public class CrawlDbReducer extends
Expand Down Expand Up @@ -169,7 +170,8 @@ public void reduce(Text key, Iterable<CrawlDatum> values,
context.getCounter("CrawlDB status",
CrawlDatum.getStatusName(old.getStatus())).increment(1);
} else {
LOG.warn("Missing fetch and old value, signature=" + signature);
LOG.warn("Missing fetch and old value, signature="
+ StringUtil.toHexString(signature));
}
return;
}
Expand Down
3 changes: 0 additions & 3 deletions src/java/org/apache/nutch/hostdb/UpdateHostDbMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ public class UpdateHostDbMapper
protected URLFilters filters = null;
protected URLNormalizers normalizers = null;

/**
* @param job
*/
@Override
public void setup(Mapper<Text, Writable, Text, NutchWritable>.Context context) {
Configuration conf = context.getConfiguration();
Expand Down
2 changes: 0 additions & 2 deletions src/java/org/apache/nutch/hostdb/UpdateHostDbReducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ public class UpdateHostDbReducer

/**
* Configures the thread pool and prestarts all resolver threads.
*
* @param job
*/
@Override
public void setup(Reducer<Text, NutchWritable, Text, HostDatum>.Context context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ protected int process(String url, StringBuilder output) throws Exception {

output.append("\n"); // For readability if keepClientCnxOpen

if (getConf().getBoolean("doIndex", false) && doc != null) {
if (getConf().getBoolean("doIndex", false)) {
IndexWriters writers = IndexWriters.get(getConf());
writers.open(getConf(), "IndexingFilterChecker");
writers.write(doc);
Expand Down
2 changes: 1 addition & 1 deletion src/java/org/apache/nutch/parse/OutlinkExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public static Outlink[] getOutlinks(final String plainText, String anchor,
final Outlink[] retval;

// create array of the Outlinks
if (outlinks != null && outlinks.size() > 0) {
if (outlinks.size() > 0) {
retval = outlinks.toArray(new Outlink[0]);
} else {
retval = new Outlink[0];
Expand Down
18 changes: 4 additions & 14 deletions src/java/org/apache/nutch/parse/ParseData.java
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,10 @@ public final void readFields(DataInput in) throws IOException {
outlinks[i] = Outlink.read(in);
}

if (version < 3) {
int propertyCount = in.readInt(); // read metadata
contentMeta.clear();
for (int i = 0; i < propertyCount; i++) {
contentMeta.add(Text.readString(in), Text.readString(in));
}
} else {
contentMeta.clear();
contentMeta.readFields(in);
}
if (version > 3) {
parseMeta.clear();
parseMeta.readFields(in);
}
contentMeta.clear();
contentMeta.readFields(in);
parseMeta.clear();
parseMeta.readFields(in);
}

public final void write(DataOutput out) throws IOException {
Expand Down
2 changes: 1 addition & 1 deletion src/java/org/apache/nutch/parse/ParsePluginsReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ private Map<String, String> getAliases(Element parsePluginsRoot) {
Map<String, String> aliases = new HashMap<>();
NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");

if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
if (aliasRoot == null || aliasRoot.getLength() == 0) {
if (LOG.isWarnEnabled()) {
LOG.warn("No aliases defined in parse-plugins.xml!");
}
Expand Down
4 changes: 2 additions & 2 deletions src/java/org/apache/nutch/segment/SegmentMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -392,15 +392,15 @@ public void map(Text key, MetaWrapper value,
try {
url = normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); // normalize the url.
} catch (Exception e) {
LOG.warn("Skipping {} :", url, e.getMessage());
LOG.warn("Skipping {} : {}", url, e.getMessage());
url = null;
}
}
if (url != null && filters != null) {
try {
url = filters.filter(url);
} catch (Exception e) {
LOG.warn("Skipping key {} : ", url, e.getMessage());
LOG.warn("Skipping key {} : {}", url, e.getMessage());
url = null;
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/java/org/apache/nutch/service/impl/LinkReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public List read(String path) throws FileNotFoundException {

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
throw new WebApplicationException();
}

Expand Down Expand Up @@ -93,7 +93,7 @@ public List head(String path, int nrows) throws FileNotFoundException {

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
throw new WebApplicationException();
}

Expand Down Expand Up @@ -127,7 +127,7 @@ public List slice(String path, int start, int end)

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
throw new WebApplicationException();
}

Expand All @@ -151,7 +151,7 @@ public int count(String path) throws FileNotFoundException {
} catch(FileNotFoundException fne){
throw new FileNotFoundException();
}catch (IOException e) {
LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
throw new WebApplicationException();
}
return i;
Expand Down
8 changes: 4 additions & 4 deletions src/java/org/apache/nutch/service/impl/NodeReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public List read(String path) throws FileNotFoundException {

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e));
LOG.error("Error occurred while reading file {} : {}", file, StringUtils.stringifyException(e));
throw new WebApplicationException();
}

Expand Down Expand Up @@ -93,7 +93,7 @@ public List head(String path, int nrows) throws FileNotFoundException {

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand Down Expand Up @@ -128,7 +128,7 @@ public List slice(String path, int start, int end)

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand Down Expand Up @@ -157,7 +157,7 @@ public int count(String path) throws FileNotFoundException {

}catch (IOException e) {
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand Down
8 changes: 4 additions & 4 deletions src/java/org/apache/nutch/service/impl/SequenceReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public List<List<String>> read(String path) throws FileNotFoundException {
}catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand Down Expand Up @@ -99,7 +99,7 @@ public List<List<String>> head(String path, int nrows)
throw new FileNotFoundException();
}catch (IOException e) {
// TODO Auto-generated catch block
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand Down Expand Up @@ -134,7 +134,7 @@ public List<List<String>> slice(String path, int start, int end)
throw new FileNotFoundException();
}catch (IOException e) {
// TODO Auto-generated catch block
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand All @@ -161,7 +161,7 @@ public int count(String path) throws FileNotFoundException {
throw new FileNotFoundException();
}catch (IOException e) {
// TODO Auto-generated catch block
LOG.error("Error occurred while reading file {} : ", file,
LOG.error("Error occurred while reading file {} : {}", file,
StringUtils.stringifyException(e));
throw new WebApplicationException();
}
Expand Down
4 changes: 2 additions & 2 deletions src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ public RecordReader<Text, BytesWritable> createRecordReader(InputSplit split,
* The InputSplit of the arc file to process.
* @param job
* The job configuration.
* @param reporter
* The progress reporter.
* @param context
* The task context.
*/
public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split,
Job job, Context context) throws IOException {
Expand Down
2 changes: 1 addition & 1 deletion src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ public boolean next(Text key, BytesWritable value) throws IOException {
return true;
}
} catch (Exception e) {
LOG.equals(StringUtils.stringifyException(e));
LOG.error("Failed reading ARC record: ", e);
}

// couldn't populate the record or there is no next record to read
Expand Down
4 changes: 2 additions & 2 deletions src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ private ParseStatus output(Context context,
* and other relevant data.
* </p>
*
* @param job
* The job configuration.
* @param context
* The task context.
*/
public void setup(Mapper<Text, BytesWritable, Text, NutchWritable>.Context context) {
// set the url filters, scoring filters the parse util and the url
Expand Down
6 changes: 2 additions & 4 deletions src/java/org/apache/nutch/util/EncodingDetector.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,8 @@ public void autoDetectClues(Content content, boolean filter) {
// will sometimes throw exceptions
try {
detector.enableInputFilter(filter);
if (data.length > MIN_LENGTH) {
detector.setText(data);
matches = detector.detectAll();
}
detector.setText(data);
matches = detector.detectAll();
} catch (Exception e) {
LOG.debug("Exception from ICU4J (ignoring): ", e);
}
Expand Down
3 changes: 1 addition & 2 deletions src/java/org/apache/nutch/util/MimeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ public String autoResolveContentType(String typeName, String url, byte[] data) {
}

// if returned null, or if it's the default type then try url resolution
if (type == null
|| (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
if (type == null || type.getName().equals(MimeTypes.OCTET_STREAM)) {
// If no mime-type header, or cannot find a corresponding registered
// mime-type, then guess a mime-type from the url pattern
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,35 +81,32 @@ private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node,
String name = nameNode.getNodeValue().toLowerCase();
metaTags.getGeneralTags().add(name, contentNode.getNodeValue());
if ("robots".equals(name)) {
String directives = contentNode.getNodeValue().toLowerCase();
int index = directives.indexOf("none");

if (contentNode != null) {
String directives = contentNode.getNodeValue().toLowerCase();
int index = directives.indexOf("none");

if (index >= 0) {
metaTags.setNoIndex();
metaTags.setNoFollow();
}
if (index >= 0) {
metaTags.setNoIndex();
metaTags.setNoFollow();
}

index = directives.indexOf("all");
if (index >= 0) {
// do nothing...
}
index = directives.indexOf("all");
if (index >= 0) {
// do nothing...
}

index = directives.indexOf("noindex");
if (index >= 0) {
metaTags.setNoIndex();
}
index = directives.indexOf("noindex");
if (index >= 0) {
metaTags.setNoIndex();
}

index = directives.indexOf("nofollow");
if (index >= 0) {
metaTags.setNoFollow();
}
index = directives.indexOf("nofollow");
if (index >= 0) {
metaTags.setNoFollow();
}

index = directives.indexOf("noarchive");
if (index >= 0) {
metaTags.setNoCache();
}
index = directives.indexOf("noarchive");
if (index >= 0) {
metaTags.setNoCache();
}

} // end if (name == robots)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,35 +82,32 @@ private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node,
String name = nameNode.getNodeValue().toLowerCase();
metaTags.getGeneralTags().add(name, contentNode.getNodeValue());
if ("robots".equals(name)) {
String directives = contentNode.getNodeValue().toLowerCase();
int index = directives.indexOf("none");

if (contentNode != null) {
String directives = contentNode.getNodeValue().toLowerCase();
int index = directives.indexOf("none");

if (index >= 0) {
metaTags.setNoIndex();
metaTags.setNoFollow();
}
if (index >= 0) {
metaTags.setNoIndex();
metaTags.setNoFollow();
}

index = directives.indexOf("all");
if (index >= 0) {
// do nothing...
}
index = directives.indexOf("all");
if (index >= 0) {
// do nothing...
}

index = directives.indexOf("noindex");
if (index >= 0) {
metaTags.setNoIndex();
}
index = directives.indexOf("noindex");
if (index >= 0) {
metaTags.setNoIndex();
}

index = directives.indexOf("nofollow");
if (index >= 0) {
metaTags.setNoFollow();
}
index = directives.indexOf("nofollow");
if (index >= 0) {
metaTags.setNoFollow();
}

index = directives.indexOf("noarchive");
if (index >= 0) {
metaTags.setNoCache();
}
index = directives.indexOf("noarchive");
if (index >= 0) {
metaTags.setNoCache();
}

} // end if (name == robots)
Expand Down
Loading

0 comments on commit 31ecf64

Please sign in to comment.