forked from elastic/logstash
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
elastic#7128 ingest json to grok js converter
Fixes elastic#7238
- Loading branch information
1 parent
c95e643
commit 9a3ffb8
Showing
7 changed files
with
267 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
// Sub-projects that take part in the multi-project build.
include ':logstash-core', 'logstash-core-benchmarks', 'ingest-converter'

// Each sub-project lives outside the default "<root>/<name>" location, so map it explicitly.
[
    ':logstash-core'           : './logstash-core',
    ':logstash-core-benchmarks': './logstash-core/benchmarks',
    ':ingest-converter'        : './tools/ingest-converter'
].each { projectPath, directory ->
    project(projectPath).projectDir = new File(directory)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import org.yaml.snakeyaml.Yaml

// Build-script classpath: SnakeYAML is used below to read versions.yml, Shadow builds the fat jar.
buildscript {
    repositories {
        mavenCentral()
        jcenter()
    }
    dependencies {
        classpath 'org.yaml:snakeyaml:1.17'
        classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.4'
    }
}

apply plugin: 'java'
apply plugin: 'idea'

// The project version is taken from Logstash's master versions.yml file, two levels up.
File versionsFile = new File("${projectDir}/../../versions.yml")
Map versionMap = (Map) new Yaml().load(versionsFile.text)

group = 'org.logstash'
description = 'Ingest JSON to Logstash Grok Config Converter'
version = versionMap['logstash-core']

project.sourceCompatibility = JavaVersion.VERSION_1_8
project.targetCompatibility = JavaVersion.VERSION_1_8

repositories {
    mavenCentral()
    jcenter()
}

dependencies {
    testCompile 'junit:junit:4.12'
}

javadoc {
    enabled = true
}

apply plugin: 'com.github.johnrengelman.shadow'

// Produce a plain "ingest-converter.jar" without version or classifier suffix.
shadowJar {
    baseName = 'ingest-converter'
    classifier = null
    version = null
}
41 changes: 41 additions & 0 deletions
41
tools/ingest-converter/src/main/java/org/logstash/ingest/Grok.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package org.logstash.ingest; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStreamReader; | ||
import java.io.Reader; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import javax.script.Invocable; | ||
import javax.script.ScriptEngine; | ||
import javax.script.ScriptEngineManager; | ||
import javax.script.ScriptException; | ||
|
||
/**
 * Ingest JSON DSL to Logstash Grok Transpiler.
 *
 * <p>Command line wrapper around the bundled {@code /ingest-to-grok.js} script: the script is
 * evaluated with the {@code nashorn} script engine and its {@code json_to_grok} function is
 * invoked with the content of the input file.</p>
 */
public final class Grok {

    private Grok() {
        // Utility Wrapper for JS Script.
    }

    /**
     * Converts an Ingest JSON file into a Logstash Grok configuration file.
     *
     * @param args {@code args[0]} is the path of the Ingest JSON file to read (UTF-8),
     *     {@code args[1]} is the path of the Grok file to write (UTF-8); further arguments
     *     are ignored
     * @throws ScriptException if evaluating the script or running the conversion fails
     * @throws NoSuchMethodException if the script does not define {@code json_to_grok}
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IllegalStateException if the {@code nashorn} engine is unavailable or reading or
     *     writing a file fails
     */
    public static void main(final String... args) throws ScriptException, NoSuchMethodException {
        // Fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                "Usage: Grok <input ingest JSON file> <output grok config file>"
            );
        }
        // Decode the bundled script as UTF-8 explicitly rather than with the platform charset.
        try (final Reader reader = new InputStreamReader(
            Grok.class.getResourceAsStream("/ingest-to-grok.js"), StandardCharsets.UTF_8
        )) {
            final ScriptEngine engine =
                new ScriptEngineManager().getEngineByName("nashorn");
            if (engine == null) {
                // getEngineByName returns null when no matching engine is installed.
                throw new IllegalStateException(
                    "JavaScript engine 'nashorn' is not available on this JVM"
                );
            }
            engine.eval(reader);
            final String ingestJson = new String(
                Files.readAllBytes(Paths.get(args[0])), StandardCharsets.UTF_8
            );
            final String grok =
                (String) ((Invocable) engine).invokeFunction("json_to_grok", ingestJson);
            Files.write(Paths.get(args[1]), grok.getBytes(StandardCharsets.UTF_8));
        } catch (final IOException ex) {
            throw new IllegalStateException(ex);
        }
    }
}
116 changes: 116 additions & 0 deletions
116
tools/ingest-converter/src/main/resources/ingest-to-grok.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/**
 * Converts Ingest JSON to Grok.
 * Every entry of the top-level "processors" array is rendered as its own
 * `filter { grok { match => { ... } } }` section; sections are separated by a blank line.
 * @param json Ingest pipeline definition as JSON text
 * @returns {string} Logstash Grok configuration text, terminated by a newline
 */
function json_to_grok(json) {

    // Spaces added per nesting level when re-indenting the generated config.
    var INDENT_WIDTH = 3;

    /**
     * Wraps the given text in double quotes, escaping the double quotes it contains.
     */
    function quoted(text) {
        return "\"" + text.replace(/"/g, "\\\"") + "\"";
    }

    /**
     * Puts the given body between curly braces, each brace on its own line.
     */
    function braced(body) {
        return "{\n" + body + "\n}";
    }

    /**
     * Renders `key => value`.
     */
    function assignment(key, value) {
        return key + " => " + value;
    }

    /**
     * Renders `name { body }`.
     */
    function section(name, body) {
        return name + " " + braced(body);
    }

    /**
     * Rewrites dotted field names (`name.qualifier.sub`) found in the fragment into the
     * bracketed form ([name][qualifier][sub]). The three replacements depend on each other
     * and must run in this order.
     */
    function rename_fields(fragment) {
        var bracketed = fragment.replace(/(\w*)\.(\w*)/g, "$1][$2");
        var closed = bracketed.replace(/(\w+)}/g, "$1]}");
        return closed.replace(/{(\w+):(\w+)]/g, "{$1:[$2]");
    }

    /**
     * Converts a single Ingest pattern: everything outside the `(?:%{...}|-)` span found by
     * the regular expression below gets its field names rewritten, the span itself is copied
     * through untouched.
     * @param pattern Ingest pattern string
     * @returns {string} pattern with Grok style field names
     */
    function convert_pattern(pattern) {
        var untouched = pattern.match(/\(\?:%{.*\|-\)/) || [];
        var remainder = pattern;
        var converted = "";
        untouched.forEach(function (span) {
            var pieces = remainder.split(span, 2);
            remainder = pieces[1];
            converted += rename_fields(pieces[0]) + span;
        });
        return converted + rename_fields(remainder);
    }

    /**
     * Renders the pattern array, one converted and quoted pattern per line.
     * @param patterns Pattern Array in JSON formatting
     * @returns {string} Pattern array in Grok formatting
     */
    function pattern_array(patterns) {
        var rendered = patterns.map(function (pattern) {
            return quoted(convert_pattern(pattern));
        });
        return "[\n" + rendered.join(",\n") + "\n]";
    }

    /**
     * Indents unindented Grok text: a line ending in `{` or `[` opens a level, a line ending
     * in `}` or `]` closes one.
     * @param text Grok string whose lines all start without preceding spaces
     * @returns {string} Grok string indented by 3 spaces per level
     */
    function indented(text) {
        var depth = 0;
        return text.split("\n").map(function (line) {
            var opens = /(\{|\[)$/.test(line);
            var closes = !opens && /(\}|\])$/.test(line);
            if (closes) {
                // A closing line is aligned with the line that opened the level.
                --depth;
            }
            var padded = new Array(depth * INDENT_WIDTH + 1).join(" ") + line;
            if (opens) {
                ++depth;
            }
            return padded;
        }).join("\n");
    }

    var pipeline = JSON.parse(json);
    var sections = pipeline["processors"].map(function (processor) {
        var field = quoted(processor["grok"]["field"]);
        var patterns = pattern_array(processor["grok"]["patterns"]);
        var match = assignment("match", braced(assignment(field, patterns)));
        return indented(section("filter", section("grok", match)));
    });
    return sections.join("\n\n") + "\n";
}
37 changes: 37 additions & 0 deletions
37
tools/ingest-converter/src/test/java/org/logstash/ingest/GrokTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.logstash.ingest; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.rules.TemporaryFolder; | ||
|
||
import static org.hamcrest.CoreMatchers.is; | ||
import static org.hamcrest.MatcherAssert.assertThat; | ||
|
||
public final class GrokTest { | ||
|
||
@Rule | ||
public final TemporaryFolder temp = new TemporaryFolder(); | ||
|
||
@Test | ||
public void convertsCorrectly() throws Exception { | ||
final File testdir = temp.newFolder(); | ||
final String grok = testdir.toPath().resolve("converted.grok").toString(); | ||
Grok.main(resourcePath("ingestTestConfig.json"), grok); | ||
assertThat( | ||
utf8File(grok), is(utf8File(resourcePath("ingestTestConfig.grok"))) | ||
); | ||
} | ||
|
||
private static String utf8File(final String path) throws IOException { | ||
return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8); | ||
} | ||
|
||
private static String resourcePath(final String name) { | ||
return Grok.class.getResource(name).getPath(); | ||
} | ||
} |
10 changes: 10 additions & 0 deletions
10
tools/ingest-converter/src/test/resources/org/logstash/ingest/ingestTestConfig.grok
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
filter { | ||
grok { | ||
match => { | ||
"message" => [ | ||
"%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \[%{HTTPDATE:[apache2][access][time]}\] \"%{WORD:[apache2][access][method]} %{DATA:[apache2][access][url]} HTTP/%{NUMBER:[apache2][access][http_version]}\" %{NUMBER:[apache2][access][response_code]} (?:%{NUMBER:apache2.access.body_sent.bytes}|-)( \"%{DATA:[apache2][access][referrer]}\")?( \"%{DATA:[apache2][access][agent]}\")?", | ||
"%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \[%{HTTPDATE:[apache2][access][time]}\] \"-\" %{NUMBER:[apache2][access][response_code]} -" | ||
] | ||
} | ||
} | ||
} |
15 changes: 15 additions & 0 deletions
15
tools/ingest-converter/src/test/resources/org/logstash/ingest/ingestTestConfig.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"description": "Pipeline for parsing apache error logs", | ||
"processors": [ | ||
{ | ||
"grok": { | ||
"field": "message", | ||
"patterns": [ | ||
"%{IPORHOST:apache2.access.remote_ip} - %{DATA:apache2.access.user_name} \\[%{HTTPDATE:apache2.access.time}\\] \"%{WORD:apache2.access.method} %{DATA:apache2.access.url} HTTP/%{NUMBER:apache2.access.http_version}\" %{NUMBER:apache2.access.response_code} (?:%{NUMBER:apache2.access.body_sent.bytes}|-)( \"%{DATA:apache2.access.referrer}\")?( \"%{DATA:apache2.access.agent}\")?", | ||
"%{IPORHOST:apache2.access.remote_ip} - %{DATA:apache2.access.user_name} \\[%{HTTPDATE:apache2.access.time}\\] \"-\" %{NUMBER:apache2.access.response_code} -" | ||
], | ||
"ignore_missing": true | ||
} | ||
} | ||
] | ||
} |