Skip to content

Commit

Permalink
Merge pull request elastic#310 from electrical/csv_regexp_deprecate
Browse files Browse the repository at this point in the history
Deprecating regexp field for LOGSTASH-756
  • Loading branch information
jordansissel committed Feb 5, 2013
2 parents df0c6c8 + e994ce9 commit 73722a6
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 31 deletions.
48 changes: 41 additions & 7 deletions lib/logstash/filters/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,65 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
# The CSV data in the value of the source field will be expanded into a
# datastructure in the "dest" field. Note: if the "dest" field
# already exists, it will be overridden.
config /[A-Za-z0-9_-]+/, :validate => :string
config /[A-Za-z0-9_-]+/, :validate => :string, :deprecated => true

# Define a list of field names (in the order they appear in the CSV,
# as if it were a header line). If this is not specified or there
# are not enough fields specified, the default field name is "fieldN"
# (where N is the field number, starting from 1).
# Optional.
config :fields, :validate => :array, :default => []
config :fields, :validate => :array, :deprecated => true

# The CSV data in the value of the source field will be expanded into a
# datastructure.
# This deprecates the regexp '[A-Za-z0-9_-]' variable.
config :source, :validate => :string

# Define a list of column names (in the order they appear in the CSV,
# as if it were a header line). If this is not specified or there
# are not enough columns specified, the default column name is "columnX"
# (where X is the field number, starting from 1).
# This deprecates the 'fields' variable.
# Optional.
config :columns, :validate => :array, :default => []

# Define the column separator value. If this is not specified the default
# is a comma ','
# Optional.
config :separator, :validate => :string, :default => ","

# Define target for placing the data
# Defaults to @fields
# Optional
config :target, :validate => :string, :default => "@fields"

public
def register
@csv = {}

#TODO(electrical): At some point 'fields' and the regexp parts need to be removed.
if @fields
if @columns
@logger.error("'fields' and 'columns' are the same setting, but 'fields' is deprecated. Please use only 'columns'")
end
@columns = @fields
end

@csv = {}
#TODO(electrical): At some point this can be removed
@config.each do |field, dest|
next if (RESERVED + ["fields", "separator"]).member?(field)
next if (RESERVED + ["fields", "separator", "source", "columns", "target"]).member?(field)
@csv[field] = dest
end

# Default to parsing @message and dumping into @fields
#TODO(electrical): Will we make @source required or not?
if @source
#Add the source field to the list.
@csv[@source] = @target
end

# Default to parsing @message and dumping into @target
if @csv.empty?
@csv["@message"] = "@fields"
@csv["@message"] = @target
end
end # def register

Expand All @@ -50,6 +83,7 @@ def filter(event)
@logger.debug("Running csv filter", :event => event)

matches = 0
#TODO(electrical): When old stuff can be removed. this block will need to be changed also
@csv.each do |key, dest|
if event[key]
if event[key].is_a?(String)
Expand All @@ -68,7 +102,7 @@ def filter(event)
values = CSV.parse_line(raw, {:col_sep => @separator})
data = {}
values.each_index do |i|
field_name = @fields[i] || "field#{i+1}"
field_name = @columns[i] || "column#{i+1}"
data[field_name] = values[i]
end

Expand Down
157 changes: 133 additions & 24 deletions spec/filters/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,46 +14,46 @@
CONFIG

sample "big,bird,sesame street" do
insist { subject["field1"] } == "big"
insist { subject["field2"] } == "bird"
insist { subject["field3"] } == "sesame street"
insist { subject["column1"] } == "big"
insist { subject["column2"] } == "bird"
insist { subject["column3"] } == "sesame street"
end
end

describe "given fields" do
# The logstash config goes here.
# At this time, only filters are supported.
describe "custom separator" do
config <<-CONFIG
filter {
csv {
fields => ["first", "last", "address" ]
separator => ";"
}
}
CONFIG

sample "big,bird,sesame street" do
insist { subject["first"] } == "big"
insist { subject["last"] } == "bird"
insist { subject["address"] } == "sesame street"
sample "big,bird;sesame street" do
insist { subject["column1"] } == "big,bird"
insist { subject["column2"] } == "sesame street"
end
end

describe "custom separator" do
describe "given fields ( deprecated test )" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
separator => ";"
fields => ["first", "last", "address" ]
}
}
CONFIG

sample "big,bird;sesame street" do
insist { subject["field1"] } == "big,bird"
insist { subject["field2"] } == "sesame street"
sample "big,bird,sesame street" do
insist { subject["first"] } == "big"
insist { subject["last"] } == "bird"
insist { subject["address"] } == "sesame street"
end
end

describe "parse csv with more data than defined field names" do
describe "parse csv with more data than defined field names ( deprecated test )" do
config <<-CONFIG
filter {
csv {
Expand All @@ -65,11 +65,11 @@
sample "val1,val2,val3" do
insist { subject["custom1"] } == "val1"
insist { subject["custom2"] } == "val2"
insist { subject["field3"] } == "val3"
insist { subject["column3"] } == "val3"
end
end

describe "parse csv from a given field without field names" do
describe "parse csv from a given field without field names ( deprecated test )" do
config <<-CONFIG
filter {
csv {
Expand All @@ -79,13 +79,13 @@
CONFIG

sample({"@fields" => {"raw" => "val1,val2,val3"}}) do
insist { subject["data"]["field1"] } == "val1"
insist { subject["data"]["field2"] } == "val2"
insist { subject["data"]["field3"] } == "val3"
insist { subject["data"]["column1"] } == "val1"
insist { subject["data"]["column2"] } == "val2"
insist { subject["data"]["column3"] } == "val3"
end
end

describe "parse csv from a given field with field names" do
describe "parse csv from a given field with field names ( deprecated test )" do
config <<-CONFIG
filter {
csv {
Expand All @@ -102,7 +102,7 @@
end
end

describe "fail to parse any data in a multi-value field" do
describe "fail to parse any data in a multi-value field ( deprecated test )" do
config <<-CONFIG
filter {
csv {
Expand All @@ -115,4 +115,113 @@
insist { subject["data"] } == nil
end
end


# New tests

describe "fail to parse any data in a multi-value field ( deprecated test )" do
config <<-CONFIG
filter {
csv {
source => "datain"
target => "data"
}
}
CONFIG

sample({"@fields" => {"datain" => ["val1,val2,val3", "val1,val2,val3"]}}) do
insist { subject["data"] } == nil
end
end

describe "given columns" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
columns => ["first", "last", "address" ]
}
}
CONFIG

sample "big,bird,sesame street" do
insist { subject["first"] } == "big"
insist { subject["last"] } == "bird"
insist { subject["address"] } == "sesame street"
end
end

describe "parse csv with more data than defined column names" do
config <<-CONFIG
filter {
csv {
columns => ["custom1", "custom2"]
}
}
CONFIG

sample "val1,val2,val3" do
insist { subject["custom1"] } == "val1"
insist { subject["custom2"] } == "val2"
insist { subject["column3"] } == "val3"
end
end


describe "parse csv from a given source with column names" do
config <<-CONFIG
filter {
csv {
source => "datafield"
columns => ["custom1", "custom2", "custom3"]
}
}
CONFIG

sample({"@fields" => {"datafield" => "val1,val2,val3"}}) do
insist { subject["custom1"] } == "val1"
insist { subject["custom2"] } == "val2"
insist { subject["custom3"] } == "val3"
end
end

describe "given target" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
target => "data"
}
}
CONFIG

sample "big,bird,sesame street" do
insist { subject["data"]["column1"] } == "big"
insist { subject["data"]["column2"] } == "bird"
insist { subject["data"]["column3"] } == "sesame street"
end
end

describe "given target and source" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
source => "datain"
target => "data"
}
}
CONFIG

sample({"@fields" => {"datain" => "big,bird,sesame street"}}) do
insist { subject["data"]["column1"] } == "big"
insist { subject["data"]["column2"] } == "bird"
insist { subject["data"]["column3"] } == "sesame street"
end
end


end

0 comments on commit 73722a6

Please sign in to comment.