From b489155ca6238b39c93c1e6f982354c4f6a4b6f0 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sat, 12 Jan 2013 11:52:32 +0100 Subject: [PATCH 1/2] Deprecating regexp field for LOGSTASH-756 - Deprecating fields to columns - Deprecating regexp fields to source - Adding target for sub field specifcation - Added and modified rspec checks --- lib/logstash/filters/csv.rb | 48 +++++++++-- spec/filters/csv.rb | 157 ++++++++++++++++++++++++++++++------ 2 files changed, 174 insertions(+), 31 deletions(-) diff --git a/lib/logstash/filters/csv.rb b/lib/logstash/filters/csv.rb index d3a5da9cd1f..8f17b24a8bf 100644 --- a/lib/logstash/filters/csv.rb +++ b/lib/logstash/filters/csv.rb @@ -14,32 +14,65 @@ class LogStash::Filters::CSV < LogStash::Filters::Base # The CSV data in the value of the source field will be expanded into a # datastructure in the "dest" field. Note: if the "dest" field # already exists, it will be overridden. - config /[A-Za-z0-9_-]+/, :validate => :string + config /[A-Za-z0-9_-]+/, :validate => :string, :deprecated => true # Define a list of field names (in the order they appear in the CSV, # as if it were a header line). If this is not specified or there # are not enough fields specified, the default field name is "fieldN" # (where N is the field number, starting from 1). # Optional. - config :fields, :validate => :array, :default => [] + config :fields, :validate => :array, :deprecated => true + + # The CSV data in the value of the source field will be expanded into a + # datastructure. + # This deprecates the regexp '[A-Za-z0-9_-]' variable. + config :source, :validate => :string + + # Define a list of field names (in the order they appear in the CSV, + # as if it were a header line). If this is not specified or there + # are not enough fields specified, the default field name is "fieldN" + # (where N is the field number, starting from 1). + # This deprecates the 'fields' variable. + # Optional. + config :columns, :validate => :array, :default => [] # Define the column separator value. If this is not specified the default # is a comma ',' # Optional. config :separator, :validate => :string, :default => "," + # Define target for placing the data + # Defaults to @fields + # Optional + config :target, :validate => :string, :default => "@fields" + public def register - @csv = {} + #TODO(electrical): At some point 'fields' and the regexp parts need to be removed. + if @fields + if @columns + @logger.error("'fields' and 'columns' are the same setting, but 'fields' is deprecated. Please use only 'columns'") + end + @columns = @fields + end + + @csv = {} + #TODO(electrical): At some point this can be removed @config.each do |field, dest| - next if (RESERVED + ["fields", "separator"]).member?(field) + next if (RESERVED + ["fields", "separator", "source", "columns", "target"]).member?(field) @csv[field] = dest end - # Default to parsing @message and dumping into @fields + #TODO(electrical): Will we make @source required or not? + if @source + #Add the source field to the list. + @csv[@source] = @target + end + + # Default to parsing @message and dumping into @target if @csv.empty? - @csv["@message"] = "@fields" + @csv["@message"] = @target end end # def register @@ -50,6 +83,7 @@ def filter(event) @logger.debug("Running csv filter", :event => event) matches = 0 + #TODO(electrical): When old stuff can be removed. this block will need to be changed also @csv.each do |key, dest| if event[key] if event[key].is_a?(String) @@ -68,7 +102,7 @@ def filter(event) values = CSV.parse_line(raw, {:col_sep => @separator}) data = {} values.each_index do |i| - field_name = @fields[i] || "field#{i+1}" + field_name = @columns[i] || "column#{i+1}" data[field_name] = values[i] end diff --git a/spec/filters/csv.rb b/spec/filters/csv.rb index 3de9e6be208..7e230c706bc 100644 --- a/spec/filters/csv.rb +++ b/spec/filters/csv.rb @@ -14,46 +14,46 @@ CONFIG sample "big,bird,sesame street" do - insist { subject["field1"] } == "big" - insist { subject["field2"] } == "bird" - insist { subject["field3"] } == "sesame street" + insist { subject["column1"] } == "big" + insist { subject["column2"] } == "bird" + insist { subject["column3"] } == "sesame street" end end - describe "given fields" do - # The logstash config goes here. - # At this time, only filters are supported. + describe "custom separator" do config <<-CONFIG filter { csv { - fields => ["first", "last", "address" ] + separator => ";" } } CONFIG - sample "big,bird,sesame street" do - insist { subject["first"] } == "big" - insist { subject["last"] } == "bird" - insist { subject["address"] } == "sesame street" + sample "big,bird;sesame street" do + insist { subject["column1"] } == "big,bird" + insist { subject["column2"] } == "sesame street" end end - describe "custom separator" do + describe "given fields ( deprecated test )" do + # The logstash config goes here. + # At this time, only filters are supported. config <<-CONFIG filter { csv { - separator => ";" + fields => ["first", "last", "address" ] } } CONFIG - sample "big,bird;sesame street" do - insist { subject["field1"] } == "big,bird" - insist { subject["field2"] } == "sesame street" + sample "big,bird,sesame street" do + insist { subject["first"] } == "big" + insist { subject["last"] } == "bird" + insist { subject["address"] } == "sesame street" end end - describe "parse csv with more data than defined field names" do + describe "parse csv with more data than defined field names ( deprecated test )" do config <<-CONFIG filter { csv { @@ -65,11 +65,11 @@ sample "val1,val2,val3" do insist { subject["custom1"] } == "val1" insist { subject["custom2"] } == "val2" - insist { subject["field3"] } == "val3" + insist { subject["column3"] } == "val3" end end - describe "parse csv from a given field without field names" do + describe "parse csv from a given field without field names ( deprecated test )" do config <<-CONFIG filter { csv { @@ -79,13 +79,13 @@ CONFIG sample({"@fields" => {"raw" => "val1,val2,val3"}}) do - insist { subject["data"]["field1"] } == "val1" - insist { subject["data"]["field2"] } == "val2" - insist { subject["data"]["field3"] } == "val3" + insist { subject["data"]["column1"] } == "val1" + insist { subject["data"]["column2"] } == "val2" + insist { subject["data"]["column3"] } == "val3" end end - describe "parse csv from a given field with field names" do + describe "parse csv from a given field with field names ( deprecated test )" do config <<-CONFIG filter { csv { @@ -102,7 +102,7 @@ end end - describe "fail to parse any data in a multi-value field" do + describe "fail to parse any data in a multi-value field ( deprecated test )" do config <<-CONFIG filter { csv { @@ -115,4 +115,113 @@ insist { subject["data"] } == nil end end + + + # New tests + + describe "fail to parse any data in a multi-value field ( deprecated test )" do + config <<-CONFIG + filter { + csv { + source => "datain" + target => "data" + } + } + CONFIG + + sample({"@fields" => {"datain" => ["val1,val2,val3", "val1,val2,val3"]}}) do + insist { subject["data"] } == nil + end + end + + describe "given columns" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + csv { + columns => ["first", "last", "address" ] + } + } + CONFIG + + sample "big,bird,sesame street" do + insist { subject["first"] } == "big" + insist { subject["last"] } == "bird" + insist { subject["address"] } == "sesame street" + end + end + + describe "parse csv with more data than defined column names" do + config <<-CONFIG + filter { + csv { + columns => ["custom1", "custom2"] + } + } + CONFIG + + sample "val1,val2,val3" do + insist { subject["custom1"] } == "val1" + insist { subject["custom2"] } == "val2" + insist { subject["column3"] } == "val3" + end + end + + + describe "parse csv from a given source with column names" do + config <<-CONFIG + filter { + csv { + source => "datafield" + columns => ["custom1", "custom2", "custom3"] + } + } + CONFIG + + sample({"@fields" => {"datafield" => "val1,val2,val3"}}) do + insist { subject["custom1"] } == "val1" + insist { subject["custom2"] } == "val2" + insist { subject["custom3"] } == "val3" + end + end + + describe "given target" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + csv { + target => "data" + } + } + CONFIG + + sample "big,bird,sesame street" do + insist { subject["data"]["column1"] } == "big" + insist { subject["data"]["column2"] } == "bird" + insist { subject["data"]["column3"] } == "sesame street" + end + end + + describe "given target and source" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + csv { + source => "datain" + target => "data" + } + } + CONFIG + + sample({"@fields" => {"datain" => "big,bird,sesame street"}}) do + insist { subject["data"]["column1"] } == "big" + insist { subject["data"]["column2"] } == "bird" + insist { subject["data"]["column3"] } == "sesame street" + end + end + + end From e994ce984515d15b3f5730263cb5a8ded3b6028a Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sat, 12 Jan 2013 12:36:18 +0100 Subject: [PATCH 2/2] Minor modification to documentation --- lib/logstash/filters/csv.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/logstash/filters/csv.rb b/lib/logstash/filters/csv.rb index 8f17b24a8bf..28d51242765 100644 --- a/lib/logstash/filters/csv.rb +++ b/lib/logstash/filters/csv.rb @@ -28,10 +28,10 @@ class LogStash::Filters::CSV < LogStash::Filters::Base # This deprecates the regexp '[A-Za-z0-9_-]' variable. config :source, :validate => :string - # Define a list of field names (in the order they appear in the CSV, + # Define a list of column names (in the order they appear in the CSV, # as if it were a header line). If this is not specified or there - # are not enough fields specified, the default field name is "fieldN" - # (where N is the field number, starting from 1). + # are not enough columns specified, the default column name is "columnX" + # (where X is the field number, starting from 1). # This deprecates the 'fields' variable. # Optional. config :columns, :validate => :array, :default => []