diff --git a/lib/logstash/filters/geoip.rb b/lib/logstash/filters/geoip.rb index 60d2af98340..aa604b8b938 100644 --- a/lib/logstash/filters/geoip.rb +++ b/lib/logstash/filters/geoip.rb @@ -140,9 +140,17 @@ def filter(event) geo_data_hash.each do |key, value| next if value.nil? || (value.is_a?(String) && value.empty?) if @fields.nil? || @fields.empty? || @fields.include?(key.to_s) - # no fields requested, so add all geoip hash items to - # the event's fields. # convert key to string (normally a Symbol) + if value.is_a?(String) + # Some strings from GeoIP don't have the correct encoding... + value = case value.encoding + # I have found strings coming from GeoIP that are ASCII-8BIT are actually + # ISO-8859-1... + when Encoding::ASCII_8BIT; value.force_encoding("ISO-8859-1").encode("UTF-8") + when Encoding::ISO_8859_1; value.encode("UTF-8") + else; value + end + end event[@target][key.to_s] = value end end # geo_data_hash.each diff --git a/spec/filters/geoip.rb b/spec/filters/geoip.rb index 0b5124ee237..b1864de6e3e 100644 --- a/spec/filters/geoip.rb +++ b/spec/filters/geoip.rb @@ -73,14 +73,22 @@ dma_code area_code timezone) sample("ip" => "1.1.1.1") do + checked = 0 expected_fields.each do |f| + next unless subject["geoip"][f] + checked += 1 insist { subject["geoip"][f].encoding } == Encoding::UTF_8 end + insist { checked } > 0 end sample("ip" => "189.2.0.0") do + checked = 0 expected_fields.each do |f| + next unless subject["geoip"][f] + checked += 1 insist { subject["geoip"][f].encoding } == Encoding::UTF_8 end + insist { checked } > 0 end end