Skip to content

Commit

Permalink
Added more Twitter fields to TwitterSpritzerFirehoseFactory
Browse files Browse the repository at this point in the history
* Now with GEOGRAPHY support!
  • Loading branch information
drcrallen committed Dec 12, 2014
1 parent fc7f1e6 commit 687c82d
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 109 deletions.
33 changes: 30 additions & 3 deletions examples/bin/examples/twitter/query.body
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
{
"queryType": "timeBoundary",
"dataSource": "twitterstream"
}
"description": "Simple data split up by hour",
"aggregations": [
{
"name": "tweets",
"type": "longSum",
"fieldName": "tweets"
},
{
"fieldName": "text_hll",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags_hll",
"name": "htag_hll",
"type": "hyperUnique"
},
{
"fieldName": "user_id_hll",
"name": "user_id_hll",
"type": "hyperUnique"
}
],
"dataSource": "twitterstream",
"granularity": "hour",
"intervals": [
"1970-01-01T00:00:00.000/2019-01-03T00:00:00.000"
],
"queryType": "timeseries"
}
94 changes: 94 additions & 0 deletions examples/bin/examples/twitter/topN_query.body
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"description": "Top 10 languages by count of tweets in the contiguous US",
"aggregations": [
{
"fieldName": "tweets",
"name": "tweets",
"type": "longSum"
},
{
"fieldName": "user_id_hll",
"name": "user_id_hll",
"type": "hyperUnique"
},
{
"fieldName": "contributors_hll",
"name": "contributors_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags_hll",
"name": "htags_hll",
"type": "hyperUnique"
},
{
"fieldName": "text_hll",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "min_follower_count",
"name": "min_follower_count",
"type": "min"
},
{
"fieldName": "max_follower_count",
"name": "max_follower_count",
"type": "max"
},
{
"fieldName": "min_friends_count",
"name": "min_friends_count",
"type": "min"
},
{
"fieldName": "max_friends_count",
"name": "max_friends_count",
"type": "max"
},
{
"fieldName": "min_statuses_count",
"name": "min_statuses_count",
"type": "min"
},
{
"fieldName": "max_statuses_count",
"name": "max_statuses_count",
"type": "max"
},
{
"fieldName": "min_retweet_count",
"name": "min_retweet_count",
"type": "min"
},
{
"fieldName": "max_retweet_count",
"name": "max_retweet_count",
"type": "max"
}
],
"dataSource": "twitterstream",
"dimension": "lang",
"filter": {
"bound": {
"maxCoords": [
50,
-65
],
"minCoords": [
25,
-127
],
"type": "rectangular"
},
"dimension": "geo",
"type": "spatial"
},
"granularity": "all",
"intervals": [
"2013-06-01T00:00/2020-01-01T00"
],
"metric": "tweets",
"queryType": "topN",
"threshold": "10"
}
164 changes: 98 additions & 66 deletions examples/bin/examples/twitter/twitter_realtime.spec
Original file line number Diff line number Diff line change
@@ -1,119 +1,151 @@
[
{
{
"description": "Ingestion spec for Twitter spritzer. Dimension values taken from io.druid.examples.twitter.TwitterSpritzerFirehoseFactory",
"spec": {
"dataSchema": {
"dataSource": "twitterstream",
"parser": {
"parseSpec": {
"format": "json",
"timestampSpec": {
"column": "utcdt",
"format": "iso"
},
"dimensionsSpec": {
"dimensions": [

],
"dimensionExclusions": [

],
"spatialDimensions": [

]
}
}
"granularitySpec": {
"queryGranularity": "all",
"segmentGranularity": "hour",
"type": "uniform"
},
"metricsSpec": [
{
"type": "count",
"name": "tweets"
"name": "tweets",
"type": "count"
},
{
"type": "doubleSum",
"fieldName": "follower_count",
"name": "total_follower_count"
"name": "total_follower_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "retweet_count",
"name": "total_retweet_count"
"name": "total_retweet_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "friends_count",
"name": "total_friends_count"
"name": "total_friends_count",
"type": "doubleSum"
},
{
"type": "doubleSum",
"fieldName": "statuses_count",
"name": "total_statuses_count"
"name": "total_statuses_count",
"type": "doubleSum"
},
{
"fieldName": "text",
"name": "text_hll",
"type": "hyperUnique"
},
{
"fieldName": "user_id",
"name": "user_id_hll",
"type": "hyperUnique"
},
{
"fieldName": "contributors",
"name": "contributors_hll",
"type": "hyperUnique"
},
{
"fieldName": "htags",
"name": "htags_hll",
"type": "hyperUnique"
},
{
"type": "min",
"fieldName": "follower_count",
"name": "min_follower_count"
"name": "min_follower_count",
"type": "min"
},
{
"type": "max",
"fieldName": "follower_count",
"name": "max_follower_count"
"name": "max_follower_count",
"type": "max"
},
{
"type": "min",
"fieldName": "friends_count",
"name": "min_friends_count"
"name": "min_friends_count",
"type": "min"
},
{
"type": "max",
"fieldName": "friends_count",
"name": "max_friends_count"
"name": "max_friends_count",
"type": "max"
},
{
"type": "min",
"fieldName": "statuses_count",
"name": "min_statuses_count"
"name": "min_statuses_count",
"type": "min"
},
{
"type": "max",
"fieldName": "statuses_count",
"name": "max_statuses_count"
"name": "max_statuses_count",
"type": "max"
},
{
"type": "min",
"fieldName": "retweet_count",
"name": "min_retweet_count"
"name": "min_retweet_count",
"type": "min"
},
{
"type": "max",
"fieldName": "retweet_count",
"name": "max_retweet_count"
"name": "max_retweet_count",
"type": "max"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": "NONE"
"parser": {
"parseSpec": {
"dimensionsSpec": {
"dimensions": [
"text",
"htags",
"contributors",
"lat",
"lon",
"retweet_count",
"follower_count",
"friendscount",
"lang",
"utc_offset",
"statuses_count",
"user_id",
"ts"
],
"dimensionExclusions": [
],
"spatialDimensions": [
{
"dimName": "geo",
"dims": [
"lat",
"lon"
]
}
]
},
"format": "json",
"timestampSpec": {
"column": "ts",
"format": "millis"
}
}
}
},
"ioConfig": {
"type": "realtime",
"firehose": {
"type": "twitzer",
"maxEventCount": 500000,
"maxRunMinutes": 120
"maxRunMinutes": 120,
"type": "twitzer"
},
"plumber": {
"type": "realtime"
}
"type": "realtime"
},
"tuningConfig": {
"type": "realtime",
"intermediatePersistPeriod": "PT10m",
"maxRowsInMemory": 500000,
"intermediatePersistPeriod": "PT2m",
"windowPeriod": "PT3m",
"basePersistDirectory": "\/tmp\/realtime\/basePersist",
"rejectionPolicy": {
"type": "messageTime"
}
"type": "realtime",
"windowPeriod": "PT10m"
}
}
]
},
"type": "index_realtime"
}
Loading

0 comments on commit 687c82d

Please sign in to comment.