diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 583039512c..14d8fd7400 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -93,6 +93,15 @@ producer: name: Analytics SEO url: http://www.analyticsseo.com +- + user_agent: ApacheBench/2.3 + bot: + name: ApacheBench + category: Benchmark + url: 'https://httpd.apache.org/docs/2.4/programs/ab.html' + producer: + name: 'The Apache Software Foundation' + url: 'http://www.apache.org/foundation/' - user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10 _1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot) bot: @@ -417,7 +426,16 @@ producer: name: Cốc Cốc url: http://coccoc.com/ -- +- + user_agent: Datadog Agent/5.10.1 + bot: + name: 'Datadog Agent' + url: 'https://github.com/DataDog/dd-agent' + category: 'Site Monitor' + producer: + name: 'Datadog' + url: 'https://www.datadoghq.com/' +- user_agent: Mozilla/5.0 (compatible; Dataprovider/6.92; +https://www.dataprovider.com/) bot: name: Dataprovider @@ -660,7 +678,25 @@ producer: name: url: -- +- + user_agent: Mozilla/5.0 (compatible; FlipboardProxy/1.2; +http://flipboard.com/browserproxy) + bot: + name: Flipboard + url: http://flipboard.com/browserproxy + category: Feed Fetcher + producer: + name: 'Flipboard' + url: 'http://flipboard.com/' +- + user_agent: Mozilla/5.0 (compatible; FlipboardRSS/1.2; +http://flipboard.com/browserproxy) + bot: + name: Flipboard + url: http://flipboard.com/browserproxy + category: Feed Fetcher + producer: + name: 'Flipboard' + url: 'http://flipboard.com/' +- user_agent: niki-bot bot: name: Generic Bot @@ -1171,6 +1207,16 @@ producer: name: Mail.Ru Group url: http://corp.mail.ru +- + user_agent: masscan/1.0 (https://github.com/robertdavidgraham/masscan) + bot: + name: masscan + category: Crawler + url: https://github.com/robertdavidgraham/masscan + producer: + name: Robert Graham + url: https://github.com/robertdavidgraham + - user_agent: 
Mozilla/5.0 (compatible; meanpathbot/1.0; +http://www.meanpath.com/meanpathbot.html) bot: @@ -1216,6 +1262,25 @@ producer: name: Monitor.Us url: http://www.monitor.us +- + user_agent: munin/2.0.30-1 (libwww-perl/6.15) + bot: + name: 'Munin' + category: 'Site Monitor' + url: 'http://munin-monitoring.org/' + producer: + name: 'Munin' + url: 'http://munin-monitoring.org/' +- + user_agent: munin/http_loadtime + bot: + name: 'Munin' + category: 'Site Monitor' + url: 'http://munin-monitoring.org/' + producer: + name: 'Munin' + url: 'http://munin-monitoring.org/' + - user_agent: nlcrawler/1.0 (+http://northernlight.com/) bot: @@ -2006,6 +2071,15 @@ producer: name: Twitter url: http://www.twitter.com +- + user_agent: UniversalFeedParser/5.2.1 +https://code.google.com/p/feedparser/ + bot: + name: UniversalFeedParser + category: Feed Fetcher + url: https://github.com/kurtmckee/feedparser + producer: + name: Kurt McKee + url: https://github.com/kurtmckee - user_agent: Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html) bot: diff --git a/regexes/bots.yml b/regexes/bots.yml index 79f2c16944..124a43a07c 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -61,6 +61,14 @@ name: 'Amorank' url: 'http://www.amorank.com' +- regex: 'ApacheBench' + name: 'ApacheBench' + category: 'Benchmark' + url: 'https://httpd.apache.org/docs/2.4/programs/ab.html' + producer: + name: 'The Apache Software Foundation' + url: 'http://www.apache.org/foundation/' + - regex: 'Applebot' name: 'Applebot' category: 'Crawler' @@ -253,6 +261,14 @@ name: '' url: '' +- regex: 'Datadog Agent' + name: 'Datadog Agent' + url: 'https://github.com/DataDog/dd-agent' + category: 'Site Monitor' + producer: + name: 'Datadog' + url: 'https://www.datadoghq.com/' + - regex: 'Dataprovider' name: 'Dataprovider' category: 'Crawler' @@ -397,6 +413,14 @@ name: '' url: '' +- regex: 'FlipboardProxy|FlipboardRSS' + name: 'Flipboard' + url: 'http://flipboard.com/browserproxy' + category: 'Feed 
Fetcher' + producer: + name: 'Flipboard' + url: 'http://flipboard.com/' + - regex: 'Genieo' name: 'Genieo Web filter' category: '' @@ -548,6 +572,14 @@ name: '' url: '' +- regex: 'masscan' + name: 'masscan' + url: 'https://github.com/robertdavidgraham/masscan' + category: 'Crawler' + producer: + name: 'Robert Graham' + url: 'https://github.com/robertdavidgraham' + - regex: 'meanpathbot' name: 'Meanpath Bot' category: 'Search bot' @@ -588,6 +620,14 @@ name: 'Mojeek Ltd.' url: 'http://www.mojeek.com' +- regex: 'munin' + name: 'Munin' + category: 'Site Monitor' + url: 'http://munin-monitoring.org/' + producer: + name: 'Munin' + url: 'http://munin-monitoring.org/' + - regex: 'NalezenCzBot' name: 'NalezenCzBot' category: 'Crawler' @@ -978,6 +1018,14 @@ name: 'Twitter' url: 'http://www.twitter.com' +- regex: 'UniversalFeedParser' + name: 'UniversalFeedParser' + category: 'Feed Fetcher' + url: 'https://github.com/kurtmckee/feedparser' + producer: + name: 'Kurt McKee' + url: 'https://github.com/kurtmckee' + - regex: 'UptimeRobot' name: 'Uptime Robot' category: 'Site Monitor'