Skip to content

Commit

Permalink
Added bots (matomo-org#5580)
Browse files Browse the repository at this point in the history
* add masscan

* add ApacheBench

* added Datadog Agent

* add Flipboard

* add munin

* add UniversalFeedParser
  • Loading branch information
Findus23 authored and sgiehl committed Feb 10, 2017
1 parent 832e019 commit c219415
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 2 deletions.
78 changes: 76 additions & 2 deletions Tests/fixtures/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@
producer:
name: Analytics SEO
url: http://www.analyticsseo.com
# Fixture entry: ApacheBench 2.3 user agent paired with its bot metadata.
-
  user_agent: 'ApacheBench/2.3'
  bot:
    name: 'ApacheBench'
    category: 'Benchmark'
    url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
    producer:
      name: 'The Apache Software Foundation'
      url: 'http://www.apache.org/foundation/'
-
user_agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10 _1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)
bot:
Expand Down Expand Up @@ -417,7 +426,16 @@
producer:
name: Cốc Cốc
url: http://coccoc.com/
-
# Fixture entry: Datadog Agent 5.10.1 user agent paired with its bot metadata.
-
  user_agent: 'Datadog Agent/5.10.1'
  bot:
    name: 'Datadog Agent'
    url: 'https://github.com/DataDog/dd-agent'
    category: 'Site Monitor'
    producer:
      name: 'Datadog'
      url: 'https://www.datadoghq.com/'
-
user_agent: Mozilla/5.0 (compatible; Dataprovider/6.92; +https://www.dataprovider.com/)
bot:
name: Dataprovider
Expand Down Expand Up @@ -660,7 +678,25 @@
producer:
name:
url:
-
# Fixture entry: FlipboardProxy 1.2 user agent, attributed to the Flipboard bot.
-
  user_agent: 'Mozilla/5.0 (compatible; FlipboardProxy/1.2; +http://flipboard.com/browserproxy)'
  bot:
    name: 'Flipboard'
    url: 'http://flipboard.com/browserproxy'
    category: 'Feed Fetcher'
    producer:
      name: 'Flipboard'
      url: 'http://flipboard.com/'
# Fixture entry: FlipboardRSS 1.2 user agent, attributed to the Flipboard bot.
-
  user_agent: 'Mozilla/5.0 (compatible; FlipboardRSS/1.2; +http://flipboard.com/browserproxy)'
  bot:
    name: 'Flipboard'
    url: 'http://flipboard.com/browserproxy'
    category: 'Feed Fetcher'
    producer:
      name: 'Flipboard'
      url: 'http://flipboard.com/'
-
user_agent: niki-bot
bot:
name: Generic Bot
Expand Down Expand Up @@ -1171,6 +1207,16 @@
producer:
name: Mail.Ru Group
url: http://corp.mail.ru
# Fixture entry: masscan 1.0 user agent paired with its bot metadata.
# (No blank line follows: entries in this fixture list are not separated
# by blank lines.)
-
  user_agent: 'masscan/1.0 (https://github.com/robertdavidgraham/masscan)'
  bot:
    name: 'masscan'
    category: 'Crawler'
    url: 'https://github.com/robertdavidgraham/masscan'
    producer:
      name: 'Robert Graham'
      url: 'https://github.com/robertdavidgraham'
-
user_agent: Mozilla/5.0 (compatible; meanpathbot/1.0; +http://www.meanpath.com/meanpathbot.html)
bot:
Expand Down Expand Up @@ -1216,6 +1262,25 @@
producer:
name: Monitor.Us
url: http://www.monitor.us
# Fixture entry: versioned munin user agent (libwww-perl based) paired with
# the Munin bot metadata.
-
  user_agent: 'munin/2.0.30-1 (libwww-perl/6.15)'
  bot:
    name: 'Munin'
    category: 'Site Monitor'
    url: 'http://munin-monitoring.org/'
    producer:
      name: 'Munin'
      url: 'http://munin-monitoring.org/'
# Fixture entry: munin "http_loadtime" user agent paired with the Munin bot
# metadata. (No blank line follows: entries in this fixture list are not
# separated by blank lines.)
-
  user_agent: 'munin/http_loadtime'
  bot:
    name: 'Munin'
    category: 'Site Monitor'
    url: 'http://munin-monitoring.org/'
    producer:
      name: 'Munin'
      url: 'http://munin-monitoring.org/'
-
user_agent: nlcrawler/1.0 (+http://northernlight.com/)
bot:
Expand Down Expand Up @@ -2006,6 +2071,15 @@
producer:
name: Twitter
url: http://www.twitter.com
# Fixture entry: UniversalFeedParser 5.2.1 user agent paired with its bot
# metadata.
-
  user_agent: 'UniversalFeedParser/5.2.1 +https://code.google.com/p/feedparser/'
  bot:
    name: 'UniversalFeedParser'
    category: 'Feed Fetcher'
    url: 'https://github.com/kurtmckee/feedparser'
    producer:
      name: 'Kurt McKee'
      url: 'https://github.com/kurtmckee'
-
user_agent: Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)
bot:
Expand Down
48 changes: 48 additions & 0 deletions regexes/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@
name: 'Amorank'
url: 'http://www.amorank.com'

# ApacheBench (ab): pattern plus bot metadata.
- regex: 'ApacheBench'
  name: 'ApacheBench'
  category: 'Benchmark'
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  producer:
    name: 'The Apache Software Foundation'
    url: 'http://www.apache.org/foundation/'

- regex: 'Applebot'
name: 'Applebot'
category: 'Crawler'
Expand Down Expand Up @@ -253,6 +261,14 @@
name: ''
url: ''

# Datadog Agent: pattern plus bot metadata.
- regex: 'Datadog Agent'
  name: 'Datadog Agent'
  url: 'https://github.com/DataDog/dd-agent'
  category: 'Site Monitor'
  producer:
    name: 'Datadog'
    url: 'https://www.datadoghq.com/'

- regex: 'Dataprovider'
name: 'Dataprovider'
category: 'Crawler'
Expand Down Expand Up @@ -397,6 +413,14 @@
name: ''
url: ''

# Flipboard: one pattern covering both the FlipboardProxy and FlipboardRSS
# tokens, plus bot metadata.
- regex: 'FlipboardProxy|FlipboardRSS'
  name: 'Flipboard'
  url: 'http://flipboard.com/browserproxy'
  category: 'Feed Fetcher'
  producer:
    name: 'Flipboard'
    url: 'http://flipboard.com/'

- regex: 'Genieo'
name: 'Genieo Web filter'
category: ''
Expand Down Expand Up @@ -548,6 +572,14 @@
name: ''
url: ''

# masscan: pattern plus bot metadata.
# The key is written `regex:` with no space before the colon, matching every
# other entry in this list.
- regex: 'masscan'
  name: 'masscan'
  url: 'https://github.com/robertdavidgraham/masscan'
  category: 'Crawler'
  producer:
    name: 'Robert Graham'
    url: 'https://github.com/robertdavidgraham'

- regex: 'meanpathbot'
name: 'Meanpath Bot'
category: 'Search bot'
Expand Down Expand Up @@ -588,6 +620,14 @@
name: 'Mojeek Ltd.'
url: 'http://www.mojeek.com'

# Munin: pattern plus bot metadata.
- regex: 'munin'
  name: 'Munin'
  category: 'Site Monitor'
  url: 'http://munin-monitoring.org/'
  producer:
    name: 'Munin'
    url: 'http://munin-monitoring.org/'

- regex: 'NalezenCzBot'
name: 'NalezenCzBot'
category: 'Crawler'
Expand Down Expand Up @@ -978,6 +1018,14 @@
name: 'Twitter'
url: 'http://www.twitter.com'

# UniversalFeedParser: pattern plus bot metadata.
- regex: 'UniversalFeedParser'
  name: 'UniversalFeedParser'
  category: 'Feed Fetcher'
  url: 'https://github.com/kurtmckee/feedparser'
  producer:
    name: 'Kurt McKee'
    url: 'https://github.com/kurtmckee'

- regex: 'UptimeRobot'
name: 'Uptime Robot'
category: 'Site Monitor'
Expand Down

0 comments on commit c219415

Please sign in to comment.