Skip to content

Commit

Permalink
Add the possibility to override default user agent for each feed
Browse files Browse the repository at this point in the history
  • Loading branch information
pmarschik authored and fguillot committed Sep 20, 2018
1 parent 1d33539 commit 2538eea
Show file tree
Hide file tree
Showing 29 changed files with 129 additions and 22 deletions.
1 change: 1 addition & 0 deletions api/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func (c *Controller) CreateFeed(w http.ResponseWriter, r *http.Request) {
feedInfo.CategoryID,
feedInfo.FeedURL,
feedInfo.Crawler,
feedInfo.UserAgent,
feedInfo.Username,
feedInfo.Password,
)
Expand Down
13 changes: 10 additions & 3 deletions api/payload.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@ type entriesResponse struct {
// feedCreation is the JSON payload accepted by the API when creating a
// new feed subscription. Field order is kept as-is since it determines
// the JSON serialization order.
type feedCreation struct {
FeedURL string `json:"feed_url"`
CategoryID int64 `json:"category_id"`
UserAgent string `json:"user_agent"` // optional per-feed override of the default HTTP User-Agent
Username string `json:"username"` // optional HTTP Basic Auth credential for the feed
Password string `json:"password"`
Crawler bool `json:"crawler"` // when true, fetch the original article content
}

// subscriptionDiscovery is the JSON payload accepted by the API to
// discover feed subscriptions available at a website URL.
type subscriptionDiscovery struct {
URL string `json:"url"`
UserAgent string `json:"user_agent"` // optional override of the default HTTP User-Agent for discovery requests
Username string `json:"username"` // optional HTTP Basic Auth credential
Password string `json:"password"`
}

type feedModification struct {
Expand All @@ -44,6 +46,7 @@ type feedModification struct {
ScraperRules *string `json:"scraper_rules"`
RewriteRules *string `json:"rewrite_rules"`
Crawler *bool `json:"crawler"`
UserAgent *string `json:"user_agent"`
Username *string `json:"username"`
Password *string `json:"password"`
CategoryID *int64 `json:"category_id"`
Expand Down Expand Up @@ -74,6 +77,10 @@ func (f *feedModification) Update(feed *model.Feed) {
feed.Crawler = *f.Crawler
}

if f.UserAgent != nil {
feed.UserAgent = *f.UserAgent
}

if f.Username != nil {
feed.Username = *f.Username
}
Expand Down
1 change: 1 addition & 0 deletions api/subscription.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func (c *Controller) GetSubscriptions(w http.ResponseWriter, r *http.Request) {

subscriptions, err := subscription.FindSubscriptions(
subscriptionInfo.URL,
subscriptionInfo.UserAgent,
subscriptionInfo.Username,
subscriptionInfo.Password,
)
Expand Down
2 changes: 2 additions & 0 deletions client/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type Feed struct {
ScraperRules string `json:"scraper_rules"`
RewriteRules string `json:"rewrite_rules"`
Crawler bool `json:"crawler"`
UserAgent string `json:"user_agent"`
Username string `json:"username"`
Password string `json:"password"`
Category *Category `json:"category,omitempty"`
Expand All @@ -105,6 +106,7 @@ type FeedModification struct {
ScraperRules *string `json:"scraper_rules"`
RewriteRules *string `json:"rewrite_rules"`
Crawler *bool `json:"crawler"`
UserAgent *string `json:"user_agent"`
Username *string `json:"username"`
Password *string `json:"password"`
CategoryID *int64 `json:"category_id"`
Expand Down
2 changes: 1 addition & 1 deletion database/migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"miniflux.app/logger"
)

const schemaVersion = 20
const schemaVersion = 21

// Migrate executes database migrations.
func Migrate(db *sql.DB) {
Expand Down
2 changes: 2 additions & 0 deletions database/sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions database/sql/schema_version_21.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Add a per-feed User-Agent override; the empty-string default means the
-- application-wide default User-Agent is used.
alter table feeds add column user_agent text default '';
16 changes: 14 additions & 2 deletions http/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ const (
)

var (
// DefaultUserAgent sets the User-Agent header used for any requests by miniflux.
DefaultUserAgent = "Mozilla/5.0 (compatible; Miniflux/" + version.Version + "; +https://miniflux.app)"

errInvalidCertificate = "Invalid SSL certificate (original error: %q)"
errTemporaryNetworkOperation = "This website is temporarily unreachable (original error: %q)"
errPermanentNetworkOperation = "This website is permanently unreachable (original error: %q)"
Expand All @@ -47,6 +50,7 @@ type Client struct {
authorizationHeader string
username string
password string
userAgent string
Insecure bool
}

Expand All @@ -72,6 +76,14 @@ func (c *Client) WithCacheHeaders(etagHeader, lastModifiedHeader string) *Client
return c
}

// WithUserAgent overrides the User-Agent header sent with outgoing
// requests. An empty value is ignored so the client keeps its current
// (default) User-Agent.
func (c *Client) WithUserAgent(agent string) *Client {
	if agent == "" {
		return c
	}
	c.userAgent = agent
	return c
}

// Get execute a GET HTTP request.
func (c *Client) Get() (*Response, error) {
request, err := c.buildRequest(http.MethodGet, nil)
Expand Down Expand Up @@ -212,7 +224,7 @@ func (c *Client) buildClient() http.Client {

func (c *Client) buildHeaders() http.Header {
headers := make(http.Header)
headers.Add("User-Agent", "Mozilla/5.0 (compatible; Miniflux/"+version.Version+"; +https://miniflux.app)")
headers.Add("User-Agent", c.userAgent)
headers.Add("Accept", "*/*")

if c.etagHeader != "" {
Expand All @@ -233,5 +245,5 @@ func (c *Client) buildHeaders() http.Header {

// New returns a new HTTP client for the given URL, preconfigured with
// the default Miniflux User-Agent and TLS certificate verification
// enabled (Insecure=false).
func New(url string) *Client {
	c := &Client{url: url}
	c.userAgent = DefaultUserAgent
	c.Insecure = false
	return c
}
1 change: 1 addition & 0 deletions model/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type Feed struct {
ScraperRules string `json:"scraper_rules"`
RewriteRules string `json:"rewrite_rules"`
Crawler bool `json:"crawler"`
UserAgent string `json:"user_agent"`
Username string `json:"username"`
Password string `json:"password"`
Category *Category `json:"category,omitempty"`
Expand Down
6 changes: 5 additions & 1 deletion reader/feed/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ type Handler struct {
}

// CreateFeed fetch, parse and store a new feed.
func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, username, password string) (*model.Feed, error) {
func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, userAgent, username, password string) (*model.Feed, error) {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))

if !h.store.CategoryExists(userID, categoryID) {
Expand All @@ -46,6 +46,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,

clt := client.New(url)
clt.WithCredentials(username, password)
clt.WithUserAgent(userAgent)
response, err := clt.Get()
if err != nil {
if _, ok := err.(*errors.LocalizedError); ok {
Expand Down Expand Up @@ -87,6 +88,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
subscription.FeedURL = response.EffectiveURL
subscription.UserID = userID
subscription.Crawler = crawler
subscription.UserAgent = userAgent
subscription.Username = username
subscription.Password = password

Expand Down Expand Up @@ -136,6 +138,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
clt := client.New(originalFeed.FeedURL)
clt.WithCredentials(originalFeed.Username, originalFeed.Password)
clt.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
clt.WithUserAgent(originalFeed.UserAgent)
response, err := clt.Get()
if err != nil {
var customErr errors.LocalizedError
Expand Down Expand Up @@ -196,6 +199,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {

feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
feedProcessor.WithScraperRules(originalFeed.ScraperRules)
feedProcessor.WithUserAgent(originalFeed.UserAgent)
feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
feedProcessor.WithCrawler(originalFeed.Crawler)
feedProcessor.Process()
Expand Down
8 changes: 7 additions & 1 deletion reader/processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type FeedProcessor struct {
scraperRules string
rewriteRules string
crawler bool
userAgent string
}

// WithCrawler enables the crawler.
Expand All @@ -33,6 +34,11 @@ func (f *FeedProcessor) WithScraperRules(rules string) {
f.scraperRules = rules
}

// WithUserAgent sets the User-Agent header used when downloading the
// full content of this feed's articles.
func (f *FeedProcessor) WithUserAgent(agent string) {
	f.userAgent = agent
}

// WithRewriteRules adds rewrite rules to the processing.
func (f *FeedProcessor) WithRewriteRules(rules string) {
f.rewriteRules = rules
Expand All @@ -45,7 +51,7 @@ func (f *FeedProcessor) Process() {
if f.store.EntryURLExists(f.userID, entry.URL) {
logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
} else {
content, err := scraper.Fetch(entry.URL, f.scraperRules)
content, err := scraper.Fetch(entry.URL, f.scraperRules, f.userAgent)
if err != nil {
logger.Error("[FeedProcessor] %v", err)
} else {
Expand Down
6 changes: 5 additions & 1 deletion reader/scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ import (
)

// Fetch downloads a web page a returns relevant contents.
func Fetch(websiteURL, rules string) (string, error) {
func Fetch(websiteURL, rules, userAgent string) (string, error) {
clt := client.New(websiteURL)
if userAgent != "" {
clt.WithUserAgent(userAgent)
}

response, err := clt.Get()
if err != nil {
return "", err
Expand Down
3 changes: 2 additions & 1 deletion reader/subscription/finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ var (
)

// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
func FindSubscriptions(websiteURL, username, password string) (Subscriptions, error) {
func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, error) {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL))

clt := client.New(websiteURL)
clt.WithCredentials(username, password)
clt.WithUserAgent(userAgent)
response, err := clt.Get()
if err != nil {
if _, ok := err.(errors.LocalizedError); ok {
Expand Down
3 changes: 2 additions & 1 deletion storage/entry_query_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
e.id, e.user_id, e.feed_id, e.hash, e.published_at at time zone u.timezone, e.title,
e.url, e.comments_url, e.author, e.content, e.status, e.starred,
f.title as feed_title, f.feed_url, f.site_url, f.checked_at,
f.category_id, c.title as category_title, f.scraper_rules, f.rewrite_rules, f.crawler,
f.category_id, c.title as category_title, f.scraper_rules, f.rewrite_rules, f.crawler, f.user_agent,
fi.icon_id,
u.timezone
FROM entries e
Expand Down Expand Up @@ -247,6 +247,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
&entry.Feed.ScraperRules,
&entry.Feed.RewriteRules,
&entry.Feed.Crawler,
&entry.Feed.UserAgent,
&iconID,
&tz,
)
Expand Down
18 changes: 11 additions & 7 deletions storage/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
f.id, f.feed_url, f.site_url, f.title, f.etag_header, f.last_modified_header,
f.user_id, f.checked_at at time zone u.timezone,
f.parsing_error_count, f.parsing_error_msg,
f.scraper_rules, f.rewrite_rules, f.crawler,
f.scraper_rules, f.rewrite_rules, f.crawler, f.user_agent,
f.username, f.password,
f.category_id, c.title as category_title,
fi.icon_id,
Expand Down Expand Up @@ -104,6 +104,7 @@ func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
&feed.ScraperRules,
&feed.RewriteRules,
&feed.Crawler,
&feed.UserAgent,
&feed.Username,
&feed.Password,
&feed.Category.ID,
Expand Down Expand Up @@ -141,7 +142,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
f.id, f.feed_url, f.site_url, f.title, f.etag_header, f.last_modified_header,
f.user_id, f.checked_at at time zone u.timezone,
f.parsing_error_count, f.parsing_error_msg,
f.scraper_rules, f.rewrite_rules, f.crawler,
f.scraper_rules, f.rewrite_rules, f.crawler, f.user_agent,
f.username, f.password,
f.category_id, c.title as category_title,
fi.icon_id,
Expand All @@ -166,6 +167,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
&feed.ScraperRules,
&feed.RewriteRules,
&feed.Crawler,
&feed.UserAgent,
&feed.Username,
&feed.Password,
&feed.Category.ID,
Expand Down Expand Up @@ -194,8 +196,8 @@ func (s *Storage) CreateFeed(feed *model.Feed) error {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:CreateFeed] feedURL=%s", feed.FeedURL))
sql := `
INSERT INTO feeds
(feed_url, site_url, title, category_id, user_id, etag_header, last_modified_header, crawler, username, password)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
(feed_url, site_url, title, category_id, user_id, etag_header, last_modified_header, crawler, user_agent, username, password)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
RETURNING id
`

Expand All @@ -209,6 +211,7 @@ func (s *Storage) CreateFeed(feed *model.Feed) error {
feed.EtagHeader,
feed.LastModifiedHeader,
feed.Crawler,
feed.UserAgent,
feed.Username,
feed.Password,
).Scan(&feed.ID)
Expand All @@ -234,9 +237,9 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {

query := `UPDATE feeds SET
feed_url=$1, site_url=$2, title=$3, category_id=$4, etag_header=$5, last_modified_header=$6, checked_at=$7,
parsing_error_msg=$8, parsing_error_count=$9, scraper_rules=$10, rewrite_rules=$11, crawler=$12,
username=$13, password=$14
WHERE id=$15 AND user_id=$16`
parsing_error_msg=$8, parsing_error_count=$9, scraper_rules=$10, rewrite_rules=$11, crawler=$12, user_agent=$13,
username=$14, password=$15
WHERE id=$16 AND user_id=$17`

_, err = s.db.Exec(query,
feed.FeedURL,
Expand All @@ -251,6 +254,7 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
feed.ScraperRules,
feed.RewriteRules,
feed.Crawler,
feed.UserAgent,
feed.Username,
feed.Password,
feed.ID,
Expand Down
3 changes: 3 additions & 0 deletions template/html/add_subscription.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ <h1>{{ t "New Subscription" }}</h1>

<label><input type="checkbox" name="crawler" value="1" {{ if .form.Crawler }}checked{{ end }}> {{ t "Fetch original content" }}</label>

<label for="form-user-agent">{{ t "User-Agent" }}</label>
<input type="text" name="user_agent" id="form-user-agent" placeholder="{{ .defaultUserAgent }}" value="{{ .form.UserAgent }}">

<label for="form-feed-username">{{ t "Feed Username" }}</label>
<input type="text" name="feed_username" id="form-feed-username" value="{{ .form.Username }}">

Expand Down
1 change: 1 addition & 0 deletions template/html/choose_subscription.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ <h1>{{ t "New Subscription" }}</h1>
<form action="{{ route "chooseSubscription" }}" method="POST">
<input type="hidden" name="csrf" value="{{ .csrf }}">
<input type="hidden" name="category_id" value="{{ .form.CategoryID }}">
<input type="hidden" name="user_agent" value="{{ .form.UserAgent }}">
<input type="hidden" name="feed_username" value="{{ .form.Username }}">
<input type="hidden" name="feed_password" value="{{ .form.Password }}">
{{ if .form.Crawler }}
Expand Down
3 changes: 3 additions & 0 deletions template/html/edit_feed.html
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ <h3>{{ t "Last Parsing Error" }}</h3>
-->
<input type="text" name="feed_password" id="form-feed-password" value="{{ .form.Password }}">

<label for="form-user-agent">{{ t "User-Agent" }}</label>
<input type="text" name="user_agent" id="form-user-agent" placeholder="{{ .defaultUserAgent }}" value="{{ .form.UserAgent }}">

<label for="form-scraper-rules">{{ t "Scraper Rules" }}</label>
<input type="text" name="scraper_rules" id="form-scraper-rules" value="{{ .form.ScraperRules }}">

Expand Down
Loading

0 comments on commit 2538eea

Please sign in to comment.