Skip to content

Commit

Permalink
Detect animated images in articles asynchronously (forem#13766)
Browse files Browse the repository at this point in the history
* Add inline animated image detection

* Do not detect animation during preview

* Promote FastImage as a dependency

* Add Articles::DetectAnimatedImages service

* Restore previous parsing

* Add and use Articles::DetectAnimatedImagesWorker

* Remove obsolete poc

* Properly detect images in the after commit callback

* Simplify logic

* Use a second guard clause

* Fix parsing of relative paths and add tests

* Have Articles::DetectAnimatedImages correctly detect relative images uploaded locally

* Change Articles::DetectAnimatedImagesWorker priority to medium

* Only one & required
  • Loading branch information
rhymes authored Jun 1, 2021
1 parent c35f1c5 commit e2e9235
Show file tree
Hide file tree
Showing 12 changed files with 6,849 additions and 1 deletion.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ gem "dogstatsd-ruby", "~> 4.8" # A client for DogStatsD, an extension of the Sta
gem "doorkeeper", "~> 5.5" # Oauth 2 provider
gem "email_validator", "~> 2.2" # Email validator for Rails and ActiveModel
gem "emoji_regex", "~> 3.2" # A pair of Ruby regular expressions for matching Unicode Emoji symbols
gem "fastimage", "~> 2.2" # FastImage finds the size or type of an image given its uri by fetching as little as needed.
gem "fastly", "~> 3.0" # Client library for the Fastly acceleration system
gem "feedjira", "~> 3.1" # A feed fetching and parsing library
gem "field_test", "~> 0.4" # A/B testing
Expand Down
1 change: 1 addition & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ DEPENDENCIES
factory_bot_rails (~> 6.2)
faker (~> 2.18)
fakeredis (~> 0.8.0)
fastimage (~> 2.2)
fastly (~> 3.0)
feedjira (~> 3.1)
field_test (~> 0.4)
Expand Down
8 changes: 7 additions & 1 deletion app/models/article.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class Article < ApplicationRecord
article.notifications.any? && !article.saved_changes.empty?
}

after_commit :async_score_calc, :touch_collection, on: %i[create update]
after_commit :async_score_calc, :touch_collection, :detect_animated_images, on: %i[create update]

# The trigger `update_reading_list_document` is used to keep the `articles.reading_list_document` column updated.
#
Expand Down Expand Up @@ -817,4 +817,10 @@ def touch_collection
def notify_slack_channel_about_publication
Slack::Messengers::ArticlePublished.call(article: self)
end

def detect_animated_images
return unless saved_change_to_attribute?(:processed_html)

::Articles::DetectAnimatedImagesWorker.perform_async(id)
end
end
56 changes: 56 additions & 0 deletions app/services/articles/detect_animated_images.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
module Articles
module DetectAnimatedImages
IMAGES_IN_LIQUID_TAGS_SELECTORS = [
".liquid-comment img", # CommentTag
".ltag-github-readme-tag img", # GithubReadmeTag
".ltag__link__pic img", # LinkTag and MediumTag profile pic
".ltag__link__servicename img", # MediumTag
".ltag__link__taglist img", # LinkTag
".ltag__reddit--container img", # RedditTag
".ltag__stackexchange--container img", # StackexchangeTag
".ltag__twitter-tweet img", # TweetTag
".ltag__user img", # UserTag and OrganizationTag
".ltag__user-subscription-tag img", # UserSubscriptionTag
".ltag_github-liquid-tag img", # GitHubIssueTag
".podcastliquidtag img", # PodcastTag
].join(", ").freeze

def self.call(article)
parsed_html = Nokogiri::HTML.fragment(article.processed_html)

# we ignore images contained in liquid tags as they are not animated
images = parsed_html.css("img") - parsed_html.css(IMAGES_IN_LIQUID_TAGS_SELECTORS)

found = false
images.each do |img|
src = img.attr("src")
next unless src

image = if URI.parse(src).relative?
retrieve_image_from_uploader_store(src)
else
src
end

next if image.blank?
next unless FastImage.animated?(image)

img["data-animated"] = true
found = true
end

article.update_columns(processed_html: parsed_html.to_html) if found
end

def self.retrieve_image_from_uploader_store(src)
filename = File.basename(src)
uploader = ArticleImageUploader.new
uploader.retrieve_from_store!(filename)

return unless uploader.file.exists?

uploader.file&.file
end
private_class_method :retrieve_image_from_uploader_store
end
end
17 changes: 17 additions & 0 deletions app/workers/articles/detect_animated_images_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module Articles
class DetectAnimatedImagesWorker
include Sidekiq::Worker

sidekiq_options queue: :medium_priority, retry: 5, lock: :until_executing

def perform(article_id)
article = Article.find_by(id: article_id)
return unless article

detected = Articles::DetectAnimatedImages.call(article)
return unless detected

EdgeCache::BustArticle.call(article)
end
end
end
24 changes: 24 additions & 0 deletions spec/models/article_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,16 @@ def build_and_validate_article(*args)
end
end

context "when callbacks are triggered after create" do
describe "detect animated images" do
it "enqueues Articles::DetectAnimatedImagesWorker" do
sidekiq_assert_enqueued_with(job: Articles::DetectAnimatedImagesWorker, args: [article.id]) do
build(:article).save
end
end
end
end

context "when callbacks are triggered after save" do
describe "article path sanitizing" do
it "returns a downcased username when user has uppercase characters" do
Expand Down Expand Up @@ -1092,6 +1102,20 @@ def build_and_validate_article(*args)
end
end
end

describe "detect animated images" do
it "enqueues Articles::DetectAnimatedImagesWorker if the HTML has changed" do
sidekiq_assert_enqueued_with(job: Articles::DetectAnimatedImagesWorker, args: [article.id]) do
article.update(body_markdown: "a body")
end
end

it "does not Articles::DetectAnimatedImagesWorker if the HTML does not change" do
sidekiq_assert_no_enqueued_jobs(only: Articles::DetectAnimatedImagesWorker) do
article.update(tag_list: %w[fsharp go])
end
end
end
end

context "when triggers are invoked" do
Expand Down
193 changes: 193 additions & 0 deletions spec/services/articles/detect_animated_images_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
require "rails_helper"

RSpec.describe Articles::DetectAnimatedImages, type: :service do
let(:article) { create(:article) }

def assert_unchanged(article)
previous_html = article.processed_html
described_class.call(article)
expect(article.reload.processed_html).to eq(previous_html)
end

def assert_has_data_animated_attribute(article, count = 1)
described_class.call(article)

parsed_html = Nokogiri::HTML.fragment(article.processed_html)
expect(parsed_html.css("img[data-animated]").count).to eq(count)
end

context "when the body has no images" do
it "does not alter the processed HTML" do
assert_unchanged(article)
end
end

context "when the body renders a liquid tag with images" do
it "does not alter the processed HTML using CommentTag" do
comment = create(:comment)
article.update(body_markdown: "{% comment #{comment.id_code} %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using Github::GitHubIssueTag",
vcr: { cassette_name: "github_client_issue" } do
article.update(body_markdown: "{% github https://github.com/thepracticaldev/dev.to/issues/7434 %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using Github::GithubReadmeTag",
vcr: { cassette_name: "github_client_repository" } do
article.update(body_markdown: "{% github https://github.com/rust-lang/rust %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using MediumTag", vcr: { cassette_name: "medium" } do
url = "https://medium.com/@edisonywh/my-ruby-journey-hooking-things-up-91d757e1c59c"
article.update(body_markdown: "{% medium #{url} %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using LinkTag" do
article.update(body_markdown: "{% link #{article.path} %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using OrganizationTag" do
organization = create(:organization)
article.update(body_markdown: "{% organization #{organization.slug} %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using PodcastTag" do
podcast_episode = create(:podcast_episode)
article.update(body_markdown: "{% podcast #{podcast_episode.path} %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using RedditTag", vcr: { cassette_name: "reddit_liquid_tag" } do
url = "https://www.reddit.com/r/IAmA/comments/afvl2w/im_scott_from_scotts_cheap_flights_my_profession"
article.update(body_markdown: "{% reddit #{url} %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using StackexchangeTag",
vcr: { cassette_name: "stackexchange_tag_stackoverflow" } do
article.update(body_markdown: "{% stackoverflow 57496168 %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using TweetTag",
vcr: { cassette_name: "twitter_client_status_extended" } do
article.update(body_markdown: "{% twitter 1018911886862057472 %}")

assert_unchanged(article)
end

it "does not alter the processed HTML using UserSubscriptionTag" do
article = create(:article, :with_user_subscription_tag_role_user, with_user_subscription_tag: true)

assert_unchanged(article)
end

it "does not alter the processed HTML using UserTag" do
article.update(body_markdown: "{% user #{article.user.username} %}")

assert_unchanged(article)
end
end

context "when the body contains uploaded images" do
let(:uploader) { ArticleImageUploader.new }
let(:static_image) do
Rack::Test::UploadedFile.new(
Rails.root.join("spec/support/fixtures/images/image1.jpeg"),
"image/jpeg",
)
end
let(:animated_image) do
Rack::Test::UploadedFile.new(
Rails.root.join("spec/support/fixtures/images/image.gif"),
"image/gif",
)
end

it "does not set data-animated to true with a static image" do
uploader.store!(static_image)

article.update(body_markdown: "![image](#{uploader.url})")

assert_unchanged(article)
end

it "sets data-animated to true with an animated image" do
uploader.store!(animated_image)

article.update(body_markdown: "![image](#{uploader.url})")

assert_has_data_animated_attribute(article)
end

it "works with multiple animated images" do
urls = Array.new(2) do
uploader.store!(animated_image)
uploader.url
end

article.update(body_markdown: "![image](#{urls.first}) ![image](#{urls.second})")

assert_has_data_animated_attribute(article, urls.count)
end

it "works with static images mixed with animated images" do
urls = []

uploader.store!(static_image)
urls << uploader.url

uploader.store!(animated_image)
urls << uploader.url

article.update(body_markdown: "![image](#{urls.first}) ![image](#{urls.second})")

assert_has_data_animated_attribute(article, 1)
end
end

context "when the body contains remote images" do
let(:static_image_url) { "https://dummyimage.com/600.jpg" }
let(:animated_image_url) { "https://i.giphy.com/media/kHTMgZ3PeK6wJsqy2s/source.gif" }

it "does not set data-animated to true with a static image" do
article.update(body_markdown: "![image](#{static_image_url})")

assert_unchanged(article)
end

it "sets data-animated to true with an animated image", vcr: { cassette_name: "download_animated_image" } do
article.update(body_markdown: "![image](#{animated_image_url})")

assert_has_data_animated_attribute(article)
end

it "works with multiple animated images", vcr: { cassette_name: "download_animated_images_twice" } do
article.update(body_markdown: "![image](#{animated_image_url}) ![image](#{animated_image_url})")

assert_has_data_animated_attribute(article, 2)
end

it "works with static images mixed with animated images", vcr: { cassette_name: "download_animated_image" } do
article.update(body_markdown: "![image](#{static_image_url}) ![image](#{animated_image_url})")

assert_has_data_animated_attribute(article, 1)
end
end
end
Binary file added spec/support/fixtures/images/image.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit e2e9235

Please sign in to comment.