forked from instructure/canvas-lms
-
Notifications
You must be signed in to change notification settings - Fork 0
/
imported_html_converter.rb
81 lines (68 loc) · 2.3 KB
/
imported_html_converter.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#
# Copyright (C) 2011 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
require 'nokogiri'
# Converts HTML and text fields during a content migration and coordinates the
# link parser / resolver / replacer objects built for that migration.
class ImportedHtmlConverter
  include TextHelper
  include HtmlTextHelper

  # Element names that #convert may unwrap when
  # opts[:remove_outer_nodes_if_one_child] is set. Frozen so the shared
  # constant cannot be mutated in place.
  CONTAINER_TYPES = ['div', 'p', 'body'].freeze

  # Attribute names handed to the link parser for every node of a document.
  # Frozen so the shared constant cannot be mutated in place.
  LINK_ATTRS = ['rel', 'href', 'src', 'data', 'value', 'longdesc'].freeze

  attr_reader :link_parser, :link_resolver, :link_replacer

  # Builds the converter and its three link collaborators, each constructed
  # with the same migration object.
  #
  # @param migration the migration passed through to Importers::LinkParser,
  #   Importers::LinkResolver and Importers::LinkReplacer
  def initialize(migration)
    @migration = migration
    @link_parser = Importers::LinkParser.new(migration)
    @link_resolver = Importers::LinkResolver.new(migration)
    @link_replacer = Importers::LinkReplacer.new(migration)
  end

  # Parses +html+, passes every node/attribute pair in LINK_ATTRS to the link
  # parser, and returns the inner HTML of the resulting <body>.
  #
  # @param html [String, nil] markup to convert; nil is treated as ""
  # @param item_type forwarded unchanged to the link parser
  # @param mig_id forwarded to the link parser after +to_s+
  # @param field forwarded unchanged to the link parser
  # @param opts [Hash] only :remove_outer_nodes_if_one_child is read here
  # @return [String] the inner HTML, or "" when there is no <body> node or
  #   Nokogiri::SyntaxError is raised
  def convert(html, item_type, mig_id, field, opts = {})
    mig_id = mig_id.to_s
    doc = Nokogiri::HTML(html || "")

    doc.search("*").each do |node|
      LINK_ATTRS.each do |attr|
        @link_parser.convert_link(node, attr, item_type, mig_id, field)
      end
    end

    node = doc.at_css('body')
    return "" unless node

    if opts[:remove_outer_nodes_if_one_child]
      # Descend while the current node has exactly one child and that child
      # itself has a child; stop at the first child that is not an
      # attribute-free CONTAINER_TYPES element.
      while node.children.size == 1 && node.child.child
        break unless CONTAINER_TYPES.member?(node.child.name) && node.child.attributes.blank?

        node = node.child
      end
    end

    node.inner_html
  rescue Nokogiri::SyntaxError
    ""
  end

  # Runs +text+ (nil is treated as "") through format_message and returns the
  # first element of its result.
  # NOTE(review): format_message is presumably provided by one of the included
  # helper modules - confirm.
  def convert_text(text)
    format_message(text || "")[0]
  end

  # Resolves and replaces the links the parser has collected so far, then
  # resets the parser. Does nothing (and does not reset the parser) when the
  # unresolved link map is not present.
  def resolve_content_links!
    link_map = @link_parser.unresolved_link_map
    return unless link_map.present?

    @link_resolver.resolve_links!(link_map)
    @link_replacer.replace_placeholders!(link_map)
    @link_parser.reset!
  end

  # Whether +url+ parses as a relative URI that does not start with "//".
  #
  # @param url the URL to test
  # @return [Boolean] false (after logging a warning) when URI.parse raises a
  #   URI::Error
  def self.relative_url?(url)
    URI.parse(url).relative? && !url.to_s.start_with?("//")
  rescue URI::Error
    # leave the url as it was
    Rails.logger.warn "attempting to translate invalid url: #{url}"
    false
  end
end