Skip to content

Commit

Permalink
Merge pull request lichess-org#10555 from lichess-org/markdown-extens…
Browse files Browse the repository at this point in the history
…ions

robust markdown links and image whitelist
  • Loading branch information
ornicar authored Feb 25, 2022
2 parents a2d0482 + 3902c7f commit 00649eb
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 70 deletions.
50 changes: 0 additions & 50 deletions modules/common/src/main/Form.scala
Original file line number Diff line number Diff line change
Expand Up @@ -123,56 +123,6 @@ object Form {
}
}

object markdownImage {
private val allowedDomains =
List(
"imgur.com",
"giphy.com",
"wikimedia.org",
"creativecommons.org",
"pexels.com",
"piqsels.com",
"freeimages.com",
"unsplash.com",
"pixabay.com",
"githubusercontent.com",
"googleusercontent.com",
"i.ibb.co",
"i.postimg.cc",
"xkcd.com",
"lichess1.org"
)
private val imageDomainRegex = """^(?:https?://)([^/]+)/.{6,}""".r
sealed abstract private class Bad(val msg: String)
private case class BadUrl(url: String) extends Bad(s"Invalid image URL: $url")
private case class BadDomain(domain: String) extends Bad(s"Disallowed image domain: $domain")
val constraint = V.Constraint((s: String) =>
Markdown
.imageUrls(s)
.map {
case imageDomainRegex(domain) =>
!allowedDomains.exists(d => domain == d || domain.endsWith(s".$d")) option BadDomain(
domain
)
case url => BadUrl(url).some
}
.flatten match {
case Nil => V.Valid
case bads =>
V.Invalid(
bads.map(_.msg).map(V.ValidationError(_)) ::: {
bads.exists {
case _: BadDomain => true
case _ => false
} ?? List(
V.ValidationError(s"Allowed domains: ${allowedDomains mkString ", "}")
)
}
)
}
)
}

def stringIn(choices: Options[String]) =
text.verifying(mustBeOneOf(choices.map(_._1)), hasKey(choices, _))

Expand Down
131 changes: 117 additions & 14 deletions modules/common/src/main/Markdown.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
package lila.common

import lila.base.RawHtml
import com.vladsch.flexmark.ext.autolink.AutolinkExtension
import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension
import com.vladsch.flexmark.ext.tables.TablesExtension
import com.vladsch.flexmark.html.HtmlRenderer
import com.vladsch.flexmark.html.{
AttributeProvider,
HtmlRenderer,
HtmlWriter,
IndependentAttributeProviderFactory
}
import com.vladsch.flexmark.html.renderer.{
AttributablePart,
CoreNodeRenderer,
LinkResolverContext,
LinkType,
NodeRenderer,
NodeRendererContext,
NodeRendererFactory,
NodeRenderingHandler
}
import com.vladsch.flexmark.parser.Parser
import com.vladsch.flexmark.util.data.MutableDataSet
import com.vladsch.flexmark.util.ast.{ Node, TextCollectingVisitor }
import com.vladsch.flexmark.util.data.{ DataHolder, MutableDataHolder, MutableDataSet }
import com.vladsch.flexmark.util.html.MutableAttributes
import com.vladsch.flexmark.ast.{ Image, Link }
import io.mola.galimatias.URL
import scala.collection.JavaConverters
import java.util.Arrays
import scala.jdk.CollectionConverters._
import lila.base.RawHtml
import scala.util.Try

final class Markdown(
autoLink: Boolean = true,
Expand All @@ -28,6 +49,8 @@ final class Markdown(
if (table) extensions.add(TablesExtension.create())
if (strikeThrough) extensions.add(StrikethroughExtension.create())
if (autoLink) extensions.add(AutolinkExtension.create())
extensions.add(Markdown.NofollowExtension)
extensions.add(Markdown.WhitelistedImageExtension)

private val options = new MutableDataSet()
.set(Parser.EXTENSIONS, extensions)
Expand All @@ -51,12 +74,6 @@ final class Markdown(

private val logger = lila.log("markdown")

// quick and dirty.
// there should be a clean way to do it:
// https://programming.vip/docs/flexmark-java-markdown-add-target-attribute-to-link.html
private def addLinkAttributes(markup: Html) =
markup.replace("<a href=", """<a rel="nofollow noopener noreferrer" href=""")

private def mentionsToLinks(markdown: Text): Text =
RawHtml.atUsernameRegex.replaceAllIn(markdown, "[@$1](/@/$1)")

Expand All @@ -68,7 +85,7 @@ final class Markdown(
Chronometer
.sync {
try {
addLinkAttributes(renderer.render(parser.parse(mentionsToLinks(preventStackOverflow(text)))))
renderer.render(parser.parse(mentionsToLinks(preventStackOverflow(text))))
} catch {
case e: StackOverflowError =>
logger.branch(key).error("StackOverflowError", e)
Expand All @@ -82,8 +99,94 @@ final class Markdown(

object Markdown {

private val imageRegex = """!\[[^\]]*\]\((.*?)\s*("(?:.*[^"])")?\s*\)""".r

def imageUrls(markdown: String): List[String] =
imageRegex.findAllIn(markdown).matchData.map(_ group 1).toList
private val rel = "nofollow noopener noreferrer"

private object WhitelistedImageExtension extends HtmlRenderer.HtmlRendererExtension {
override def rendererOptions(options: MutableDataHolder) = ()
override def extend(htmlRendererBuilder: HtmlRenderer.Builder, rendererType: String) =
htmlRendererBuilder
.nodeRendererFactory(new NodeRendererFactory {
override def apply(options: DataHolder) = WhitelistedImageNodeRenderer
})
.unit
}
private object WhitelistedImageNodeRenderer extends NodeRenderer {
override def getNodeRenderingHandlers() =
new java.util.HashSet(
Arrays.asList(
new NodeRenderingHandler(classOf[Image], render)
)
)

private val whitelist =
List(
"imgur.com",
"giphy.com",
"wikimedia.org",
"creativecommons.org",
"pexels.com",
"piqsels.com",
"freeimages.com",
"unsplash.com",
"pixabay.com",
"githubusercontent.com",
"googleusercontent.com",
"i.ibb.co",
"i.postimg.cc",
"xkcd.com",
"lichess1.org"
)
private def whitelistedSrc(src: String): Option[String] =
for {
url <- Try(URL.parse(src)).toOption
if url.scheme == "http" || url.scheme == "https"
host <- Option(url.host).map(_.toString)
if whitelist.exists(h => host == h || host.endsWith(s".$h"))
} yield url.toString

private def render(node: Image, context: NodeRendererContext, html: HtmlWriter): Unit =
// Based on implementation in CoreNodeRenderer.
if (context.isDoNotRenderLinks || CoreNodeRenderer.isSuppressedLinkPrefix(node.getUrl(), context))
context.renderChildren(node)
else {
val resolvedLink = context.resolveLink(LinkType.IMAGE, node.getUrl().unescape(), null, null)
val url = resolvedLink.getUrl()
val altText = new TextCollectingVisitor().collectAndGetText(node)
whitelistedSrc(url) match {
case Some(src) =>
html
.srcPos(node.getChars())
.attr("src", src)
.attr("alt", altText)
.attr(resolvedLink.getNonNullAttributes())
.withAttr(resolvedLink)
.tagVoid("img")
case None =>
html
.srcPos(node.getChars())
.attr("href", url)
.attr("rel", rel)
.withAttr(resolvedLink)
.tag("a")
.text(altText)
.tag("/a")
}
}.unit
}

private object NofollowExtension extends HtmlRenderer.HtmlRendererExtension {
override def rendererOptions(options: MutableDataHolder) = ()
override def extend(htmlRendererBuilder: HtmlRenderer.Builder, rendererType: String) =
htmlRendererBuilder
.attributeProviderFactory(new IndependentAttributeProviderFactory {
override def apply(context: LinkResolverContext): AttributeProvider = NofollowAttributeProvider
})
.unit
}
private object NofollowAttributeProvider extends AttributeProvider {
override def setAttributes(node: Node, part: AttributablePart, attributes: MutableAttributes) = {
if (node.isInstanceOf[Link] && part == AttributablePart.LINK)
attributes.replaceValue("rel", rel).unit
}
}
}
8 changes: 4 additions & 4 deletions modules/team/src/main/TeamForm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import play.api.data._
import play.api.data.Forms._
import scala.concurrent.duration._

import lila.common.Form.{ cleanNonEmptyText, cleanText, markdownImage, mustNotContainLichess, numberIn }
import lila.common.Form.{ cleanNonEmptyText, cleanText, mustNotContainLichess, numberIn }
import lila.db.dsl._

final private[team] class TeamForm(
Expand All @@ -15,7 +15,7 @@ final private[team] class TeamForm(
extends lila.hub.CaptchedForm {

private object Fields {
val name = "name" -> cleanText(minLength = 3, maxLength = 60).verifying(mustNotContainLichess(false))
val name = "name" -> cleanText(minLength = 3, maxLength = 60).verifying(mustNotContainLichess(false))
val password = "password" -> optional(cleanText(maxLength = 60))
def passwordCheck(team: Team) = "password" -> optional(text).verifying(
"team:incorrectEntryCode",
Expand All @@ -25,9 +25,9 @@ final private[team] class TeamForm(
"message" -> optional(cleanText(minLength = 30, maxLength = 2000))
.verifying("Request message required", msg => msg.isDefined || team.open)
val description =
"description" -> cleanText(minLength = 30, maxLength = 4000).verifying(markdownImage.constraint)
"description" -> cleanText(minLength = 30, maxLength = 4000)
val descPrivate =
"descPrivate" -> optional(cleanNonEmptyText(maxLength = 4000).verifying(markdownImage.constraint))
"descPrivate" -> optional(cleanNonEmptyText(maxLength = 4000))
val request = "request" -> boolean
val gameId = "gameId" -> text
val move = "move" -> text
Expand Down
4 changes: 2 additions & 2 deletions modules/ublog/src/main/UblogForm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import org.joda.time.DateTime
import play.api.data._
import play.api.data.Forms._

import lila.common.Form.{ cleanNonEmptyText, cleanText, markdownImage, stringIn }
import lila.common.Form.{ cleanNonEmptyText, cleanText, stringIn }
import lila.i18n.{ defaultLang, LangList }
import lila.user.User
import play.api.i18n.Lang
Expand All @@ -19,7 +19,7 @@ final class UblogForm(markup: UblogMarkup, val captcher: lila.hub.actors.Captche
mapping(
"title" -> cleanNonEmptyText(minLength = 3, maxLength = 80),
"intro" -> cleanNonEmptyText(minLength = 0, maxLength = 1_000),
"markdown" -> cleanNonEmptyText(minLength = 0, maxLength = 100_000).verifying(markdownImage.constraint),
"markdown" -> cleanNonEmptyText(minLength = 0, maxLength = 100_000),
"imageAlt" -> optional(cleanNonEmptyText(minLength = 3, maxLength = 200)),
"imageCredit" -> optional(cleanNonEmptyText(minLength = 3, maxLength = 200)),
"language" -> optional(stringIn(LangList.popularNoRegion.map(_.code).toSet)),
Expand Down

0 comments on commit 00649eb

Please sign in to comment.