Skip to content

Commit

Permalink
Merge pull request jgm#6509 from lierdakil/docx-smush-inlines-refactor
Browse files Browse the repository at this point in the history
[Docx Reader] Refactor/update Text.Pandoc.Readers.Docx.Combine.smushInlines
  • Loading branch information
jgm authored Jul 12, 2020
2 parents 20aea4c + f09e187 commit 7be86b1
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 63 deletions.
101 changes: 39 additions & 62 deletions src/Text/Pandoc/Readers/Docx/Combine.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2020 Jesse Rosenthal <[email protected]>,
2014-2020 John MacFarlane <[email protected]>
2014-2020 John MacFarlane <[email protected]>,
2020 Nikolay Yakimov <[email protected]>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <[email protected]>
Expand Down Expand Up @@ -59,79 +58,61 @@ module Text.Pandoc.Readers.Docx.Combine ( smushInlines
where

import Data.List
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>))
import qualified Data.Sequence as Seq (null)
import Data.Bifunctor
import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
, (><), (|>) )
import Text.Pandoc.Builder

data Modifier a = Modifier (a -> a)
| AttrModifier (Attr -> a -> a) Attr
| NullModifier

spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
where (l, m, r) = spaceOutInlines ms
(fs, m') = unstackInlines m
where (l, (fs, m'), r) = spaceOutInlines ms

spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
where (l, m, r) = spaceOutInlines ms
(fs, m') = unstackInlines m
where (l, (fs, m'), r) = spaceOutInlines ms

spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines)
spaceOutInlines ils =
let (fs, ils') = unstackInlines ils
contents = unMany ils'
left = case viewl contents of
(Space :< _) -> space
_ -> mempty
right = case viewr contents of
(_ :> Space) -> space
_ -> mempty in
(left, stackInlines fs $ trimInlines . Many $ contents, right)
(left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils'
-- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element
in (Many left, (fs, Many contents'), Many right)

isSpace :: Inline -> Bool
isSpace Space = True
isSpace SoftBreak = True
isSpace _ = False

stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
stackInlines [] ms = ms
stackInlines (NullModifier : fs) ms = stackInlines fs ms
stackInlines (Modifier f : fs) ms =
if isEmpty ms
if null ms
then stackInlines fs ms
else f $ stackInlines fs ms
stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms

unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
unstackInlines ms = case ilModifier ms of
NullModifier -> ([], ms)
_ -> (f : fs, ms') where
f = ilModifier ms
(fs, ms') = unstackInlines $ ilInnards ms

ilModifier :: Inlines -> Modifier Inlines
ilModifier ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph _) -> Modifier emph
(Strong _) -> Modifier strong
(SmallCaps _) -> Modifier smallcaps
(Strikeout _) -> Modifier strikeout
(Superscript _) -> Modifier superscript
(Subscript _) -> Modifier subscript
(Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
(Span attr _) -> AttrModifier spanWith attr
_ -> NullModifier
_ -> NullModifier

ilInnards :: Inlines -> Inlines
ilInnards ils = case viewl (unMany ils) of
(x :< xs) | Seq.null xs -> case x of
(Emph lst) -> fromList lst
(Strong lst) -> fromList lst
(SmallCaps lst) -> fromList lst
(Strikeout lst) -> fromList lst
(Superscript lst) -> fromList lst
(Subscript lst) -> fromList lst
(Link _ lst _) -> fromList lst
(Span _ lst) -> fromList lst
_ -> ils
_ -> ils
unstackInlines ms = case ilModifierAndInnards ms of
Nothing -> ([], ms)
Just (f, inner) -> first (f :) $ unstackInlines inner

ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines)
ilModifierAndInnards ils = case viewl $ unMany ils of
x :< xs | null xs -> second fromList <$> case x of
Emph lst -> Just (Modifier emph, lst)
Strong lst -> Just (Modifier strong, lst)
SmallCaps lst -> Just (Modifier smallcaps, lst)
Strikeout lst -> Just (Modifier strikeout, lst)
Underline lst -> Just (Modifier underline, lst)
Superscript lst -> Just (Modifier superscript, lst)
Subscript lst -> Just (Modifier subscript, lst)
Link attr lst tgt -> Just (Modifier $ linkWith attr (fst tgt) (snd tgt), lst)
Span attr lst -> Just (AttrModifier spanWith attr, lst)
_ -> Nothing
_ -> Nothing

inlinesL :: Inlines -> (Inlines, Inlines)
inlinesL ils = case viewl $ unMany ils of
Expand Down Expand Up @@ -161,12 +142,12 @@ combineSingletonInlines x y =
y_rem_attr = filter isAttrModifier y_remaining
in
case null shared of
True | isEmpty xs && isEmpty ys ->
stackInlines (x_rem_attr ++ y_rem_attr) mempty
| isEmpty xs ->
True | null xs && null ys ->
stackInlines (x_rem_attr <> y_rem_attr) mempty
| null xs ->
let (sp, y') = spaceOutInlinesL y in
stackInlines x_rem_attr mempty <> sp <> y'
| isEmpty ys ->
| null ys ->
let (x', sp) = spaceOutInlinesR x in
x' <> sp <> stackInlines y_rem_attr mempty
| otherwise ->
Expand All @@ -193,12 +174,8 @@ combineBlocks bs cs = bs <> cs
instance (Monoid a, Eq a) => Eq (Modifier a) where
(Modifier f) == (Modifier g) = f mempty == g mempty
(AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty
NullModifier == NullModifier = True
_ == _ = False

isEmpty :: (Monoid a, Eq a) => a -> Bool
isEmpty x = x == mempty

isAttrModifier :: Modifier a -> Bool
isAttrModifier (AttrModifier _ _) = True
isAttrModifier _ = False
Expand Down
Binary file modified test/docx/golden/inline_formatting.docx
Binary file not shown.
2 changes: 1 addition & 1 deletion test/docx/inline_formatting.native
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Pandoc (Meta {unMeta = fromList []})
[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]
,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."]
,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space],Emph [Underline [Str "emphasis"]],Str "."]
,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]
,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]
,Para [Str "A",Space,Str "line",LineBreak,Str "break."]]

0 comments on commit 7be86b1

Please sign in to comment.