forked from pocketbase/pocketbase
-
Notifications
You must be signed in to change notification settings - Fork 0
/
excerpt_modifier.go
140 lines (110 loc) · 3.37 KB
/
excerpt_modifier.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package rest
import (
	"errors"
	"regexp"
	"strings"
	"unicode/utf8"

	"github.com/pocketbase/pocketbase/tools/list"
	"github.com/spf13/cast"
	"golang.org/x/net/html"
)
var (
	// whitespaceRegex matches a run of one or more whitespace characters;
	// it is used to collapse such runs into a single space.
	whitespaceRegex = regexp.MustCompile(`\s+`)

	// excludeTags lists the html elements that are skipped entirely
	// (together with their content) while extracting the excerpt text.
	excludeTags = []string{
		"head",
		"style",
		"script",
		"iframe",
		"embed",
		"applet",
		"object",
		"svg",
		"img",
		"picture",
		"dialog",
		"template",
		"button",
		"form",
		"textarea",
		"input",
		"select",
		"option",
	}

	// inlineTags lists the html elements that are NOT treated as block
	// elements, i.e. no extra space is inserted around their content.
	inlineTags = []string{
		"a",
		"abbr",
		"acronym",
		"b",
		"bdo",
		"big",
		"br",
		"button",
		"cite",
		"code",
		"em",
		"i",
		"label",
		"q",
		"small",
		"span",
		"strong",
		"strike",
		"sub",
		"sup",
		"time",
	}
)
// Compile-time check that *excerptModifier satisfies the FieldModifier interface.
var _ FieldModifier = (*excerptModifier)(nil)

// excerptModifier is a FieldModifier that turns a formatted html string
// into a plain text excerpt (see its Modify method).
//
// Instances are normally created with newExcerptModifier, which
// validates the raw arguments.
type excerptModifier struct {
	max          int  // approximate max excerpt length
	withEllipsis bool // if enabled will add ellipsis when the plain text length > max
}
// newExcerptModifier validates the specified raw string arguments and
// initializes a new excerptModifier.
//
// The accepted arguments are (max, withEllipsis?):
//   - max is required; it is converted with cast.ToInt and must be > 0
//   - withEllipsis is optional; it is converted with cast.ToBool and
//     defaults to false when omitted
//
// An error is returned when no arguments or more than 2 arguments are
// provided, or when max is not a positive integer.
//
// This method is usually invoked in initModifer().
func newExcerptModifier(args ...string) (*excerptModifier, error) {
	totalArgs := len(args)

	if totalArgs == 0 {
		return nil, errors.New("max argument is required - expected (max, withEllipsis?)")
	}

	if totalArgs > 2 {
		return nil, errors.New("too many arguments - expected (max, withEllipsis?)")
	}

	// Reject negative values as well as 0 (presumably also what cast.ToInt
	// yields for values that cannot be converted): a negative max would
	// later be used as a slice bound in Modify and cause a panic.
	maxLen := cast.ToInt(args[0])
	if maxLen <= 0 {
		return nil, errors.New("max argument must be > 0")
	}

	var withEllipsis bool
	if totalArgs > 1 {
		withEllipsis = cast.ToBool(args[1])
	}

	return &excerptModifier{max: maxLen, withEllipsis: withEllipsis}, nil
}
// Modify implements the [FieldModifier.Modify] interface method.
//
// It returns a plain text excerpt/short-description from a formatted
// html string (non-string values are kept untouched).
//
// The value is parsed with html.Parse and only its text nodes are kept:
// elements listed in excludeTags are skipped together with their content,
// whitespace runs are collapsed into a single space and a space is
// inserted around every element that is not listed in inlineTags.
//
// The excerpt length is measured in runes (not bytes), so a multi-byte
// character is never split in the middle. When the plain text has more
// than max runes it is cut to max runes, trimmed and, if withEllipsis is
// enabled, suffixed with "...".
//
// An error is returned only if the html parsing fails.
func (m *excerptModifier) Modify(value any) (any, error) {
	strValue, ok := value.(string)
	if !ok {
		// not a string -> return as it is without applying the modifier
		// (we don't throw an error because the modifier could be applied for a missing expand field)
		return value, nil
	}

	doc, err := html.Parse(strings.NewReader(strValue))
	if err != nil {
		return "", err
	}

	// guard against a negative max (e.g. a modifier that wasn't created
	// with a validated argument) which would otherwise be used as an
	// invalid slice bound
	limit := m.max
	if limit < 0 {
		limit = 0
	}

	var builder strings.Builder

	var hasPrevSpace bool

	// reachedLimit reports whether the already collected text, once
	// trimmed, has more runes than the limit. From that point on any
	// further text cannot change the final excerpt.
	reachedLimit := func() bool {
		// cheap byte based pre-check (the bytes count is never
		// smaller than the runes count)
		if builder.Len() <= limit {
			return false
		}

		return utf8.RuneCountInString(strings.TrimSpace(builder.String())) > limit
	}

	// for all node types and more details check
	// https://pkg.go.dev/golang.org/x/net/html#Parse
	var stripTags func(*html.Node)
	stripTags = func(n *html.Node) {
		if n.Type == html.TextNode {
			// collapse multiple spaces into one
			txt := whitespaceRegex.ReplaceAllString(n.Data, " ")

			if hasPrevSpace {
				txt = strings.TrimLeft(txt, " ")
			}

			if txt != "" {
				hasPrevSpace = strings.HasSuffix(txt, " ")
				builder.WriteString(txt)
			}
		}

		for c := n.FirstChild; c != nil; c = c.NextSibling {
			// excerpt max has been reached => no need to further iterate
			// (checked before every child so that the remaining siblings
			// of all ancestors are skipped too)
			if reachedLimit() {
				return
			}

			if c.Type == html.ElementNode && list.ExistInSlice(c.Data, excludeTags) {
				continue
			}

			isBlock := c.Type == html.ElementNode && !list.ExistInSlice(c.Data, inlineTags)

			if isBlock && !hasPrevSpace {
				builder.WriteString(" ")
				hasPrevSpace = true
			}

			stripTags(c)

			if isBlock && !hasPrevSpace {
				builder.WriteString(" ")
				hasPrevSpace = true
			}
		}
	}
	stripTags(doc)

	result := strings.TrimSpace(builder.String())

	// cut at the byte offset of the (limit+1)-th rune, if there is one,
	// so that the excerpt always ends on a rune boundary
	seen := 0
	for i := range result {
		if seen == limit {
			result = strings.TrimSpace(result[:i])

			if m.withEllipsis {
				result += "..."
			}

			break
		}
		seen++
	}

	return result, nil
}