From 5faa36430cd77ae256a79cb52a7808d540da50da Mon Sep 17 00:00:00 2001 From: xuri Date: Sat, 12 Jun 2021 08:49:18 +0800 Subject: [PATCH] skip XML control character in the escape literal string, and update dependencies --- go.mod | 4 ++-- go.sum | 10 +++++----- lib.go | 31 +++++++++++++++++++++---------- lib_test.go | 2 ++ 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/go.mod b/go.mod index 692eaa331f..78ae93c412 100644 --- a/go.mod +++ b/go.mod @@ -7,8 +7,8 @@ require ( github.com/richardlehane/mscfb v1.0.3 github.com/stretchr/testify v1.6.1 github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3 - golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc + golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb - golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d + golang.org/x/net v0.0.0-20210610132358-84b48f89b13b golang.org/x/text v0.3.6 ) diff --git a/go.sum b/go.sum index a709fdfe20..309a85b514 100644 --- a/go.sum +++ b/go.sum @@ -13,15 +13,15 @@ github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3 h1:EpI0bqf/eX9SdZDwlMmahKM+CDBgNbsXMhsN28XrM8o= github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= -golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc h1:+q90ECDSAQirdykUN6sPEiBXBsp8Csjcca8Oy7bgLTA= -golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a h1:kr2P4QFmQr29mSLA43kwrOcgcReGTfbE9N577tCTuBc= +golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb h1:fqpd0EBDzlHRCjiphRR5Zo/RSWWQlWv34418dnEixWk= golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d h1:BgJvlyh+UqCUaPlscHJ+PN8GcpfrFdr7NHjd1JL0+Gs= -golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= +golang.org/x/net v0.0.0-20210610132358-84b48f89b13b h1:k+E048sYJHyVnsr1GDrRZWQ32D2C7lWs9JRc0bel53A= +golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/lib.go b/lib.go index e221d17ca2..eb8ced6075 100644 --- a/lib.go +++ b/lib.go @@ -21,6 +21,7 @@ import ( "regexp" "strconv" "strings" + "unicode" ) // ReadZipReader can be used to read the spreadsheet in memory without touching the @@ -467,34 +468,44 @@ func isNumeric(s string) (bool, int) { // initial underscore shall itself be escaped (i.e. stored as _x005F_). For // example: The string literal _x0008_ would be stored as _x005F_x0008_. func bstrUnmarshal(s string) (result string) { - m := regexp.MustCompile(`_x[a-zA-Z0-9]{4}_`) + bstrExp := regexp.MustCompile(`_x[a-zA-Z0-9]{4}_`) escapeExp := regexp.MustCompile(`x[a-zA-Z0-9]{4}_`) - matches := m.FindAllStringSubmatchIndex(s, -1) - var cursor int + matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0 for _, match := range matches { result += s[cursor:match[0]] - if s[match[0]:match[1]] == "_x005F_" { - if len(s) > match[1]+6 && !escapeExp.MatchString(s[match[1]:match[1]+6]) { - result += s[match[0]:match[1]] + subStr := s[match[0]:match[1]] + if subStr == "_x005F_" { + if l > match[1]+6 && !escapeExp.MatchString(s[match[1]:match[1]+6]) { + result += subStr cursor = match[1] continue } - if len(s) > match[1]+5 && s[match[1]:match[1]+5] == "x005F" { + if l > match[1]+5 && s[match[1]:match[1]+5] == "x005F" { result += "_" cursor = match[1] continue } - if escapeExp.MatchString(s[match[0]:match[1]]) { + if escapeExp.MatchString(subStr) { result += "_" cursor = match[1] continue } } - if escapeExp.MatchString(s[match[0]:match[1]]) { + if bstrExp.MatchString(subStr) { + x, _ := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`) + hasRune := false + for _, c := range string(x) { + if unicode.IsControl(c) { + hasRune = true + } + } + if !hasRune { + result += string(x) + } cursor = match[1] } } - if cursor < len(s) { + if cursor < l { result += s[cursor:] } return result diff --git a/lib_test.go b/lib_test.go index bd28c7e411..58c6ed9fbd 100644 --- a/lib_test.go +++ b/lib_test.go @@ -237,10 +237,12 @@ func TestGenXMLNamespace(t *testing.T) { func TestBstrUnmarshal(t *testing.T) { bstrs := map[string]string{ + "*_x0000_": "*", "*": "*", "*_x0008_": "*", "_x0008_*": "*", "*_x0008_*": "**", + "*_x4F60__x597D_": "*你好", "*_x005F__x0008_*": "*_x005F_*", "*_x005F_x0001_*": "*_x0001_*", "*_x005F_x005F_x005F_x0006_*": "*_x005F_x0006_*",