forked from dominikh/go-id3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathencoding.go
149 lines (127 loc) · 2.5 KB
/
encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
package id3
import (
"fmt"
utf16pkg "unicode/utf16"
)
// Text encodings known to this package. iota numbers them consecutively
// from zero in the order listed (iso88591 = 0 ... utf8 = 3).
// NOTE(review): presumably these match the ID3v2 text-encoding byte found
// in frames — confirm against the ID3v2 specification.
const (
// iso88591 is reported as "ISO-8859-1" by String.
iso88591 Encoding = iota
// utf16bom is reported as "UTF-16" by String.
utf16bom
// utf16be is reported as "UTF-16BE" by String.
utf16be
// utf8 is reported as "UTF-8" by String.
utf8
)
var (
// utf16nul is the two-byte terminator returned by terminator for the
// UTF-16 encodings.
utf16nul = []byte{0, 0}
// nul is the one-byte terminator returned by terminator for every other
// encoding.
nul = []byte{0}
// utf8byte is a one-byte slice holding the numeric value of the utf8
// constant.
utf8byte = []byte{byte(utf8)}
)
// Encoding identifies a text encoding; its underlying type is a single byte.
type Encoding byte
// String returns the conventional name of the encoding. Values that are
// not one of the known constants yield "Unknown encoding N", where N is
// the numeric value of e.
func (e Encoding) String() string {
	// Indexed by the encoding constants themselves.
	names := [...]string{
		iso88591: "ISO-8859-1",
		utf16bom: "UTF-16",
		utf16be:  "UTF-16BE",
		utf8:     "UTF-8",
	}
	if int(e) < len(names) {
		return names[e]
	}
	return fmt.Sprintf("Unknown encoding %d", byte(e))
}
// toUTF8 converts b, which is encoded as e, to UTF-8 and returns the
// result in a newly allocated slice. If the converted text ends in a zero
// byte, exactly one such byte is removed. It panics with "unsupported"
// when e is not one of the known encodings.
func (e Encoding) toUTF8(b []byte) []byte {
	var decoded []byte
	switch {
	case e == iso88591:
		decoded = iso88591ToUTF8(b)
	case e == utf8:
		// Already UTF-8: hand back a copy so the caller's slice is not aliased.
		decoded = append(make([]byte, 0, len(b)), b...)
	case e == utf16bom || e == utf16be:
		decoded = utf16ToUTF8(b)
	default:
		panic("unsupported")
	}

	// Drop a single trailing NUL, if present.
	if last := len(decoded) - 1; last >= 0 && decoded[last] == 0 {
		decoded = decoded[:last]
	}
	return decoded
}
// toISO88591 converts b from encoding e to ISO-8859-1. Only UTF-8 is
// supported as the source encoding; any other value of e panics.
func (e Encoding) toISO88591(b []byte) []byte {
	switch e {
	case utf8:
		return utf8ToISO88591(b)
	default:
		panic("Conversion to ISO-8859-1 is only implemented for UTF-8")
	}
}
// terminator returns the byte sequence that ends a string in encoding e:
// the shared utf16nul slice for the two UTF-16 encodings and the shared
// nul slice for everything else. The returned slice is not a copy.
func (e Encoding) terminator() []byte {
	if e == utf16bom || e == utf16be {
		return utf16nul
	}
	return nul
}
// utf16ToUTF8 decodes UTF-16 encoded bytes and returns the text encoded
// as UTF-8.
//
// A leading byte order mark (FF FE for little endian, FE FF for big
// endian) selects the byte order and is not part of the output. Without a
// BOM the input is read as big endian.
//
// Input that is empty or too short to hold a BOM is accepted, and a
// dangling final byte that does not complete a 16-bit code unit is
// ignored, so malformed input never causes an out-of-range panic.
// Invalid surrogate sequences are replaced with U+FFFD by the
// unicode/utf16 decoder.
func utf16ToUTF8(input []byte) []byte {
	// ID3v2 allows UTF-16 in two ways: with a BOM or as big endian.
	// So if there is no little-endian BOM, it has to be big endian
	// either way.
	bigEndian := true
	if len(input) >= 2 {
		switch {
		case input[0] == 0xFF && input[1] == 0xFE:
			bigEndian = false
			input = input[2:]
		case input[0] == 0xFE && input[1] == 0xFF:
			input = input[2:]
		}
	}

	// Integer division drops an unpaired trailing byte; iterating over the
	// code units (rather than the bytes) keeps every index in range.
	units := make([]uint16, len(input)/2)
	for i := range units {
		hi, lo := input[2*i], input[2*i+1]
		if !bigEndian {
			hi, lo = lo, hi
		}
		units[i] = uint16(hi)<<8 | uint16(lo)
	}

	return []byte(string(utf16pkg.Decode(units)))
}
// utf8ToISO88591 converts UTF-8 encoded text to ISO-8859-1 and returns it
// in a newly allocated slice.
//
// ASCII bytes are copied unchanged. The two-byte sequences introduced by
// 0xC2 or 0xC3 (U+0080 through U+00FF) become the single byte with the
// same code point. Everything else — characters outside the ISO-8859-1
// range as well as malformed or truncated sequences — cannot be
// represented and is replaced by a single '?'. The function never reads
// past the end of input.
func utf8ToISO88591(input []byte) []byte {
	res := make([]byte, 0, len(input))
	for j := 0; j < len(input); j++ {
		b := input[j]
		switch {
		case b < 0x80:
			// Plain ASCII. 0x80 itself is a continuation byte, not ASCII.
			res = append(res, b)
		case (b == 0xC2 || b == 0xC3) && j+1 < len(input) && input[j+1]&0xC0 == 0x80:
			// Lead byte 110000xx followed by 10yyyyyy encodes code point
			// xxyyyyyy, which is exactly the ISO-8859-1 byte.
			res = append(res, (b&0x03)<<6|input[j+1]&0x3F)
			j++
		default:
			// Not representable: emit one placeholder and skip the
			// continuation bytes that belong to this sequence so the scan
			// resumes at the next character boundary.
			res = append(res, '?')
			for j+1 < len(input) && input[j+1]&0xC0 == 0x80 {
				j++
			}
		}
	}
	return res
}
// iso88591ToUTF8 converts ISO-8859-1 encoded text to UTF-8 and returns it
// in a newly allocated slice.
//
//   - ISO-8859-1 bytes match Unicode code points.
//   - All bytes < 0x80 are ASCII and are encoded identically in UTF-8.
//   - All bytes >= 0x80 (including 0x80 itself) encode as 2 bytes in UTF-8.
func iso88591ToUTF8(input []byte) []byte {
	// Worst case every byte expands to two.
	res := make([]byte, 0, len(input)*2)
	for _, b := range input {
		if b < 0x80 {
			res = append(res, b)
			continue
		}
		// Code point xxyyyyyy becomes 110000xx 10yyyyyy: the lead byte is
		// 0xC2 for 0x80-0xBF and 0xC3 for 0xC0-0xFF.
		res = append(res, 0xC0|b>>6, 0x80|b&0x3F)
	}
	return res
}