|
1 | 1 | from __future__ import absolute_import, division, print_function
|
2 | 2 |
|
3 | 3 | from . import swigwrapper # pylint: disable=import-self
|
4 |
| -from .swigwrapper import UTF8Alphabet |
5 | 4 |
|
6 | 5 | # This module is built with SWIG_PYTHON_STRICT_BYTE_CHAR so we must handle
|
7 | 6 | # string encoding explicitly, here and throughout this file.
|
@@ -89,6 +88,56 @@ def Decode(self, input):
|
89 | 88 | return res.decode('utf-8')
|
90 | 89 |
|
91 | 90 |
|
class UTF8Alphabet(swigwrapper.UTF8Alphabet):
    """Convenience subclass of the SWIG UTF8Alphabet that calls init on construction.

    The str-facing methods encode their argument to UTF-8 bytes before
    delegating to the wrapped implementation, and the decode methods turn the
    returned bytes back into text.
    """

    def __init__(self):
        super(UTF8Alphabet, self).__init__()
        # init() reports its outcome as an integer; zero means success.
        status = self.init(b'')
        if status == 0:
            return
        raise ValueError('UTF8Alphabet initialization failed with error code 0x{:X}'.format(status))

    def CanEncodeSingle(self, input):
        '''
        Report whether a single character/output class has a corresponding
        label in the alphabet.
        '''
        utf8_bytes = input.encode('utf-8')
        return super(UTF8Alphabet, self).CanEncodeSingle(utf8_bytes)

    def CanEncode(self, input):
        '''
        Report whether the whole string can be encoded into labels in this
        alphabet.
        '''
        utf8_bytes = input.encode('utf-8')
        return super(UTF8Alphabet, self).CanEncode(utf8_bytes)

    def EncodeSingle(self, input):
        '''
        Turn a single character/output class into its label. The character
        must be in the alphabet (this method will assert that); check with
        `CanEncodeSingle` first.
        '''
        utf8_bytes = input.encode('utf-8')
        return super(UTF8Alphabet, self).EncodeSingle(utf8_bytes)

    def Encode(self, input):
        '''
        Turn a sequence of character/output classes into a sequence of labels.
        Characters are assumed to always take a single Unicode codepoint and
        must be in the alphabet (this method will assert that); check with
        `CanEncode` and `CanEncodeSingle` first.
        '''
        utf8_bytes = input.encode('utf-8')
        labels = super(UTF8Alphabet, self).Encode(utf8_bytes)
        # The wrapped call returns SWIG's UnsignedIntVec; hand back a plain
        # Python list instead.
        return list(labels)

    def DecodeSingle(self, input):
        '''
        Pass `input` to the wrapped DecodeSingle and return its bytes result
        decoded as UTF-8 text.
        '''
        raw = super(UTF8Alphabet, self).DecodeSingle(input)
        return raw.decode('utf-8')

    def Decode(self, input):
        '''Turn a sequence of labels into a string.'''
        raw = super(UTF8Alphabet, self).Decode(input)
        return raw.decode('utf-8')
| 139 | + |
| 140 | + |
92 | 141 |
|
93 | 142 | def ctc_beam_search_decoder(probs_seq,
|
94 | 143 | alphabet,
|
|
0 commit comments