3
3
#include < string>
4
4
#include < vector>
5
5
#include < torch/torch.h>
6
+ #include < memory>
6
7
#include " scorer.h"
7
8
#include " ctc_beam_search_decoder.h"
8
9
#include " utf8.h"
10
+ #include " boost/shared_ptr.hpp"
11
+ #include " boost/python.hpp"
12
+ #include " boost/python/stl_iterator.hpp"
9
13
10
- int utf8_to_utf8_char_vec (const char * labels, std::vector<std::string>& new_vocab) {
11
- const char * str_i = labels;
12
- const char * end = str_i + strlen (labels)+1 ;
13
- do {
14
- char u[5 ] = {0 ,0 ,0 ,0 ,0 };
15
- uint32_t code = utf8::next (str_i, end);
16
- if (code == 0 ) {
17
- continue ;
18
- }
19
- utf8::append (code, u);
20
- new_vocab.push_back (std::string (u));
14
+ using namespace std ;
15
+
16
+ template <typename T>
17
+ inline
18
+ std::vector< T > py_list_to_std_vector ( const boost::python::object& iterable )
19
+ {
20
+ return std::vector< T >( boost::python::stl_input_iterator< T >( iterable ),
21
+ boost::python::stl_input_iterator< T >( ) );
22
+ }
23
+
24
+ template <class T >
25
+ inline
26
+ boost::python::list std_vector_to_py_list (std::vector<T> vector) {
27
+ typename std::vector<T>::iterator iter;
28
+ boost::python::list list;
29
+ for (iter = vector.begin (); iter != vector.end (); ++iter) {
30
+ list.append (*iter);
21
31
}
22
- while (str_i < end) ;
32
+ return list ;
23
33
}
24
34
25
35
int beam_decode (at::Tensor th_probs,
26
36
at::Tensor th_seq_lens,
27
- const char * labels ,
37
+ std::vector<std::string> new_vocab ,
28
38
int vocab_size,
29
39
size_t beam_size,
30
40
size_t num_processes,
@@ -38,8 +48,6 @@ int beam_decode(at::Tensor th_probs,
38
48
at::Tensor th_scores,
39
49
at::Tensor th_out_length)
40
50
{
41
- std::vector<std::string> new_vocab;
42
- utf8_to_utf8_char_vec (labels, new_vocab);
43
51
Scorer *ext_scorer = NULL ;
44
52
if (scorer != NULL ) {
45
53
ext_scorer = static_cast <Scorer *>(scorer);
@@ -67,7 +75,7 @@ int beam_decode(at::Tensor th_probs,
67
75
68
76
std::vector<std::vector<std::pair<double , Output>>> batch_results =
69
77
ctc_beam_search_decoder_batch (inputs, new_vocab, beam_size, num_processes, cutoff_prob, cutoff_top_n, blank_id, log_input, ext_scorer);
70
- auto outputs_accessor = th_output.accessor <int , 3 >();
78
+ auto outputs_accessor = th_output.accessor <int , 3 >();
71
79
auto timesteps_accessor = th_timesteps.accessor <int , 3 >();
72
80
auto scores_accessor = th_scores.accessor <float , 2 >();
73
81
auto out_length_accessor = th_out_length.accessor <int , 2 >();
@@ -93,7 +101,7 @@ int beam_decode(at::Tensor th_probs,
93
101
94
102
int paddle_beam_decode (at::Tensor th_probs,
95
103
at::Tensor th_seq_lens,
96
- const char * labels,
104
+ std::vector<std::string> labels,
97
105
int vocab_size,
98
106
size_t beam_size,
99
107
size_t num_processes,
@@ -112,7 +120,7 @@ int paddle_beam_decode(at::Tensor th_probs,
112
120
113
121
int paddle_beam_decode_lm (at::Tensor th_probs,
114
122
at::Tensor th_seq_lens,
115
- const char * labels,
123
+ std::vector<std::string> labels,
116
124
int vocab_size,
117
125
size_t beam_size,
118
126
size_t num_processes,
@@ -134,10 +142,8 @@ int paddle_beam_decode_lm(at::Tensor th_probs,
134
142
void * paddle_get_scorer (double alpha,
135
143
double beta,
136
144
const char * lm_path,
137
- const char * labels ,
145
+ vector<std::string> new_vocab ,
138
146
int vocab_size) {
139
- std::vector<std::string> new_vocab;
140
- utf8_to_utf8_char_vec (labels, new_vocab);
141
147
Scorer* scorer = new Scorer (alpha, beta, lm_path, new_vocab);
142
148
return static_cast <void *>(scorer);
143
149
}
0 commit comments