Skip to content

Commit eb13407

Browse files
committed
selectively merging from xiaohui-zhang's branch, some code for writing/reading vectors of pairs.
1 parent d165318 commit eb13407

File tree

3 files changed

+140
-1
lines changed

3 files changed

+140
-1
lines changed

src/base/io-funcs-inl.h

+107
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
44
// Jan Silovsky; Yanmin Qian;
55
// Johns Hopkins University (Author: Daniel Povey)
6+
// 2016 Xiaohui Zhang
67

78
// See ../../COPYING for clarification regarding multiple authors
89
//
@@ -87,6 +88,112 @@ template<class T> inline void ReadBasicType(std::istream &is,
8788
}
8889
}
8990

91+
// Template that covers integers.
92+
template<class T>
93+
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
94+
const std::vector<std::pair<T, T> > &v) {
95+
// Compile time assertion that this is not called with a wrong type.
96+
KALDI_ASSERT_IS_INTEGER_TYPE(T);
97+
if (binary) {
98+
char sz = sizeof(T); // this is currently just a check.
99+
os.write(&sz, 1);
100+
int32 vecsz = static_cast<int32>(v.size());
101+
KALDI_ASSERT((size_t)vecsz == v.size());
102+
os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
103+
if (vecsz != 0) {
104+
os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T) * vecsz * 2);
105+
}
106+
} else {
107+
// focus here is on prettiness of text form rather than
108+
// efficiency of reading-in.
109+
// reading-in is dominated by low-level operations anyway:
110+
// for efficiency use binary.
111+
os << "[ ";
112+
typename std::vector<std::pair<T, T> >::const_iterator iter = v.begin(),
113+
end = v.end();
114+
for (; iter != end; ++iter) {
115+
if (sizeof(T) == 1)
116+
os << static_cast<int16>(iter->first) << ','
117+
<< static_cast<int16>(iter->second) << ' ';
118+
else
119+
os << iter->first << ','
120+
<< iter->second << ' ';
121+
}
122+
os << "]\n";
123+
}
124+
if (os.fail()) {
125+
throw std::runtime_error("Write failure in WriteIntegerPairVector.");
126+
}
127+
}
128+
129+
// Template that covers integers.
130+
template<class T>
131+
inline void ReadIntegerPairVector(std::istream &is, bool binary,
132+
std::vector<std::pair<T, T> > *v) {
133+
KALDI_ASSERT_IS_INTEGER_TYPE(T);
134+
KALDI_ASSERT(v != NULL);
135+
if (binary) {
136+
int sz = is.peek();
137+
if (sz == sizeof(T)) {
138+
is.get();
139+
} else { // this is currently just a check.
140+
KALDI_ERR << "ReadIntegerPairVector: expected to see type of size "
141+
<< sizeof(T) << ", saw instead " << sz << ", at file position "
142+
<< is.tellg();
143+
}
144+
int32 vecsz;
145+
is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
146+
if (is.fail() || vecsz < 0) goto bad;
147+
v->resize(vecsz);
148+
if (vecsz > 0) {
149+
is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz*2);
150+
}
151+
} else {
152+
std::vector<std::pair<T, T> > tmp_v; // use temporary so v doesn't use extra memory
153+
// due to resizing.
154+
is >> std::ws;
155+
if (is.peek() != static_cast<int>('[')) {
156+
KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw "
157+
<< is.peek() << ", at file position " << is.tellg();
158+
}
159+
is.get(); // consume the '['.
160+
is >> std::ws; // consume whitespace.
161+
while (is.peek() != static_cast<int>(']')) {
162+
if (sizeof(T) == 1) { // read/write chars as numbers.
163+
int16 next_t1, next_t2;
164+
is >> next_t1;
165+
if (is.fail()) goto bad;
166+
if (is.peek() != static_cast<int>(','))
167+
KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
168+
<< is.peek() << ", at file position " << is.tellg();
169+
is.get(); // consume the ','.
170+
is >> next_t2 >> std::ws;
171+
if (is.fail()) goto bad;
172+
else
173+
tmp_v.push_back(std::make_pair<T, T>((T)next_t1, (T)next_t2));
174+
} else {
175+
T next_t1, next_t2;
176+
is >> next_t1;
177+
if (is.fail()) goto bad;
178+
if (is.peek() != static_cast<int>(','))
179+
KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
180+
<< is.peek() << ", at file position " << is.tellg();
181+
is.get(); // consume the ','.
182+
is >> next_t2 >> std::ws;
183+
if (is.fail()) goto bad;
184+
else
185+
tmp_v.push_back(std::make_pair<T, T>((T)next_t1, (T)next_t2));
186+
}
187+
}
188+
is.get(); // get the final ']'.
189+
*v = tmp_v; // could use std::swap to use less temporary memory, but this
190+
// uses less permanent memory.
191+
}
192+
if (!is.fail()) return;
193+
bad:
194+
KALDI_ERR << "ReadIntegerPairVector: read failure at file position "
195+
<< is.tellg();
196+
}
90197

91198
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
92199
const std::vector<T> &v) {

src/base/io-funcs-test.cc

+22-1
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,20 @@ void UnitTestIo(bool binary) {
4343
WriteIntegerVector(outfile, binary, vec2);
4444
if (!binary) outfile << " \n";
4545
std::vector<char> vec3;
46-
for (size_t i = 0; i < 10; i++) vec3.push_back(Rand()%100);
46+
47+
int32 size = RandInt(0, 10);
48+
for (size_t i = 0; i < size; i++) vec3.push_back(Rand()%100);
4749
WriteIntegerVector(outfile, binary, vec3);
50+
std::vector<std::pair<int32, int32> > vec4;
51+
WriteIntegerPairVector(outfile, binary, vec4);
52+
if (!binary && Rand()%2 == 0) outfile << " \n";
53+
std::vector<std::pair<uint16, uint16> > vec5;
54+
for (size_t i = 0; i < size; i++) vec5.push_back(std::make_pair<uint16, uint16>(Rand()%100 - 10, Rand()%100 - 10));
55+
WriteIntegerPairVector(outfile, binary, vec5);
56+
if (!binary) outfile << " \n";
57+
std::vector<std::pair<char, char> > vec6;
58+
for (size_t i = 0; i < size; i++) vec6.push_back(std::make_pair<char, char>(Rand()%100, Rand()%100));
59+
WriteIntegerPairVector(outfile, binary, vec6);
4860
if (!binary && Rand()%2 == 0) outfile << " \n";
4961
const char *token1 = "Hi";
5062
WriteToken(outfile, binary, token1);
@@ -90,6 +102,15 @@ void UnitTestIo(bool binary) {
90102
std::vector<char> vec3_in;
91103
ReadIntegerVector(infile, binary_in, &vec3_in);
92104
KALDI_ASSERT(vec3_in == vec3);
105+
std::vector<std::pair<int32, int32> > vec4_in;
106+
ReadIntegerPairVector(infile, binary_in, &vec4_in);
107+
KALDI_ASSERT(vec4_in == vec4);
108+
std::vector<std::pair<uint16, uint16> > vec5_in;
109+
ReadIntegerPairVector(infile, binary_in, &vec5_in);
110+
KALDI_ASSERT(vec5_in == vec5);
111+
std::vector<std::pair<char, char> > vec6_in;
112+
ReadIntegerPairVector(infile, binary_in, &vec6_in);
113+
KALDI_ASSERT(vec6_in == vec6);
93114
std::string token1_in, token2_in;
94115
KALDI_ASSERT(Peek(infile, binary_in) == static_cast<int>(*token1));
95116
KALDI_ASSERT(PeekToken(infile, binary_in) == static_cast<int>(*token1));

src/base/io-funcs.h

+11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
44
// Jan Silovsky; Yanmin Qian
5+
// 2016 Xiaohui Zhang
56

67
// See ../../COPYING for clarification regarding multiple authors
78
//
@@ -181,6 +182,16 @@ template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
181182
template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
182183
std::vector<T> *v);
183184

185+
/// Function for writing STL vectors of pairs of integer types.
/// Both members of each pair have the same integer type T.
/// If binary is true the output is a one-byte sizeof(T) marker, an int32
/// count of pairs, and then the raw pair data; otherwise the pairs are
/// written as text in the form "[ a,b c,d ]" followed by a newline.
/// Throws std::runtime_error if the stream is in a failed state afterwards.
186+
template<class T>
187+
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
188+
const std::vector<std::pair<T, T> > &v);
189+
190+
/// Function for reading STL vector of pairs of integer types.
/// Both members of each pair have the same integer type T.
/// Expects the layout produced by WriteIntegerPairVector for the same
/// value of binary, and stores the result in *v, which must not be NULL.
/// A mismatched element size, a missing '[' or ',' in text mode, a negative
/// count in binary mode, or a stream failure is reported via KALDI_ERR.
191+
template<class T>
192+
inline void ReadIntegerPairVector(std::istream &is, bool binary,
193+
std::vector<std::pair<T, T> > *v);
194+
184195
/// The WriteToken functions are for writing nonempty sequences of non-space
185196
/// characters. They are not for general strings.
186197
void WriteToken(std::ostream &os, bool binary, const char *token);

0 commit comments

Comments
 (0)