Skip to content

Commit

Permalink
Merge pull request HIT-SCIR#136 from endyul/feature/portable_model
Browse files Browse the repository at this point in the history
Feature/portable model
  • Loading branch information
Oneplus committed Oct 19, 2015
2 parents 7faf223 + 1954e4a commit 0243a4c
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 119 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ set (CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

if (APPLE)
add_definitions(-DGTEST_HAS_TR1_TUPLE=0)
set(CMAKE_CXX_FLAGS "-std=c++11 -Wno-c++11-narrowing")
set(CMAKE_CXX_FLAGS "-std=c++0x -Wno-c++11-narrowing")
else()
set(CMAKE_CXX_FLAGS "-std=c++0x")
endif(APPLE)


# -- config output directories
set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
set (LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
Expand Down
75 changes: 38 additions & 37 deletions src/framework/featurespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <iostream>
#include <vector>
#include <cstdint>
#include "utils/smartmap.hpp"

namespace ltp {
Expand All @@ -17,7 +18,7 @@ class FeatureSpaceIterator {
// should be careful about the empty dicts
}

FeatureSpaceIterator(const utility::SmartMap<int>* dicts, int num_dicts)
FeatureSpaceIterator(const utility::SmartMap<int32_t>* dicts, uint32_t num_dicts)
: _dicts(dicts),
_num_dicts(num_dicts),
_i(0),
Expand All @@ -29,8 +30,8 @@ class FeatureSpaceIterator {
}

const char* key() { return _j.key(); }
int id() { return (*_j.value()); }
size_t tid() { return _i; }
int32_t id() { return (*_j.value()); }
uint32_t tid() { return _i; }

bool operator ==(const FeatureSpaceIterator & other) const {
return ((_dicts + _i) == other._dicts);
Expand Down Expand Up @@ -63,33 +64,33 @@ class FeatureSpaceIterator {
}
}

size_t _i;
size_t _num_dicts;
size_t _state;
const utility::SmartMap<int>* _dicts;
utility::SmartMap<int>::const_iterator _j;
uint32_t _i;
uint32_t _num_dicts;
uint32_t _state;
const utility::SmartMap<int32_t>* _dicts;
utility::SmartMap<int32_t>::const_iterator _j;
};

class ViterbiFeatureSpace {
public:
ViterbiFeatureSpace(size_t nr_dicts, size_t nr_labels = 1)
ViterbiFeatureSpace(uint32_t nr_dicts, uint32_t nr_labels = 1)
: _num_dicts(nr_dicts), _num_labels(nr_labels), _offset(0) {
dicts = new utility::SmartMap<int>[ nr_dicts ];
dicts = new utility::SmartMap<int32_t>[ nr_dicts ];
}

~ViterbiFeatureSpace(void) {
delete [](dicts);
}

int retrieve(const size_t& tid, const char* key) const {
int val;
int32_t retrieve(const uint32_t& tid, const char* key) const {
int32_t val;
if (dicts[tid].get(key, val)) {
return val;
}
return -1;
}

int retrieve(const size_t& tid, const std::string& key) const {
int32_t retrieve(const uint32_t& tid, const std::string& key) const {
return retrieve(tid, key.c_str());
}

Expand All @@ -101,8 +102,8 @@ class ViterbiFeatureSpace {
* @param[in] create if create is true, insert the key into the dict
* @return int the dimension index
*/
int retrieve(const size_t& tid, const char* key, bool create) {
int val;
int32_t retrieve(const uint32_t& tid, const char* key, bool create) {
int32_t val;
if (dicts[tid].get(key, val)) {
return val;
} else {
Expand All @@ -116,7 +117,7 @@ class ViterbiFeatureSpace {
return -1;
}

int retrieve(const size_t& tid, const std::string& key, bool create) {
int32_t retrieve(const uint32_t& tid, const std::string& key, bool create) {
return retrieve(tid, key.c_str(), create);
}

Expand All @@ -128,15 +129,15 @@ class ViterbiFeatureSpace {
* @param[in] lid the label
* @return int the dimension index
*/
int index(const size_t& tid, const char* key, const size_t& lid = 0) const {
int idx = -1;
int32_t index(const uint32_t& tid, const char* key, const uint32_t& lid = 0) const {
int32_t idx = -1;
if (!dicts[tid].get(key, idx)) {
return -1;
}
return idx * _num_labels + lid;
}

int index(const size_t& tid, const std::string& key, const size_t& lid = 0) const {
int32_t index(const uint32_t& tid, const std::string& key, const uint32_t& lid = 0) const {
return index(tid, key.c_str(), lid);
}

Expand All @@ -147,27 +148,27 @@ class ViterbiFeatureSpace {
* @param[in] lid the label
* @return int the dimension index
*/
int index(const size_t& prev_lid, const size_t& lid) const {
int32_t index(const uint32_t& prev_lid, const uint32_t& lid) const {
return _offset * _num_labels + prev_lid * _num_labels + lid;
}

size_t num_features() const {
uint32_t num_features() const {
return _offset;
}

size_t dim() const {
uint32_t dim() const {
return _offset* _num_labels + _num_labels* _num_labels;
}

size_t num_groups() const {
uint32_t num_groups() const {
return _offset + _num_labels;
}

size_t num_dicts() const {
uint32_t num_dicts() const {
return _num_dicts;
}

void set_num_labels(const size_t& num_labels) {
void set_num_labels(const uint32_t& num_labels) {
_num_labels = num_labels;
}

Expand All @@ -178,14 +179,14 @@ class ViterbiFeatureSpace {
*/
void dump(std::ostream & ofs) const {
char chunk[16];
size_t sz = _num_dicts;
uint32_t sz = _num_dicts;
strncpy(chunk, "featurespace", 16);

ofs.write(chunk, 16);
ofs.write(reinterpret_cast<const char *>(&_offset), sizeof(unsigned long long));
ofs.write(reinterpret_cast<const char *>(&sz), sizeof(unsigned long long));
ofs.write(reinterpret_cast<const char *>(&_offset), sizeof(uint32_t));
ofs.write(reinterpret_cast<const char *>(&sz), sizeof(uint32_t));

for (size_t i = 0; i < _num_dicts; ++ i) {
for (uint32_t i = 0; i < _num_dicts; ++ i) {
dicts[i].dump(ofs);
}
}
Expand All @@ -199,20 +200,20 @@ class ViterbiFeatureSpace {
*/
bool load(std::istream& ifs) {
char chunk[16];
unsigned long long sz;
uint32_t sz;
ifs.read(chunk, 16);
if (strcmp(chunk, "featurespace")) {
return false;
}

ifs.read(reinterpret_cast<char *>(&_offset), sizeof(unsigned long long));
ifs.read(reinterpret_cast<char *>(&sz), sizeof(unsigned long long));
ifs.read(reinterpret_cast<char *>(&_offset), sizeof(uint32_t));
ifs.read(reinterpret_cast<char *>(&sz), sizeof(uint32_t));

if (sz != _num_dicts) {
return false;
}

for (size_t i = 0; i < sz; ++ i) {
for (uint32_t i = 0; i < sz; ++ i) {
if (!dicts[i].load(ifs)) {
return false;
}
Expand All @@ -228,10 +229,10 @@ class ViterbiFeatureSpace {
return FeatureSpaceIterator(dicts + _num_dicts, _num_dicts);
}
private:
size_t _offset;
size_t _num_labels;
size_t _num_dicts;
utility::SmartMap<int>* dicts;
uint32_t _offset;
uint32_t _num_labels;
uint32_t _num_dicts;
utility::SmartMap<int32_t>* dicts;
};

} // namespace framework
Expand Down
56 changes: 29 additions & 27 deletions src/framework/parameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <iostream>
#include <cstring>
#include <cstdint>
#include "utils/math/sparsevec.h"
#include "utils/math/featurevec.h"
#include "utils/logging.hpp"
Expand All @@ -14,12 +15,12 @@ class Parameters {
public:
bool _enable_wrapper;

size_t _dim;
size_t _last_timestamp;
uint32_t _dim;
uint32_t _last_timestamp;

double* _W;
double* _W_sum;
size_t* _W_time;
uint32_t* _W_time;

public:
enum DumpOption {
Expand All @@ -33,17 +34,17 @@ class Parameters {
_last_timestamp(0), _enable_wrapper(false) {}
~Parameters() { dealloc(); }

void realloc(const size_t& dim) {
void realloc(const uint32_t& dim) {
dealloc();
_dim = dim;

if (dim > 0) {
_W = new double[dim];
_W_sum = new double[dim];
_W_time = new size_t[dim];
_W_time = new uint32_t[dim];
}

for (size_t i = 0; i < dim; ++ i) {
for (uint32_t i = 0; i < dim; ++ i) {
_W[i] = 0;
_W_sum[i] = 0;
_W_time[i] = 0;
Expand Down Expand Up @@ -83,8 +84,8 @@ class Parameters {
* @param[in] now The timestamp.
* @param[in] scale The scale
*/
void add(const size_t& idx, const size_t& now, const double& scale = 1.) {
size_t elapsed = now - _W_time[idx];
void add(const uint32_t& idx, const uint32_t& now, const double& scale = 1.) {
uint32_t elapsed = now - _W_time[idx];
double cur_val = _W[idx];

_W[idx] = cur_val + scale;
Expand All @@ -103,11 +104,11 @@ class Parameters {
* @param[in] now The timestamp.
* @param[in] scale The scale
*/
void add(const math::SparseVec& vec, const size_t& now, const double& scale = 1.) {
void add(const math::SparseVec& vec, const uint32_t& now, const double& scale = 1.) {
for (math::SparseVec::const_iterator itx = vec.begin();
itx != vec.end(); ++ itx) {
int idx = itx->first;
int elapsed = now - _W_time[idx];
uint32_t idx = itx->first;
uint32_t elapsed = now - _W_time[idx];
double upd = scale * itx->second;
double cur_val = _W[idx];

Expand Down Expand Up @@ -169,15 +170,15 @@ class Parameters {
* non-averaged one (_W).
* @return double The dot product.
*/
double dot(const int idx, bool avg = false) const {
double dot(const uint32_t idx, bool avg = false) const {
const double * const p = (avg ? _W_sum : _W);
return p[idx];
}

double predict(const math::FeatureVector* vec, const size_t& elapsed_time) const {
double predict(const math::FeatureVector* vec, const uint32_t& elapsed_time) const {
double ret = 0;
for (int i = 0; i < vec->n; ++i) {
int idx = vec->idx[i] + vec->loff;
for (uint32_t i = 0; i < vec->n; ++i) {
uint32_t idx = vec->idx[i] + vec->loff;
if (vec->val) {
ret += (_W_sum[idx] + _W[idx] * elapsed_time * vec->val[i]);
}
Expand All @@ -188,7 +189,7 @@ class Parameters {
return ret;
}

double predict(const int idx, const size_t& elapsed_time) const {
double predict(const uint32_t idx, const uint32_t& elapsed_time) const {
return _W_sum[idx] + _W[idx] * elapsed_time;
}

Expand All @@ -197,8 +198,8 @@ class Parameters {
*
* @param[in] now The timestamp.
*/
void flush(const size_t& now) {
for (size_t i = 0; i < _dim; ++i) {
void flush(const uint32_t& now) {
for (uint32_t i = 0; i < _dim; ++i) {
_W_sum[i] += (now - _W_time[i]) * _W[i];
_W_time[i] = now;
}
Expand All @@ -208,13 +209,14 @@ class Parameters {
}
}

void str(std::ostream& out, int width = 10) {
void str(std::ostream& out, uint32_t width = 10) {
if (0 == width) return;
out << "\t";
for (int i = 0; i < width; ++ i) {
for (uint32_t i = 0; i < width; ++ i) {
out << "[" << i << "]\t";
}
out << std::endl;
for (size_t i = 0; i < _dim; ++ i) {
for (uint32_t i = 0; i < _dim; ++ i) {
if (i % width == 0) {
out << "[" << i << "-" << (i / width + 1) * width - 1 << "]\t";
}
Expand Down Expand Up @@ -245,16 +247,16 @@ class Parameters {
strncpy(chunk, "param-nonavg", 16);
}
out.write(chunk, 16);
out.write(reinterpret_cast<const char*>(&_dim), sizeof(unsigned long long));
out.write(reinterpret_cast<const char*>(&_dim), sizeof(uint32_t));

if (_dim > 0) {
if (opt == kDumpDetails) {
out.write(reinterpret_cast<const char*>(_W), sizeof(double) * _dim);
out.write(reinterpret_cast<const char*>(_W_sum), sizeof(double) * _dim);
out.write(reinterpret_cast<const char*>(&_last_timestamp), sizeof(unsigned long long));
out.write(reinterpret_cast<const char*>(&_last_timestamp), sizeof(uint32_t));
} else if (opt == kDumpAveraged) {
out.write(reinterpret_cast<const char*>(_W_sum), sizeof(double) * _dim);
out.write(reinterpret_cast<const char*>(&_last_timestamp), sizeof(unsigned long long));
out.write(reinterpret_cast<const char*>(&_last_timestamp), sizeof(uint32_t));
} else if (opt == kDumpNonAveraged) {
out.write(reinterpret_cast<const char*>(_W), sizeof(double) * _dim);
}
Expand All @@ -279,19 +281,19 @@ class Parameters {
return false;
}

in.read(reinterpret_cast<char *>(&_dim), sizeof(unsigned long long));
in.read(reinterpret_cast<char *>(&_dim), sizeof(uint32_t));
if (_dim > 0) {
if (!strncmp(body, "details", 11)) {
_W = new double[_dim];
_W_sum = new double[_dim];
in.read(reinterpret_cast<char *>(_W), sizeof(double)* _dim);
in.read(reinterpret_cast<char *>(_W_sum), sizeof(double)* _dim);
in.read(reinterpret_cast<char *>(&_last_timestamp), sizeof(unsigned long long));
in.read(reinterpret_cast<char *>(&_last_timestamp), sizeof(uint32_t));
_enable_wrapper = false;
} else if (!strncmp(body, "avg", 11)) {
_W_sum = new double[_dim];
in.read(reinterpret_cast<char *>(_W_sum), sizeof(double)* _dim);
in.read(reinterpret_cast<char *>(&_last_timestamp), sizeof(unsigned long long));
in.read(reinterpret_cast<char *>(&_last_timestamp), sizeof(uint32_t));
_W = _W_sum;
_enable_wrapper = true;
} else if (!strncmp(body, "nonavg", 11)) {
Expand Down
Loading

0 comments on commit 0243a4c

Please sign in to comment.