Skip to content

Commit

Permalink
Merge pull request chokkan#54 from Maluuba/create-from-memory
Browse files Browse the repository at this point in the history
Create from memory
  • Loading branch information
Naoaki Okazaki committed Jan 24, 2016
2 parents 93c13c4 + 11d528c commit a555b6f
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 58 deletions.
13 changes: 13 additions & 0 deletions include/crfsuite.h
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,19 @@ int crfsuite_create_instance(const char *iid, void **ptr);
*/
int crfsuite_create_instance_from_file(const char *filename, void **ptr);

/**
* Create an instance of a model object from a model in memory.
* @param data A pointer to the model data.
* Must be 16-byte aligned.
* @param size A size (in bytes) of the model data.
* @param ptr The pointer to \c void* that points to the
* instance of the model object if successful,
* *ptr points to \c NULL otherwise.
* @return int \c 0 if this function creates an object successfully,
* \c 1 otherwise
*/
int crfsuite_create_instance_from_memory(const void *data, size_t size, void **ptr);

/**
* Create instances of tagging object from a model file.
* @param filename The filename of the model.
Expand Down
20 changes: 20 additions & 0 deletions include/crfsuite.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,26 @@ bool Tagger::open(const std::string& name)
return true;
}

/**
 * Open a model from a block of memory.
 *
 * Any model that is already open is closed first. The model object is then
 * created from the given memory block and its tagger interface is obtained.
 *
 *  @param  data    A pointer to the model data.
 *  @param  size    The size (in bytes) of the model data.
 *  @return bool    \c true if the model and its tagger were obtained,
 *                  \c false if the model object could not be created.
 *  @throw  std::runtime_error  The tagger interface could not be obtained
 *                              from the created model.
 */
bool Tagger::open(const void* data, std::size_t size)
{
    // Release whatever model is currently held before loading a new one.
    this->close();

    // Build the model object from the memory block; nonzero means failure.
    const int create_status =
        crfsuite_create_instance_from_memory(data, size, (void**)&model);
    if (create_status != 0) {
        return false;
    }

    // Ask the freshly created model for its tagger interface.
    const int tagger_status = model->get_tagger(model, &tagger);
    if (tagger_status != 0) {
        throw std::runtime_error("Failed to obtain the tagger interface");
    }

    return true;
}

void Tagger::close()
{
if (tagger != NULL) {
Expand Down
12 changes: 12 additions & 0 deletions include/crfsuite_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,18 @@ class Tagger
*/
bool open(const std::string& name);

/**
* Open a model from memory.
* @param data A pointer to the model data.
* Must be 16-byte aligned.
* @param size A size (in bytes) of the model data.
* @return bool \c true if the model is successfully opened,
* \c false otherwise (e.g., when the model data is
* not valid).
* @throw std::runtime_error An internal error in the model.
*/
bool open(const void* data, std::size_t size);

/**
* Close the model.
*/
Expand Down
2 changes: 1 addition & 1 deletion lib/cqdb/include/cqdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ typedef struct tag_cqdb cqdb_t; /**< Typedef of a CQDB reader. */
* @param size The size of the memory block.
* @retval cqdb_t* The pointer to the ::cqdb_t instance.
*/
cqdb_t* cqdb_reader(void *buffer, size_t size);
cqdb_t* cqdb_reader(const void *buffer, size_t size);

/**
* Delete the CQDB reader.
Expand Down
28 changes: 14 additions & 14 deletions lib/cqdb/src/cqdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,15 @@ struct tag_cqdb_writer {
* Constant quark database (CQDB).
*/
struct tag_cqdb {
uint8_t* buffer; /**< Pointer to the memory block. */
size_t size; /**< Size of the memory block. */
const uint8_t* buffer; /**< Pointer to the memory block. */
size_t size; /**< Size of the memory block. */

header_t header; /**< Chunk header. */
table_t ht[NUM_TABLES]; /**< Hash tables (string -> id). */
header_t header; /**< Chunk header. */
table_t ht[NUM_TABLES]; /**< Hash tables (string -> id). */

uint32_t* bwd; /**< Array for backward look-up (id -> string). */
uint32_t* bwd; /**< Array for backward look-up (id -> string). */

int num; /**< Number of key/data pairs. */
int num; /**< Number of key/data pairs. */
};


Expand Down Expand Up @@ -397,7 +397,7 @@ int cqdb_writer_close(cqdb_writer_t* dbw)



static uint32_t read_uint32(uint8_t* p)
static uint32_t read_uint32(const uint8_t* p)
{
uint32_t value;
value = ((uint32_t)p[0]);
Expand All @@ -407,7 +407,7 @@ static uint32_t read_uint32(uint8_t* p)
return value;
}

static uint8_t *read_tableref(tableref_t* ref, uint8_t *p)
static const uint8_t *read_tableref(tableref_t* ref, const uint8_t *p)
{
ref->offset = read_uint32(p);
p += sizeof(uint32_t);
Expand All @@ -416,7 +416,7 @@ static uint8_t *read_tableref(tableref_t* ref, uint8_t *p)
return p;
}

static bucket_t* read_bucket(uint8_t* p, uint32_t num)
static bucket_t* read_bucket(const uint8_t* p, uint32_t num)
{
uint32_t i;
bucket_t *bucket = (bucket_t*)calloc(num, sizeof(bucket_t));
Expand All @@ -429,7 +429,7 @@ static bucket_t* read_bucket(uint8_t* p, uint32_t num)
return bucket;
}

static uint32_t* read_backward_links(uint8_t* p, uint32_t num)
static uint32_t* read_backward_links(const uint8_t* p, uint32_t num)
{
uint32_t i;
uint32_t *bwd = (uint32_t*)calloc(num, sizeof(uint32_t));
Expand All @@ -440,7 +440,7 @@ static uint32_t* read_backward_links(uint8_t* p, uint32_t num)
return bwd;
}

cqdb_t* cqdb_reader(void *buffer, size_t size)
cqdb_t* cqdb_reader(const void *buffer, size_t size)
{
int i;
cqdb_t* db = NULL;
Expand All @@ -457,7 +457,7 @@ cqdb_t* cqdb_reader(void *buffer, size_t size)

db = (cqdb_t*)calloc(1, sizeof(cqdb_t));
if (db != NULL) {
uint8_t* p = NULL;
const uint8_t* p = NULL;

/* Set memory block and size. */
db->buffer = buffer;
Expand Down Expand Up @@ -549,7 +549,7 @@ int cqdb_to_id(cqdb_t* db, const char *str)
if (p->hash == hv) {
int value;
uint32_t ksize;
uint8_t *q = db->buffer + p->offset;
const uint8_t *q = db->buffer + p->offset;
value = (int)read_uint32(q);
q += sizeof(uint32_t);
ksize = read_uint32(q);
Expand All @@ -571,7 +571,7 @@ const char* cqdb_to_string(cqdb_t* db, int id)
if (db->bwd != NULL && (uint32_t)id < db->header.bwd_size) {
uint32_t offset = db->bwd[id];
if (offset) {
uint8_t *p = db->buffer + offset;
const uint8_t *p = db->buffer + offset;
p += sizeof(uint32_t); /* Skip key data. */
p += sizeof(uint32_t); /* Skip value size. */
return (const char *)p;
Expand Down
1 change: 1 addition & 0 deletions lib/crf/src/crf1d.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ int crf1dmw_close_features(crf1dmw_t* writer);
int crf1dmw_put_feature(crf1dmw_t* writer, int fid, const crf1dm_feature_t* f);

crf1dm_t* crf1dm_new(const char *filename);
crf1dm_t* crf1dm_new_from_memory(const void *data, size_t size);
void crf1dm_close(crf1dm_t* model);
int crf1dm_get_num_attrs(crf1dm_t* model);
int crf1dm_get_num_labels(crf1dm_t* model);
Expand Down
105 changes: 67 additions & 38 deletions lib/crf/src/crf1d_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ typedef struct {
} feature_header_t;

struct tag_crf1dm {
uint8_t* buffer_orig;
uint8_t* buffer;
uint32_t size;
header_t* header;
cqdb_t* labels;
cqdb_t* attrs;
uint8_t* buffer_orig;
const uint8_t* buffer;
uint32_t size;
header_t* header;
cqdb_t* labels;
cqdb_t* attrs;
};

struct tag_crf1dmw {
Expand Down Expand Up @@ -122,7 +122,7 @@ static int write_uint8(FILE *fp, uint8_t value)
return fwrite(&value, sizeof(value), 1, fp) == 1 ? 0 : 1;
}

static int read_uint8(uint8_t* buffer, uint8_t* value)
static int read_uint8(const uint8_t* buffer, uint8_t* value)
{
*value = *buffer;
return sizeof(*value);
Expand All @@ -138,7 +138,7 @@ static int write_uint32(FILE *fp, uint32_t value)
return fwrite(buffer, sizeof(uint8_t), 4, fp) == 4 ? 0 : 1;
}

static int read_uint32(uint8_t* buffer, uint32_t* value)
static int read_uint32(const uint8_t* buffer, uint32_t* value)
{
*value = ((uint32_t)buffer[0]);
*value |= ((uint32_t)buffer[1] << 8);
Expand All @@ -157,7 +157,7 @@ static int write_uint8_array(FILE *fp, uint8_t *array, size_t n)
return ret;
}

static int read_uint8_array(uint8_t* buffer, uint8_t *array, size_t n)
static int read_uint8_array(const uint8_t* buffer, uint8_t *array, size_t n)
{
size_t i;
int ret = 0;
Expand Down Expand Up @@ -194,7 +194,7 @@ static void write_float(FILE *fp, floatval_t value)
fwrite(buffer, sizeof(uint8_t), 8, fp);
}

static int read_float(uint8_t* buffer, floatval_t* value)
static int read_float(const uint8_t* buffer, floatval_t* value)
{
uint64_t iv;
iv = ((uint64_t)buffer[0]);
Expand Down Expand Up @@ -700,10 +700,9 @@ int crf1dmw_put_feature(crf1dmw_t* writer, int fid, const crf1dm_feature_t* f)
return 0;
}

crf1dm_t* crf1dm_new(const char *filename)
static crf1dm_t* crf1dm_new_impl(uint8_t* buffer_orig, const uint8_t* buffer, uint32_t size)
{
FILE *fp = NULL;
uint8_t* p = NULL;
const uint8_t* p = NULL;
crf1dm_t *model = NULL;
header_t *header = NULL;

Expand All @@ -712,29 +711,15 @@ crf1dm_t* crf1dm_new(const char *filename)
goto error_exit;
}

fp = fopen(filename, "rb");
if (fp == NULL) {
goto error_exit;
}

fseek(fp, 0, SEEK_END);
model->size = (uint32_t)ftell(fp);
fseek(fp, 0, SEEK_SET);

model->buffer = model->buffer_orig = (uint8_t*)malloc(model->size + 16);
while ((uintptr_t)model->buffer % 16 != 0) {
++model->buffer;
}
model->buffer_orig = buffer_orig;
model->buffer = buffer;

if (fread(model->buffer, 1, model->size, fp) != model->size) {
free(model->buffer_orig);
header = (header_t*)calloc(1, sizeof(header_t));
if (header == NULL) {
goto error_exit;
}
fclose(fp);

/* Write the file header. */
header = (header_t*)calloc(1, sizeof(header_t));

/* Read the file header. */
p = model->buffer;
p += read_uint8_array(p, header->magic, sizeof(header->magic));
p += read_uint32(p, &header->size);
Expand Down Expand Up @@ -763,15 +748,58 @@ crf1dm_t* crf1dm_new(const char *filename)
return model;

error_exit:
if (model != NULL) {
free(model);
free(header);
free(model);
free(buffer_orig);
return NULL;
}

/**
 * Create a model object from a model file.
 *
 * The whole file is read into a heap buffer whose start is advanced to a
 * 16-byte boundary, and the buffer is handed to crf1dm_new_impl().
 *
 *  @param  filename    The path of the model file.
 *  @return crf1dm_t*   The model object, or NULL if the file cannot be
 *                      opened, sized, or read, if it is too large for a
 *                      32-bit size, if memory cannot be allocated, or if
 *                      crf1dm_new_impl() fails.
 */
crf1dm_t* crf1dm_new(const char *filename)
{
    FILE *fp = NULL;
    long file_size = 0;
    uint32_t size = 0;
    uint8_t* buffer_orig = NULL;
    uint8_t* buffer = NULL;

    fp = fopen(filename, "rb");
    if (fp == NULL) {
        goto error_exit;
    }

    /* Determine the file size; ftell() reports failure with -1. */
    if (fseek(fp, 0, SEEK_END) != 0) {
        goto error_exit;
    }
    file_size = ftell(fp);
    if (file_size < 0) {
        goto error_exit;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        goto error_exit;
    }

    /* The size is kept as uint32_t and 16 padding bytes are added below;
       refuse files for which that arithmetic would wrap around. */
    if ((unsigned long)file_size > 0xFFFFFFFFUL - 16) {
        goto error_exit;
    }
    size = (uint32_t)file_size;

    /* Over-allocate so that the start of the data can be aligned. */
    buffer = buffer_orig = (uint8_t*)malloc(size + 16);
    if (buffer_orig == NULL) {
        goto error_exit;
    }

    /* Align the buffer to 16 bytes. */
    while ((uintptr_t)buffer % 16 != 0) {
        ++buffer;
    }

    if (fread(buffer, 1, size, fp) != size) {
        goto error_exit;
    }
    fclose(fp);

    /* From here on crf1dm_new_impl() is responsible for buffer_orig:
       it frees the buffer on failure and stores it in the model otherwise. */
    return crf1dm_new_impl(buffer_orig, buffer, size);

error_exit:
    free(buffer_orig);
    if (fp != NULL) {
        fclose(fp);
    }
    return NULL;
}

/**
 * Create a model object from model data that is already in memory.
 *
 * The data is not copied: the model keeps the pointer and reads from it,
 * and no owned buffer is registered, so the model never frees the block.
 * The caller therefore has to keep the block valid for as long as the
 * model is in use.
 *
 *  @param  data        A pointer to the model data (the public header
 *                      requires it to be 16-byte aligned).
 *  @param  size        The size (in bytes) of the model data.
 *  @return crf1dm_t*   The model object, or NULL if \c data is NULL, if
 *                      \c size does not fit in 32 bits, or if
 *                      crf1dm_new_impl() fails.
 */
crf1dm_t* crf1dm_new_from_memory(const void *data, size_t size)
{
    /* There is nothing to parse without a memory block. */
    if (data == NULL) {
        return NULL;
    }

    /* crf1dm_new_impl() takes a 32-bit size; reject sizes that would be
       silently truncated by the conversion. */
    if ((size_t)(uint32_t)size != size) {
        return NULL;
    }

    /* NULL as the owned buffer: the memory block stays with the caller. */
    return crf1dm_new_impl(NULL, (const uint8_t*)data, (uint32_t)size);
}

void crf1dm_close(crf1dm_t* model)
{
if (model->labels != NULL) {
Expand All @@ -786,8 +814,9 @@ void crf1dm_close(crf1dm_t* model)
}
if (model->buffer_orig != NULL) {
free(model->buffer_orig);
model->buffer_orig = model->buffer = NULL;
model->buffer_orig = NULL;
}
model->buffer = NULL;
free(model);
}

Expand Down Expand Up @@ -839,7 +868,7 @@ const char *crf1dm_to_attr(crf1dm_t* model, int aid)

int crf1dm_get_labelref(crf1dm_t* model, int lid, feature_refs_t* ref)
{
uint8_t *p = model->buffer;
const uint8_t *p = model->buffer;
uint32_t offset;

p += model->header->off_labelrefs;
Expand All @@ -855,7 +884,7 @@ int crf1dm_get_labelref(crf1dm_t* model, int lid, feature_refs_t* ref)

int crf1dm_get_attrref(crf1dm_t* model, int aid, feature_refs_t* ref)
{
uint8_t *p = model->buffer;
const uint8_t *p = model->buffer;
uint32_t offset;

p += model->header->off_attrrefs;
Expand All @@ -880,7 +909,7 @@ int crf1dm_get_featureid(feature_refs_t* ref, int i)

int crf1dm_get_feature(crf1dm_t* model, int fid, crf1dm_feature_t* f)
{
uint8_t *p = NULL;
const uint8_t *p = NULL;
uint32_t val = 0;
uint32_t offset = model->header->off_features + CHUNK_SIZE;
offset += FEATURE_SIZE * fid;
Expand Down
Loading

0 comments on commit a555b6f

Please sign in to comment.