Skip to content

Commit

Permalink
SyntaxNet cleanups (tensorflow#281)
Browse files Browse the repository at this point in the history
Cleanup changes for syntaxnet.
  • Loading branch information
calberti authored Jul 21, 2016
1 parent 0a40f8d commit be7a899
Show file tree
Hide file tree
Showing 38 changed files with 140 additions and 122 deletions.
31 changes: 22 additions & 9 deletions syntaxnet/syntaxnet/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
# A syntactic parser and part-of-speech tagger in TensorFlow.

package(
default_visibility = ["//visibility:private"],
default_visibility = [
"//visibility:private",
],
features = ["-layering_check"],
)

Expand Down Expand Up @@ -108,9 +110,9 @@ cc_library(
srcs = ["test_main.cc"],
linkopts = ["-lm"],
deps = [
"//external:gtest",
"@org_tensorflow//tensorflow/core:lib",
"@org_tensorflow//tensorflow/core:testlib",
"//external:gtest",
],
)

Expand All @@ -130,6 +132,7 @@ cc_library(
srcs = ["text_formats.cc"],
deps = [
":document_format",
":sentence_proto",
],
alwayslink = 1,
)
Expand All @@ -150,7 +153,6 @@ cc_library(
deps = [
":feature_extractor_proto",
":fml_parser",
":kbest_syntax_proto",
":sentence_proto",
":task_context",
],
Expand All @@ -166,7 +168,6 @@ cc_library(
deps = [
":document_format",
":feature_extractor_proto",
":kbest_syntax_proto",
":proto_io",
":sentence_proto",
":task_context",
Expand All @@ -182,6 +183,7 @@ cc_library(
deps = [
":dictionary_proto",
":feature_extractor",
":sentence_proto",
":shared_store",
":term_frequency_map",
":utils",
Expand Down Expand Up @@ -263,6 +265,7 @@ cc_library(
deps = [
":kbest_syntax_proto",
":registry",
":sentence_proto",
":shared_store",
":task_context",
":term_frequency_map",
Expand All @@ -279,6 +282,7 @@ cc_library(
":dictionary_proto",
":sentence_proto",
":task_context",
":task_spec_proto",
":term_frequency_map",
":test_main",
],
Expand All @@ -294,7 +298,6 @@ cc_library(
":parser_transitions",
":registry",
":sentence_features",
":sentence_proto",
":task_context",
":term_frequency_map",
":workspace",
Expand Down Expand Up @@ -325,6 +328,7 @@ cc_library(
":feature_extractor",
":parser_features",
":parser_transitions",
":sentence_proto",
":sparse_proto",
":task_context",
":task_spec_proto",
Expand All @@ -344,6 +348,7 @@ cc_library(
":parser_transitions",
":sentence_batch",
":sentence_proto",
":sparse_proto",
":task_context",
":task_spec_proto",
],
Expand All @@ -360,7 +365,6 @@ cc_library(
":sentence_batch",
":sentence_proto",
":task_context",
":task_spec_proto",
":text_formats",
],
alwayslink = 1,
Expand All @@ -370,13 +374,13 @@ cc_library(
name = "lexicon_builder",
srcs = ["lexicon_builder.cc"],
deps = [
":dictionary_proto",
":document_format",
":parser_features",
":parser_transitions",
":sentence_batch",
":sentence_proto",
":task_context",
":task_spec_proto",
":text_formats",
],
alwayslink = 1,
Expand Down Expand Up @@ -429,6 +433,11 @@ filegroup(
],
)

# Collects the files matched directly under models/parsey_mcparseface/ into a
# single target. The "*" pattern matches one path segment only, so files in
# nested subdirectories are not included.
# NOTE(review): presumably the pretrained Parsey McParseface model data used
# by tests/demos -- confirm against the targets that depend on this.
filegroup(
name = "parsey_data",
srcs = glob(["models/parsey_mcparseface/*"]),
)

cc_test(
name = "shared_store_test",
size = "small",
Expand Down Expand Up @@ -464,6 +473,8 @@ cc_test(
deps = [
":parser_transitions",
":populate_test_inputs",
":sentence_proto",
":task_spec_proto",
":test_main",
],
)
Expand All @@ -476,6 +487,8 @@ cc_test(
deps = [
":parser_transitions",
":populate_test_inputs",
":sentence_proto",
":task_spec_proto",
":test_main",
],
)
Expand Down Expand Up @@ -519,10 +532,10 @@ py_library(
name = "graph_builder",
srcs = ["graph_builder.py"],
deps = [
"@org_tensorflow//tensorflow:tensorflow_py",
"@org_tensorflow//tensorflow/core:protos_all_py",
":load_parser_ops_py",
":parser_ops",
"@org_tensorflow//tensorflow:tensorflow_py",
"@org_tensorflow//tensorflow/core:protos_all_py",
],
)

Expand Down
39 changes: 20 additions & 19 deletions syntaxnet/syntaxnet/affix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ static const int kInitialBuckets = 1024;
// Fill factor for term and affix hash maps.
static const int kFillFactor = 2;

int TermHash(string term) {
int TermHash(const string &term) {
return utils::Hash32(term.data(), term.size(), 0xDECAF);
}

// Copies a substring of a Unicode text to a string.
static void UnicodeSubstring(UnicodeText::const_iterator start,
UnicodeText::const_iterator end, string *result) {
static void UnicodeSubstring(const UnicodeText::const_iterator &start,
const UnicodeText::const_iterator &end,
string *result) {
result->clear();
result->append(start.utf8_data(), end.utf8_data() - start.utf8_data());
}
Expand Down Expand Up @@ -79,7 +80,7 @@ void AffixTable::Read(const AffixTableEntry &table_entry) {
const auto &affix_entry = table_entry.affix(affix_id);
CHECK_GE(affix_entry.length(), 0);
CHECK_LE(affix_entry.length(), max_length_);
CHECK(FindAffix(affix_entry.form()) == NULL); // forbid duplicates
CHECK(FindAffix(affix_entry.form()) == nullptr); // forbid duplicates
Affix *affix = AddNewAffix(affix_entry.form(), affix_entry.length());
CHECK_EQ(affix->id(), affix_id);
}
Expand Down Expand Up @@ -117,7 +118,7 @@ void AffixTable::Write(AffixTableEntry *table_entry) const {
affix_entry->set_form(affix->form());
affix_entry->set_length(affix->length());
affix_entry->set_shorter_id(
affix->shorter() == NULL ? -1 : affix->shorter()->id());
affix->shorter() == nullptr ? -1 : affix->shorter()->id());
}
}

Expand All @@ -137,7 +138,7 @@ Affix *AffixTable::AddAffixesForWord(const char *word, size_t size) {
// Determine longest affix.
int affix_len = length;
if (affix_len > max_length_) affix_len = max_length_;
if (affix_len == 0) return NULL;
if (affix_len == 0) return nullptr;

// Find start and end of longest affix.
UnicodeText::const_iterator start, end;
Expand All @@ -150,25 +151,25 @@ Affix *AffixTable::AddAffixesForWord(const char *word, size_t size) {
}

// Try to find successively shorter affixes.
Affix *top = NULL;
Affix *ancestor = NULL;
Affix *top = nullptr;
Affix *ancestor = nullptr;
string s;
while (affix_len > 0) {
// Try to find affix in table.
UnicodeSubstring(start, end, &s);
Affix *affix = FindAffix(s);
if (affix == NULL) {
if (affix == nullptr) {
// Affix not found, add new one to table.
affix = AddNewAffix(s, affix_len);

// Update ancestor chain.
if (ancestor != NULL) ancestor->set_shorter(affix);
if (ancestor != nullptr) ancestor->set_shorter(affix);
ancestor = affix;
if (top == NULL) top = affix;
if (top == nullptr) top = affix;
} else {
// Affix found. Update ancestor if needed and return match.
if (ancestor != NULL) ancestor->set_shorter(affix);
if (top == NULL) top = affix;
if (ancestor != nullptr) ancestor->set_shorter(affix);
if (top == nullptr) top = affix;
break;
}

Expand All @@ -187,15 +188,15 @@ Affix *AffixTable::AddAffixesForWord(const char *word, size_t size) {

Affix *AffixTable::GetAffix(int id) const {
if (id < 0 || id >= static_cast<int>(affixes_.size())) {
return NULL;
return nullptr;
} else {
return affixes_[id];
}
}

string AffixTable::AffixForm(int id) const {
Affix *affix = GetAffix(id);
if (affix == NULL) {
if (affix == nullptr) {
return "";
} else {
return affix->form();
Expand All @@ -204,7 +205,7 @@ string AffixTable::AffixForm(int id) const {

int AffixTable::AffixId(const string &form) const {
Affix *affix = FindAffix(form);
if (affix == NULL) {
if (affix == nullptr) {
return -1;
} else {
return affix->id();
Expand Down Expand Up @@ -234,11 +235,11 @@ Affix *AffixTable::FindAffix(const string &form) const {

// Try to find affix in hash table.
Affix *affix = buckets_[hash & (buckets_.size() - 1)];
while (affix != NULL) {
while (affix != nullptr) {
if (strcmp(affix->form_.c_str(), form.c_str()) == 0) return affix;
affix = affix->next_;
}
return NULL;
return nullptr;
}

void AffixTable::Resize(int size_hint) {
Expand All @@ -250,7 +251,7 @@ void AffixTable::Resize(int size_hint) {
// Distribute affixes in new buckets.
buckets_.resize(new_size);
for (size_t i = 0; i < buckets_.size(); ++i) {
buckets_[i] = NULL;
buckets_[i] = nullptr;
}
for (size_t i = 0; i < affixes_.size(); ++i) {
Affix *affix = affixes_[i];
Expand Down
12 changes: 8 additions & 4 deletions syntaxnet/syntaxnet/affix.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef $TARGETDIR_AFFIX_H_
#define $TARGETDIR_AFFIX_H_
#ifndef SYNTAXNET_AFFIX_H_
#define SYNTAXNET_AFFIX_H_

#include <stddef.h>
#include <string>
Expand All @@ -40,7 +40,11 @@ class Affix {
private:
friend class AffixTable;
Affix(int id, const char *form, int length)
: id_(id), length_(length), form_(form), shorter_(NULL), next_(NULL) {}
: id_(id),
length_(length),
form_(form),
shorter_(nullptr),
next_(nullptr) {}

public:
// Returns unique id of affix.
Expand Down Expand Up @@ -152,4 +156,4 @@ class AffixTable {

} // namespace syntaxnet

#endif // $TARGETDIR_AFFIX_H_
#endif // SYNTAXNET_AFFIX_H_
2 changes: 1 addition & 1 deletion syntaxnet/syntaxnet/arc_standard_transitions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ limitations under the License.

#include <string>

#include "syntaxnet/utils.h"
#include "syntaxnet/parser_state.h"
#include "syntaxnet/parser_transitions.h"
#include "syntaxnet/utils.h"
#include "tensorflow/core/lib/strings/strcat.h"

namespace syntaxnet {
Expand Down
6 changes: 3 additions & 3 deletions syntaxnet/syntaxnet/arc_standard_transitions_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ limitations under the License.

#include <memory>
#include <string>
#include <gmock/gmock.h>

#include "syntaxnet/utils.h"
#include "syntaxnet/parser_state.h"
#include "syntaxnet/parser_transitions.h"
#include "syntaxnet/populate_test_inputs.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/task_context.h"
#include "syntaxnet/task_spec.pb.h"
#include "syntaxnet/term_frequency_map.h"
#include "syntaxnet/utils.h"
#include <gmock/gmock.h>

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/test.h"
Expand Down
6 changes: 3 additions & 3 deletions syntaxnet/syntaxnet/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef $TARGETDIR_BASE_H_
#define $TARGETDIR_BASE_H_
#ifndef SYNTAXNET_BASE_H_
#define SYNTAXNET_BASE_H_

#include <functional>
#include <string>
Expand Down Expand Up @@ -50,4 +50,4 @@ using std::string;

// namespace syntaxnet

#endif // $TARGETDIR_BASE_H_
#endif // SYNTAXNET_BASE_H_
2 changes: 1 addition & 1 deletion syntaxnet/syntaxnet/beam_reader_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ limitations under the License.
#include "syntaxnet/base.h"
#include "syntaxnet/parser_state.h"
#include "syntaxnet/parser_transitions.h"
#include "syntaxnet/sentence_batch.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/sentence_batch.h"
#include "syntaxnet/shared_store.h"
#include "syntaxnet/sparse.pb.h"
#include "syntaxnet/task_context.h"
Expand Down
2 changes: 1 addition & 1 deletion syntaxnet/syntaxnet/document_filters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class DocumentSource : public OpKernel {
mutex_lock lock(mu_);
Sentence *document;
vector<Sentence *> document_batch;
while ((document = corpus_->Read()) != NULL) {
while ((document = corpus_->Read()) != nullptr) {
document_batch.push_back(document);
if (static_cast<int>(document_batch.size()) == batch_size_) {
OutputDocuments(context, &document_batch);
Expand Down
Loading

0 comments on commit be7a899

Please sign in to comment.