
Commit

update exported api and code
vcaesar committed Nov 30, 2017
1 parent 468a368 commit c2ea8b3
Showing 32 changed files with 111 additions and 111 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -76,7 +76,7 @@ var (

func main() {
// Init
- searcher.Init(types.EngineInitOptions{
+ searcher.Init(types.EngineOpts{
Using: 4,
NotUsingSegmenter: true})
defer searcher.Close()
2 changes: 1 addition & 1 deletion README_zh.md
@@ -77,7 +77,7 @@ var (

func main() {
// Init
- searcher.Init(types.EngineInitOptions{
+ searcher.Init(types.EngineOpts{
Using: 3,
SegmenterDict: "zh",
// SegmenterDict: "your gopath"+"/src/github.com/go-ego/riot/data/dict/dictionary.txt",
4 changes: 2 additions & 2 deletions core/indexer.go
@@ -49,7 +49,7 @@ type Indexer struct {
removeCache types.DocumentsId
}

- initOptions types.IndexerInitOptions
+ initOptions types.IndexerOpts
initialized bool

// this is actually an approximation of the total number of documents
@@ -71,7 +71,7 @@ type KeywordIndices struct {
}

// Init initializes the indexer
- func (indexer *Indexer) Init(options types.IndexerInitOptions) {
+ func (indexer *Indexer) Init(options types.IndexerOpts) {
if indexer.initialized == true {
log.Fatal("The Indexer can not be initialized twice.")
}
18 changes: 9 additions & 9 deletions core/indexer_test.go
@@ -9,7 +9,7 @@ import (

func TestAddKeywords(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})
indexer.AddDocumentToCache(&types.DocumentIndex{
DocId: 1,
Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
@@ -44,7 +44,7 @@ func TestAddKeywords(t *testing.T) {

func TestRemoveDoc(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})

// doc1 = "token2 token3"
indexer.AddDocumentToCache(&types.DocumentIndex{
@@ -128,7 +128,7 @@ func TestRemoveDoc(t *testing.T) {

func TestLookupLocationsIndex(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})
// doc1 = "token2 token3"
indexer.AddDocumentToCache(&types.DocumentIndex{
DocId: 1,
@@ -208,7 +208,7 @@ func TestLookupLocationsIndex(t *testing.T) {

func TestLookupDocIdsIndex(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.DocIdsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.DocIdsIndex})
// doc1 = "token2 token3"
indexer.AddDocumentToCache(&types.DocumentIndex{
DocId: 1,
@@ -288,7 +288,7 @@ func TestLookupDocIdsIndex(t *testing.T) {

func TestLookupWithProximity(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})

// doc1 = "token2 token4 token4 token2 token3 token4"
indexer.AddDocumentToCache(&types.DocumentIndex{
@@ -329,7 +329,7 @@ func TestLookupWithProximity(t *testing.T) {

func TestLookupWithPartialLocations(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})
// doc1 = "token2 token4 token4 token2 token3 token4" + "label1" (not in the text)
indexer.AddDocumentToCache(&types.DocumentIndex{
DocId: 1,
@@ -358,7 +358,7 @@ func TestLookupWithPartialLocations(t *testing.T) {

func TestLookupWithBM25(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{
+ indexer.Init(types.IndexerOpts{
IndexType: types.FrequenciesIndex,
BM25Parameters: &types.BM25Parameters{
K1: 1,
@@ -393,7 +393,7 @@ func TestLookupWithBM25(t *testing.T) {

func TestLookupWithinDocIds(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})
// doc1 = "token2 token3"
indexer.AddDocumentToCache(&types.DocumentIndex{
DocId: 1,
@@ -436,7 +436,7 @@ func TestLookupWithinDocIds(t *testing.T) {

func TestLookupWithLocations(t *testing.T) {
var indexer Indexer
- indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+ indexer.Init(types.IndexerOpts{IndexType: types.LocationsIndex})
// doc1 = "token2 token4 token4 token2 token3 token4"
indexer.AddDocumentToCache(&types.DocumentIndex{
DocId: 1,
2 changes: 1 addition & 1 deletion data/main.go
@@ -28,7 +28,7 @@ var (

func main() {
// Init searcher
- searcher.Init(types.EngineInitOptions{
+ searcher.Init(types.EngineOpts{
Using: 4,
SegmenterDict: "./dict/dictionary.txt"})
defer searcher.Close()
2 changes: 1 addition & 1 deletion docs/en/benchmarking.md
@@ -14,7 +14,7 @@ Changing the NumShards variable in the test program can change the number of sin

An index item here refers to a unique "search key"-"document" pair; for example, when a document contains N different search keys, it generates N index items.

- The program uses 8 shards by default; you can change this value when initializing the engine to match your requirements, see [types.EngineInitOptions.NumShards](/types/engine_init_options.go).
+ The program uses 8 shards by default; you can change this value when initializing the engine to match your requirements, see [types.EngineOpts.NumShards](/types/engine_init_options.go).
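As a rough sketch of changing that default (not part of this commit; it reuses only identifiers that appear elsewhere in this diff, and the value 16 is an arbitrary example):

```go
package main

import (
	"github.com/go-ego/riot"
	"github.com/go-ego/riot/types"
)

func main() {
	var searcher riot.Engine

	// Spread the index over 16 shards instead of the default 8.
	searcher.Init(types.EngineOpts{
		Using:             4,
		NotUsingSegmenter: true,
		NumShards:         16,
	})
	defer searcher.Close()
}
```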

# Performance analysis

4 changes: 2 additions & 2 deletions docs/en/bm25.md
@@ -6,14 +6,14 @@ BM25 is a search engine's classic sorting function that measures the relevance o
                IDF * TF * (k1 + 1)
    BM25 = sum ----------------------------
                TF + k1 * (1 - b + b * D / L)

- The sum runs over all keywords. TF (term frequency) is how often the keyword appears in the document, D is the number of words in the document, L is the average number of words across all documents, and k1 and b are constants that riot defaults to 2.0 and 0.75; they can be changed at engine initialization via [EngineInitOptions.IndexerInitOptions.BM25Parameters](/types/indexer_init_options.go). IDF (inverse document frequency) measures how common a keyword is; the riot engine uses a smoothed IDF formula:
+ The sum runs over all keywords. TF (term frequency) is how often the keyword appears in the document, D is the number of words in the document, L is the average number of words across all documents, and k1 and b are constants that riot defaults to 2.0 and 0.75; they can be changed at engine initialization via [EngineOpts.IndexerOpts.BM25Parameters](/types/indexer_init_options.go). IDF (inverse document frequency) measures how common a keyword is; the riot engine uses a smoothed IDF formula:

                     total number of documents
    IDF = log2( ----------------------------------------- + 1 )
                 number of documents containing the keyword

# Usage

- The indexer is responsible for computing BM25. To compute a document's BM25 value, the word frequencies of all keywords in the document must be stored, which requires setting [EngineInitOptions.IndexerInitOptions.IndexType](/types/indexer_init_options.go) to at least FrequenciesIndex at engine initialization (LocationsIndex also computes BM25, but that index additionally stores where words appear and consumes more memory).
+ The indexer is responsible for computing BM25. To compute a document's BM25 value, the word frequencies of all keywords in the document must be stored, which requires setting [EngineOpts.IndexerOpts.IndexType](/types/indexer_init_options.go) to at least FrequenciesIndex at engine initialization (LocationsIndex also computes BM25, but that index additionally stores where words appear and consumes more memory).

You can then read IndexedDocument.BM25 in your [custom scoring rules](/docs/en/custom_scoring_criteria.md) and use the value as scoring data. If you want to rely entirely on the BM25 score, you can use the default scoring rule, RankByBM25.
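A minimal sketch of wiring this up (not from this commit; it uses only identifiers visible elsewhere in this diff, and the B field of BM25Parameters is an assumption inferred from the k1/b description above):

```go
var searcher riot.Engine

// FrequenciesIndex stores the per-document term frequencies that the
// BM25 formula above needs; LocationsIndex would work too, at a higher
// memory cost.
searcher.Init(types.EngineOpts{
	SegmenterDict: "../../data/dict/dictionary.txt",
	IndexerOpts: &types.IndexerOpts{
		IndexType: types.FrequenciesIndex,
		// K1 appears in this commit's indexer tests; B is assumed to
		// sit alongside it, matching the defaults of 2.0 and 0.75.
		BM25Parameters: &types.BM25Parameters{
			K1: 2.0,
			B:  0.75,
		},
	},
})
defer searcher.Close()
```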

8 changes: 4 additions & 4 deletions docs/en/codelab.md
@@ -64,17 +64,17 @@ The first package defines the engine function, the second package defines the co

```go
var searcher riot.Engine
- searcher.Init(types.EngineInitOptions{
+ searcher.Init(types.EngineOpts{
SegmenterDict: "../../data/dict/dictionary.txt",
StopTokenFile: "../../data/dict/stop_tokens.txt",
- IndexerInitOptions: &types.IndexerInitOptions{
+ IndexerOpts: &types.IndexerOpts{
IndexType: types.LocationsIndex,
},
})
```
- [types.EngineInitOptions](/types/engine_init_options.go) defines the parameters that need to be set when initializing the engine, such as where to load the segmenter dictionary file and the stop-word list from, the indexer type, the BM25 parameters, and so on, as well as the default scoring rules (see the "Search" section) and the output paging options. For details, please read the comments on the struct in the code.
+ [types.EngineOpts](/types/engine_init_options.go) defines the parameters that need to be set when initializing the engine, such as where to load the segmenter dictionary file and the stop-word list from, the indexer type, the BM25 parameters, and so on, as well as the default scoring rules (see the "Search" section) and the output paging options. For details, please read the comments on the struct in the code.

- In particular, please choose the IndexerInitOptions.IndexType type carefully; there are three different types of index table:
+ In particular, please choose the IndexerOpts.IndexType type carefully; there are three different types of index table:

1. DocIdsIndex provides the most basic index; it only records the docids of the documents in which the search key appears.
2. FrequenciesIndex records, in addition to the docid, the frequency with which the search key appears in each document; if you need BM25, then FrequenciesIndex is what you need.
4 changes: 2 additions & 2 deletions docs/en/persistent_storage.md
@@ -1,10 +1,10 @@
Persistent storage
====

- The riot engine supports saving the search data to disk and restoring it from disk when the machine restarts. To use persistent storage, just set three options in EngineInitOptions:
+ The riot engine supports saving the search data to disk and restoring it from disk when the machine restarts. To use persistent storage, just set three options in EngineOpts:

```go
- type EngineInitOptions struct {
+ type EngineOpts struct {
// Skip other options

// Whether to use a persistent database, plus the directory where the database files are stored and the number of shards to split them into
2 changes: 1 addition & 1 deletion docs/en/segmenter.md
@@ -1,7 +1,7 @@
## Word segmentation rules:

```Go
- types.EngineInitOptions{
+ types.EngineOpts{
Using: 4,
}
```
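For context, a small sketch of how this option is combined with the segmenter settings in the two README hunks of this commit (nothing beyond what those hunks show):

```go
var searcher riot.Engine

// English-only usage without the segmenter, as in the README hunk:
searcher.Init(types.EngineOpts{
	Using:             4,
	NotUsingSegmenter: true,
})
defer searcher.Close()

// For Chinese segmentation with a dictionary, a separate engine would be
// initialized as in the README_zh hunk:
//
//	searcher.Init(types.EngineOpts{
//		Using:         3,
//		SegmenterDict: "zh",
//	})
```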
2 changes: 1 addition & 1 deletion docs/en/token_proximity.md
@@ -13,4 +13,4 @@ The specific calculation process is to take a P_1 first, calculate the smallest

See the computeTokenProximity function in [core/indexer.go](/core/indexer.go) for the implementation.

- Proximity computation needs to store the position of every token in the indexer, which consumes extra memory, so it is off by default. To enable it, set EngineInitOptions.IndexerInitOptions.IndexType to LocationsIndex when initializing the engine.
+ Proximity computation needs to store the position of every token in the indexer, which consumes extra memory, so it is off by default. To enable it, set EngineOpts.IndexerOpts.IndexType to LocationsIndex when initializing the engine.
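A minimal sketch of enabling it (not from this commit; it mirrors the codelab example and uses only identifiers that appear elsewhere in this diff):

```go
var searcher riot.Engine

// LocationsIndex stores every token position, which is what
// computeTokenProximity needs; expect higher memory use.
searcher.Init(types.EngineOpts{
	SegmenterDict: "../../data/dict/dictionary.txt",
	StopTokenFile: "../../data/dict/stop_tokens.txt",
	IndexerOpts: &types.IndexerOpts{
		IndexType: types.LocationsIndex,
	},
})
defer searcher.Close()
```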
2 changes: 1 addition & 1 deletion docs/zh/benchmarking.md
@@ -14,7 +14,7 @@

An index item here refers to a unique "search key"-"document" pair; for example, when a document contains N different search keys, it generates N index items.

- The program uses 8 shards by default; you can change this value when initializing the engine to match your requirements, see [types.EngineInitOptions.NumShards](/types/engine_init_options.go).
+ The program uses 8 shards by default; you can change this value when initializing the engine to match your requirements, see [types.EngineOpts.NumShards](/types/engine_init_options.go).

# Performance analysis

4 changes: 2 additions & 2 deletions docs/zh/bm25.md
@@ -6,13 +6,13 @@ BM25 is a classic search-engine ranking function that measures the relevance between a set of keywords and a document
                IDF * TF * (k1 + 1)
    BM25 = sum ----------------------------
                TF + k1 * (1 - b + b * D / L)

- The sum runs over all keywords. TF (term frequency) is how often the keyword appears in the document, D is the number of words in the document, L is the average number of words across all documents, and k1 and b are constants that riot defaults to 2.0 and 0.75; they can be changed at engine initialization via [EngineInitOptions.IndexerInitOptions.BM25Parameters](/types/indexer_init_options.go). IDF (inverse document frequency) measures how common a keyword is; the riot engine uses a smoothed IDF formula:
+ The sum runs over all keywords. TF (term frequency) is how often the keyword appears in the document, D is the number of words in the document, L is the average number of words across all documents, and k1 and b are constants that riot defaults to 2.0 and 0.75; they can be changed at engine initialization via [EngineOpts.IndexerOpts.BM25Parameters](/types/indexer_init_options.go). IDF (inverse document frequency) measures how common a keyword is; the riot engine uses a smoothed IDF formula:

                     total number of documents
    IDF = log2( ----------------------------------------- + 1 )
                 number of documents containing the keyword

# Usage

- The indexer is responsible for computing BM25. To compute a document's BM25 value, the word frequencies of all keywords in the document must be stored, which requires setting [EngineInitOptions.IndexerInitOptions.IndexType](/types/indexer_init_options.go) to at least FrequenciesIndex at engine initialization (LocationsIndex also computes BM25, but that index additionally stores where words appear and consumes more memory).
+ The indexer is responsible for computing BM25. To compute a document's BM25 value, the word frequencies of all keywords in the document must be stored, which requires setting [EngineOpts.IndexerOpts.IndexType](/types/indexer_init_options.go) to at least FrequenciesIndex at engine initialization (LocationsIndex also computes BM25, but that index additionally stores where words appear and consumes more memory).

You can then read IndexedDocument.BM25 in your [custom scoring rules](/docs/zh/custom_scoring_criteria.md) and use the value as scoring data. If you want to rely entirely on the BM25 score, you can use the default scoring rule, RankByBM25.
8 changes: 4 additions & 4 deletions docs/zh/codelab.md
@@ -64,17 +64,17 @@ import (

```go
var searcher riot.Engine
- searcher.Init(types.EngineInitOptions{
+ searcher.Init(types.EngineOpts{
SegmenterDict: "../../data/dict/dictionary.txt",
StopTokenFile: "../../data/dict/stop_tokens.txt",
- IndexerInitOptions: &types.IndexerInitOptions{
+ IndexerOpts: &types.IndexerOpts{
IndexType: types.LocationsIndex,
},
})
```
- [types.EngineInitOptions](/types/engine_init_options.go) defines the parameters that need to be set when initializing the engine, such as where to load the segmenter dictionary file and the stop-word list from, the indexer type, the BM25 parameters, and so on, as well as the default scoring rules (see the "Search" section) and the output paging options. For details, please read the comments on the struct in the code.
+ [types.EngineOpts](/types/engine_init_options.go) defines the parameters that need to be set when initializing the engine, such as where to load the segmenter dictionary file and the stop-word list from, the indexer type, the BM25 parameters, and so on, as well as the default scoring rules (see the "Search" section) and the output paging options. For details, please read the comments on the struct in the code.

- In particular, please choose the IndexerInitOptions.IndexType type carefully; there are three different types of index table:
+ In particular, please choose the IndexerOpts.IndexType type carefully; there are three different types of index table:

1. DocIdsIndex provides the most basic index; it only records the docids of the documents in which the search key appears.
2. FrequenciesIndex records, in addition to the docid, the frequency with which the search key appears in each document; if you need BM25, then FrequenciesIndex is what you need.
4 changes: 2 additions & 2 deletions docs/zh/persistent_storage.md
@@ -1,10 +1,10 @@
Persistent storage
====

- The riot engine supports saving the search data to disk and restoring it from disk when the machine restarts. To use persistent storage, just set three options in EngineInitOptions:
+ The riot engine supports saving the search data to disk and restoring it from disk when the machine restarts. To use persistent storage, just set three options in EngineOpts:

```go
- type EngineInitOptions struct {
+ type EngineOpts struct {
	// other options omitted

// Whether to use a persistent database, plus the directory where the database files are stored and the number of shards to split them into
2 changes: 1 addition & 1 deletion docs/zh/segmenter.md
@@ -1,7 +1,7 @@
## Word segmentation rules:

```Go
- types.EngineInitOptions{
+ types.EngineOpts{
Using: 4,
}
```
2 changes: 1 addition & 1 deletion docs/zh/token_proximity.md
@@ -13,4 +13,4 @@ The proximity distance of N keywords is computed with the following formula:

See the computeTokenProximity function in [core/indexer.go](/core/indexer.go) for the implementation.

- Proximity computation needs to store the position of every token in the indexer, which consumes extra memory, so it is off by default. To enable it, set EngineInitOptions.IndexerInitOptions.IndexType to LocationsIndex when initializing the engine.
+ Proximity computation needs to store the position of every token in the indexer, which consumes extra memory, so it is off by default. To enable it, set EngineOpts.IndexerOpts.IndexType to LocationsIndex when initializing the engine.
10 changes: 5 additions & 5 deletions engine.go
@@ -68,7 +68,7 @@ type Engine struct {
numDocumentsStored uint64

// record the init options
- initOptions types.EngineInitOptions
+ initOptions types.EngineOpts
initialized bool

indexers []core.Indexer
@@ -94,7 +94,7 @@ type Engine struct {
}

// Indexer initialize the indexer channel
- func (engine *Engine) Indexer(options types.EngineInitOptions) {
+ func (engine *Engine) Indexer(options types.EngineOpts) {
engine.indexerAddDocChannels = make(
[]chan indexerAddDocumentRequest, options.NumShards)
engine.indexerRemoveDocChannels = make(
@@ -115,7 +115,7 @@ func (engine *Engine) Indexer(options types.EngineInitOptions) {
}

// Ranker initialize the ranker channel
- func (engine *Engine) Ranker(options types.EngineInitOptions) {
+ func (engine *Engine) Ranker(options types.EngineOpts) {
engine.rankerAddDocChannels = make(
[]chan rankerAddDocRequest, options.NumShards)
engine.rankerRankChannels = make(
@@ -219,7 +219,7 @@ func (engine *Engine) Storage() {
}

// Init initialize the engine
- func (engine *Engine) Init(options types.EngineInitOptions) {
+ func (engine *Engine) Init(options types.EngineOpts) {
	// set the number of threads to the number of CPUs
// runtime.GOMAXPROCS(runtime.NumCPU())
// runtime.GOMAXPROCS(128)
@@ -243,7 +243,7 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
// init the indexers and rankers
for shard := 0; shard < options.NumShards; shard++ {
engine.indexers = append(engine.indexers, core.Indexer{})
- engine.indexers[shard].Init(*options.IndexerInitOptions)
+ engine.indexers[shard].Init(*options.IndexerOpts)

engine.rankers = append(engine.rankers, core.Ranker{})
engine.rankers[shard].Init(options.OnlyID)
