
Commit

Preparing for release.
fulmicoton committed Jun 22, 2018
1 parent e301e0b commit badfce3
Showing 6 changed files with 42 additions and 21 deletions.
17 changes: 13 additions & 4 deletions CHANGELOG.md
@@ -1,14 +1,23 @@
Tantivy 0.6
==========================
- Removed C code. Tantivy is now pure Rust.
- BM25
- Approximate field norms encoded over 1 byte.
- Compiles on stable rust


Special thanks to @drusellers and @jason-wolfe for their contributions
to this release!

- Removed C code. Tantivy is now pure Rust. (@pmasurel)
- BM25 (@pmasurel)
- Approximate field norms encoded over 1 byte. (@pmasurel)
- Compiles on stable rust (@pmasurel)
- Add &[u8] fastfield for associating arbitrary bytes to each document (@jason-wolfe) (#270)
- Completely uncompressed
- Internally: One u64 fast field for indexes, one fast field for the bytes themselves.
- Add NGram token support (@drusellers)
- Add Stopword Filter support (@drusellers)
- Add a FuzzyTermQuery (@drusellers)
- Add a RegexQuery (@drusellers)
- Various performance improvements (@pmasurel)


Tantivy 0.5.2
===========================
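The `&[u8]` fast field entry in the 0.6 notes above also sketches its internal layout: one u64 fast field holding per-document offsets, plus one flat, uncompressed byte store. A minimal, self-contained illustration of that layout in plain Rust (illustrative types only, not tantivy's actual API):

    // One u64 "index" column records where each document's value starts;
    // a single flat byte array holds all values back to back, uncompressed.
    struct BytesFastField {
        offsets: Vec<u64>, // offsets[doc] .. offsets[doc + 1] delimit doc's bytes
        data: Vec<u8>,     // all values concatenated
    }

    impl BytesFastField {
        fn get(&self, doc: usize) -> &[u8] {
            let start = self.offsets[doc] as usize;
            let end = self.offsets[doc + 1] as usize;
            &self.data[start..end]
        }
    }

    fn main() {
        // Two documents: doc 0 holds [1, 2, 3], doc 1 holds [42].
        let field = BytesFastField {
            offsets: vec![0, 3, 4], // a trailing sentinel offset marks the end
            data: vec![1, 2, 3, 42],
        };
        assert_eq!(field.get(0), &[1u8, 2, 3][..]);
        assert_eq!(field.get(1), &[42u8][..]);
    }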
4 changes: 1 addition & 3 deletions Cargo.toml
@@ -44,7 +44,7 @@ stable_deref_trait = "1.0.0"
rust-stemmers = "0.1.0"
downcast = { version="0.9" }
matches = "0.1"
bitpacking = "0.4"
bitpacking = "0.5"
fnv = "1.0.6"

[target.'cfg(windows)'.dependencies]
@@ -62,9 +62,7 @@ debug-assertions = false

[features]
default = ["mmap"]
simd = ["bitpacking/simd"]
mmap = ["fst/mmap", "atomicwrites"]
unstable = ["simd"]
lz4-compression = ["lz4"]

[badges]
6 changes: 3 additions & 3 deletions src/collector/facet_collector.rs
@@ -507,7 +507,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

let mut index_writer = index.writer(3_000_000).unwrap();
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
let num_facets: usize = 3 * 4 * 5;
let facets: Vec<Facet> = (0..num_facets)
.map(|mut n| {
@@ -587,7 +587,7 @@ mod tests {
.collect();
thread_rng().shuffle(&mut docs[..]);

let mut index_writer = index.writer(3_000_000).unwrap();
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
for doc in docs {
index_writer.add_document(doc);
}
@@ -644,7 +644,7 @@ mod bench {
// 40425 docs
thread_rng().shuffle(&mut docs[..]);

let mut index_writer = index.writer(3_000_000).unwrap();
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
for doc in docs {
index_writer.add_document(doc);
}
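The only change in these tests is how the writer is built. Because the heap argument is now an overall budget shared by all indexing threads (see the `src/core/index.rs` changes below), a 3 MB budget divided across every CPU core could fall below the per-thread minimum, so the tests pin a single indexing thread and hand it the whole budget. A sketch of the pattern, assuming the `Index`, `IndexWriter`, and `Result` types these tests already have in scope:

    // One indexing thread receives the whole 3 MB budget; the old call
    // index.writer(3_000_000) would now split those 3 MB across every core.
    fn single_threaded_test_writer(index: &Index) -> Result<IndexWriter> {
        index.writer_with_num_threads(1, 3_000_000)
    }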
26 changes: 20 additions & 6 deletions src/core/index.rs
@@ -27,6 +27,7 @@ use num_cpus;
use std::path::Path;
use tokenizer::TokenizerManager;
use IndexWriter;
use indexer::index_writer::HEAP_SIZE_MIN;

const NUM_SEARCHERS: usize = 12;

@@ -136,31 +137,44 @@ impl Index {
/// `IndexWriter` on the system is accessing the index directory,
/// it is safe to manually delete the lockfile.
///
/// num_threads specifies the number of indexing workers that
/// - `num_threads` defines the number of indexing workers that
/// should work at the same time.
///
/// - `overall_heap_size_in_bytes` sets the total amount of memory
/// allocated for all indexing threads.
/// Each thread will receive a budget of `overall_heap_size_in_bytes / num_threads`.
///
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
/// # Panics
/// If the heap size per thread is too small, panics.
pub fn writer_with_num_threads(
&self,
num_threads: usize,
heap_size_in_bytes: usize,
overall_heap_size_in_bytes: usize,
) -> Result<IndexWriter> {
let directory_lock = DirectoryLock::lock(self.directory().box_clone())?;
open_index_writer(self, num_threads, heap_size_in_bytes, directory_lock)
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
open_index_writer(self, num_threads, heap_size_in_bytes_per_thread, directory_lock)
}

/// Creates a multithreaded writer
/// It just calls `writer_with_num_threads` with the number of cores as `num_threads`
///
/// Tantivy will automatically determine the number of threads to use.
/// `overall_heap_size_in_bytes` is the total target memory usage that will be split
/// between a given number of threads.
///
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
/// # Panics
/// If the heap size per thread is too small, panics.
pub fn writer(&self, heap_size_in_bytes: usize) -> Result<IndexWriter> {
self.writer_with_num_threads(num_cpus::get(), heap_size_in_bytes)
pub fn writer(&self, overall_heap_size_in_bytes: usize) -> Result<IndexWriter> {
let mut num_threads = num_cpus::get();
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
if heap_size_in_bytes_per_thread < HEAP_SIZE_MIN {
num_threads = (overall_heap_size_in_bytes / HEAP_SIZE_MIN).max(1);
}
self.writer_with_num_threads(num_threads, overall_heap_size_in_bytes)
}

/// Accessor to the index schema
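To summarize the new behavior: `writer(overall_heap_size_in_bytes)` treats its argument as a total budget, splits it evenly across the indexing threads, and lowers the thread count whenever the per-thread share would drop below `HEAP_SIZE_MIN`. A standalone sketch of that selection logic (the real constant lives in `indexer::index_writer::HEAP_SIZE_MIN` and its value is not shown in this diff, so the number below is only a placeholder):

    const HEAP_SIZE_MIN: usize = 1_000_000; // placeholder value, not tantivy's

    fn pick_num_threads(overall_heap_size_in_bytes: usize, num_cpus: usize) -> usize {
        let heap_size_per_thread = overall_heap_size_in_bytes / num_cpus;
        if heap_size_per_thread < HEAP_SIZE_MIN {
            // Use fewer threads so each one still gets at least HEAP_SIZE_MIN,
            // but never fewer than one.
            (overall_heap_size_in_bytes / HEAP_SIZE_MIN).max(1)
        } else {
            num_cpus
        }
    }

    fn main() {
        // With the placeholder minimum, a 3 MB overall budget on an 8-core box
        // gets 3 threads of roughly 1 MB each instead of 8 starved threads.
        assert_eq!(pick_num_threads(3_000_000, 8), 3);
        assert_eq!(pick_num_threads(12_000_000, 4), 4);
    }

Callers that need an exact thread count can still use `writer_with_num_threads(num_threads, overall_heap_size_in_bytes)`, which now divides the same overall budget by `num_threads`.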
8 changes: 4 additions & 4 deletions src/indexer/index_writer.rs
@@ -712,7 +712,7 @@ mod tests {

{
// writing the segment
let mut index_writer = index.writer_with_num_threads(3, 3_000_000).unwrap();
let mut index_writer = index.writer(3_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"a"));
index_writer.rollback().unwrap();

@@ -745,7 +745,7 @@ mod tests {
};
{
// writing the segment
let mut index_writer = index.writer_with_num_threads(4, 3_000_000).unwrap();
let mut index_writer = index.writer(12_000_000).unwrap();
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
let mut doc = Document::default();
@@ -779,7 +779,7 @@

{
// writing the segment
let mut index_writer = index.writer_with_num_threads(4, 3_000_000).unwrap();
let mut index_writer = index.writer(12_000_000).unwrap();
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"));
@@ -814,7 +814,7 @@

{
// writing the segment
let mut index_writer = index.writer_with_num_threads(4, 3_000_000).unwrap();
let mut index_writer = index.writer_with_num_threads(4, 12_000_000).unwrap();
// create 8 segments with 100 tiny docs
for _doc in 0..100 {
index_writer.add_document(doc!(text_field => "a"));
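These tests keep their per-thread budget constant under the new semantics: where a writer previously asked for 4 threads at 3 MB each, it now passes the equivalent 12 MB overall budget, either letting `writer` choose the thread count or keeping 4 threads explicitly. The arithmetic, spelled out as a quick check:

    // 4 indexing threads at the old 3 MB each equals a 12 MB overall budget,
    // which the new writer splits back into 3 MB per thread.
    fn main() {
        let per_thread: usize = 3_000_000;
        let num_threads: usize = 4;
        assert_eq!(per_thread * num_threads, 12_000_000);
    }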
2 changes: 1 addition & 1 deletion src/lib.rs
@@ -188,7 +188,7 @@ mod compression;
mod core;
mod indexer;

#[allow(unused_doc_comment)]
#[allow(unused_doc_comments)]
mod error;
pub mod tokenizer;

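The attribute change in `src/lib.rs` tracks a rename of the lint itself: rustc now spells it `unused_doc_comments` (plural). A minimal illustration, not taken from tantivy's `error` module, of the kind of code this lint flags:

    #[allow(unused_doc_comments)]
    fn example() {
        /// This doc comment sits on a `let` statement, so rustdoc has nothing
        /// to attach it to; without the allow, rustc warns with `unused_doc_comments`.
        let _answer = 42;
    }

    fn main() {
        example();
    }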
