Skip to content

Commit

Permalink
Added term frequency field to TVTerm
Browse files Browse the repository at this point in the history
* You can now find out the frequency of a term in a document without having to
  store offsets or positions.
  • Loading branch information
dbalmain committed Jun 22, 2008
1 parent 9756496 commit a249f60
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 15 deletions.
4 changes: 2 additions & 2 deletions ruby/ext/r_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -1227,7 +1227,7 @@ frb_get_tv_term(TVTerm *tv_term)
}
RARRAY(rpositions)->len = freq;
}
return rb_struct_new(cTVTerm, rtext, rpositions, NULL);
return rb_struct_new(cTVTerm, rtext, INT2FIX(freq), rpositions, NULL);
}

/****************************************************************************
Expand Down Expand Up @@ -3072,7 +3072,7 @@ Init_TVTerm(void)
/* rdochack
cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
*/
cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL);
cTVTerm = rb_struct_define(tv_term_class, "text", "freq", "positions", NULL);
rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
}
Expand Down
2 changes: 1 addition & 1 deletion ruby/lib/ferret/index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def term_vector(id, field)
if id.kind_of?(String) or id.kind_of?(Symbol)
term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
if term_doc_enum.next?
id = @reader[term_doc_enum.doc]
id = term_doc_enum.doc
else
return nil
end
Expand Down
24 changes: 12 additions & 12 deletions ruby/test/unit/index/tc_index_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ def do_test_term_docpos_enum_skip_to(tde)
def do_test_term_vectors()
expected_tv = TermVector.new(:body,
[
TVTerm.new("word1", [2, 4, 7]),
TVTerm.new("word2", [3]),
TVTerm.new("word3", [0, 5, 8, 9]),
TVTerm.new("word4", [1, 6])
TVTerm.new("word1", 3, [2, 4, 7]),
TVTerm.new("word2", 1, [3]),
TVTerm.new("word3", 4, [0, 5, 8, 9]),
TVTerm.new("word4", 2, [1, 6])
],
[*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)})

Expand All @@ -209,13 +209,13 @@ def do_test_term_vectors()

tv = tvs[:author]
assert_equal(:author, tv.field)
assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms)
assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms)
assert(tv.offsets.nil?)


tv = tvs[:title]
assert_equal(:title, tv.field)
assert_equal([TVTerm.new("War And Peace", nil)], tv.terms)
assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms)
assert_equal([TVOffsets.new(0, 13)], tv.offsets)
end

Expand Down Expand Up @@ -608,10 +608,10 @@ def test_ir_multivalue_fields()
def do_test_term_vectors(ir)
expected_tv = TermVector.new(:body,
[
TVTerm.new("word1", [2, 4, 7]),
TVTerm.new("word2", [3]),
TVTerm.new("word3", [0, 5, 8, 9]),
TVTerm.new("word4", [1, 6])
TVTerm.new("word1", 3, [2, 4, 7]),
TVTerm.new("word2", 1, [3]),
TVTerm.new("word3", 4, [0, 5, 8, 9]),
TVTerm.new("word4", 2, [1, 6])
],
[*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)})

Expand All @@ -626,13 +626,13 @@ def do_test_term_vectors(ir)

tv = tvs[:author]
assert_equal(:author, tv.field)
assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms)
assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms)
assert(tv.offsets.nil?)


tv = tvs[:title]
assert_equal(:title, tv.field)
assert_equal([TVTerm.new("War And Peace", nil)], tv.terms)
assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms)
assert_equal([TVOffsets.new(0, 13)], tv.offsets)
end

Expand Down

0 comments on commit a249f60

Please sign in to comment.