Skip to content

Commit

Permalink
切词脚本
Browse files: browse the repository at this point in the history
  • Loading branch information
hq20051252 committed Mar 17, 2015
1 parent cf649d1 commit 07b729a
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion words/file_cut.py
Original file line number | Diff line number | Diff line change
Expand Up
@@ -28,11 +28,19 @@ def main():
print "Start cut word."
start = time.time()

progress = 0
for line in fd.xreadlines():
fo.write(" ".join(jieba.cut(line)).encode("utf-8"))

progress += 1
sys.stdout.write("Process line %d." % progress)

tokens = jieba.cut(line)
res = " ".join(tokens).encode("utf-8")
fo.write(res)

end = time.time()
time_cost = end - start
print "\n"
print "Cost time %s." % time_cost
print "Process file completely."

Expand Down

0 comments on commit 07b729a

Please sign in to comment.