Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
  • Loading branch information
isnowfy committed Sep 27, 2015
1 parent 262aa10 commit 3c1c528
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def read(fname):

setup(
name='snownlp',
- version='0.12.2',
+ version='0.12.3',
description='Python library for processing Chinese text',
author='isnowfy',
url='https://github.com/isnowfy/snownlp',
Expand Down
15 changes: 14 additions & 1 deletion snownlp/seg/y09_2047.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def train(self, data):
tl1 = 0.0
tl2 = 0.0
tl3 = 0.0
- for now in self.tri.samples():
+ samples = sorted(self.tri.samples(), key=lambda x: self.tri.get(x)[1])
+ for now in samples:
c3 = self.div(self.tri.get(now)[1]-1, self.bi.get(now[:2])[1]-1)
c2 = self.div(self.bi.get(now[1:])[1]-1, self.uni.get(now[1])[1]-1)
c1 = self.div(self.uni.get(now[2])[1]-1, self.uni.getsum()-1)
Expand All @@ -101,6 +102,18 @@ def tag(self, data):
now = [((('', 'BOS'), ('', 'BOS')), 0.0, [])]
for w in data:
stage = {}
+ not_found = True
+ for s in self.status:
+ if self.uni.freq((w, s)) != 0:
+ not_found = False
+ break
+ if not_found:
+ for s in self.status:
+ for pre in now:
+ stage[(pre[0][1], (w, s))] = (pre[1], pre[2]+[s])
+ now = list(map(lambda x: (x[0], x[1][0], x[1][1]),
+ stage.items()))
+ continue
for s in self.status:
for pre in now:
p = pre[1]+self.log_prob(pre[0][0], pre[0][1], (w, s))
Expand Down
20 changes: 13 additions & 7 deletions snownlp/summary/textrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,20 @@ def solve(self):
for _ in range(self.max_iter):
m = {}
max_diff = 0
- for k, v in self.words.items():
- m[k] = 1-self.d
- for j in v:
- if k == j or len(self.words[j]) == 0:
+ tmp = filter(lambda x: len(self.words[x[0]]) > 0,
+ self.vertex.items())
+ tmp = sorted(tmp, key=lambda x: x[1] / len(self.words[x[0]]))
+ for k, v in tmp:
+ for j in self.words[k]:
+ if k == j:
  continue
- m[k] += (self.d/len(self.words[j])*self.vertex[j])
- if abs(m[k] - self.vertex[k]) > max_diff:
- max_diff = abs(m[k] - self.vertex[k])
+ if j not in m:
+ m[j] = 1 - self.d
+ m[j] += (self.d / len(self.words[k]) * self.vertex[k])
+ for k in self.vertex:
+ if k in m and k in self.vertex:
+ if abs(m[k] - self.vertex[k]) > max_diff:
+ max_diff = abs(m[k] - self.vertex[k])
self.vertex = m
if max_diff <= self.min_diff:
break
Expand Down

0 comments on commit 3c1c528

Please sign in to comment.