Skip to content

Commit

Permalink
Merge pull request isnowfy#4 from erning/python3
Browse files Browse the repository at this point in the history
compatible with python-3.3
  • Loading branch information
isnowfy committed Dec 9, 2013
2 parents 5f245d1 + 0d3c994 commit 95a4503
Show file tree
Hide file tree
Showing 14 changed files with 37 additions and 24 deletions.
7 changes: 6 additions & 1 deletion snownlp/classification/bayes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import sys
import marshal
from math import log, exp

Expand All @@ -19,13 +20,17 @@ def save(self, fname):
d['d'] = {}
for k, v in self.d.iteritems():
d['d'][k] = v.__dict__
if sys.version_info.major == 3:
fname = fname + '.3'
marshal.dump(d, open(fname, 'wb'))

def load(self, fname):
if sys.version_info.major == 3:
fname = fname + '.3'
d = marshal.load(open(fname, 'rb'))
self.total = d['total']
self.d = {}
for k, v in d['d'].iteritems():
for k, v in d['d'].items():
self.d[k] = AddOneProb()
self.d[k].__dict__ = v

Expand Down
4 changes: 2 additions & 2 deletions snownlp/normal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re
import codecs

import zh
from . import zh

stop_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'stopwords.txt')
Expand All @@ -25,7 +25,7 @@


def filter_stop(words):
return filter(lambda x: x not in stop, words)
return list(filter(lambda x: x not in stop, words))


def zh2hans(sent):
Expand Down
3 changes: 2 additions & 1 deletion snownlp/normal/zh.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals

zh2hans = {
Expand Down Expand Up @@ -3242,4 +3243,4 @@ def transfer(sentence):


if __name__ == '__main__':
print transfer('飛機飛向藍天')
print(transfer('飛機飛向藍天'))
2 changes: 1 addition & 1 deletion snownlp/seg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re

import seg as TnTseg
from . import seg as TnTseg

data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'seg.marshal')
Expand Down
Binary file added snownlp/seg/seg.marshal.3
Binary file not shown.
4 changes: 2 additions & 2 deletions snownlp/seg/seg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import print_function
from __future__ import unicode_literals

import codecs
Expand Down Expand Up @@ -46,4 +46,4 @@ def seg(self, sentence):
if __name__ == '__main__':
seg = Seg()
seg.train('data.txt')
print ' '.join(seg.seg('主要是用来放置一些简单快速的中文分词和词性标注的程序'))
print(' '.join(seg.seg('主要是用来放置一些简单快速的中文分词和词性标注的程序')))
Binary file added snownlp/sentiment/sentiment.marshal.3
Binary file not shown.
4 changes: 2 additions & 2 deletions snownlp/sim/bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def init(self):
tmp[word] = 0
tmp[word] += 1
self.f.append(tmp)
for k, v in tmp.iteritems():
for k, v in tmp.items():
if k not in self.df:
self.df[k] = 0
self.df[k] += 1
for k, v in self.df.iteritems():
for k, v in self.df.items():
self.idf[k] = math.log(self.D-v+0.5)-math.log(v+0.5)

def sim(self, doc, index):
Expand Down
12 changes: 6 additions & 6 deletions snownlp/summary/textrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ def solve(self):
self.top = sorted(self.top, key=lambda x: x[1], reverse=True)

def top_index(self, limit):
return map(lambda x: x[0], self.top)[:limit]
return list(map(lambda x: x[0], self.top))[:limit]

def top(self, limit):
return map(lambda x: self.docs[x[0]], self.top)
return list(map(lambda x: self.docs[x[0]], self.top))


class KeywordTextRank(object):
Expand Down Expand Up @@ -79,7 +79,7 @@ def solve(self):
for _ in range(self.max_iter):
m = {}
max_diff = 0
for k, v in self.words.iteritems():
for k, v in self.words.items():
m[k] = 1-self.d
for j in v:
if k == j or len(self.words[j]) == 0:
Expand All @@ -90,11 +90,11 @@ def solve(self):
self.vertex = m
if max_diff <= self.min_diff:
break
self.top = list(self.vertex.iteritems())
self.top = list(self.vertex.items())
self.top = sorted(self.top, key=lambda x: x[1], reverse=True)

def top_index(self, limit):
return map(lambda x: x[0], self.top)[:limit]
return list(map(lambda x: x[0], self.top))[:limit]

def top(self, limit):
return map(lambda x: self.docs[x[0]], self.top)
return list(map(lambda x: self.docs[x[0]], self.top))
Binary file added snownlp/tag/tag.marshal.3
Binary file not shown.
4 changes: 2 additions & 2 deletions snownlp/utils/frequency.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-

import good_turing
from . import good_turing

class BaseProb(object):

def __init__(self):
self.d = {}
self.total = 0.0
Expand Down
4 changes: 2 additions & 2 deletions snownlp/utils/good_turing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import print_function
from __future__ import division
from math import log, exp

Expand Down Expand Up @@ -48,4 +48,4 @@ def main(dic):
return nr[0]/total/total, dict(zip(dic.keys(), map(lambda x:prob[rr[x]], dic.values())))

if __name__ == '__main__':
print main({1:1,2:1,3:1,4:2,5:2,6:3,7:1,8:2,9:3})
print(main({1:1,2:1,3:1,4:2,5:2,6:3,7:1,8:2,9:3}))
12 changes: 9 additions & 3 deletions snownlp/utils/tnt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
'''
Implementation of 'TnT - A Statistical Part of Speech Tagger'
'''
from __future__ import unicode_literals

import sys
import heapq
import marshal
from math import log

import frequency
from . import frequency


class TnT(object):
Expand Down Expand Up @@ -37,11 +39,15 @@ def save(self, fname):
d[k] = v.__dict__
else:
d[k] = v
if sys.version_info.major == 3:
fname = fname + '.3'
marshal.dump(d, open(fname, 'wb'))

def load(self, fname):
if sys.version_info.major == 3:
fname = fname + '.3'
d = marshal.load(open(fname, 'rb'))
for k, v in d.iteritems():
for k, v in d.items():
if isinstance(self.__dict__[k], set):
self.__dict__[k] = set(v)
elif hasattr(self.__dict__[k], '__dict__'):
Expand Down Expand Up @@ -121,7 +127,7 @@ def tag(self, data):
if (pre[0][1], s) not in stage or p > stage[(pre[0][1],
s)][0]:
stage[(pre[0][1], s)] = (p, pre[2]+[s])
stage = map(lambda x: (x[0], x[1][0], x[1][1]), stage.items())
stage = list(map(lambda x: (x[0], x[1][0], x[1][1]), stage.items()))
now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
now = heapq.nlargest(1, stage, key=lambda x: x[1]+self.geteos(x[0][1]))
return zip(data, now[0][2])
5 changes: 3 additions & 2 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals

text = '''
Expand Down Expand Up @@ -54,8 +55,8 @@
rank = textrank.TextRank(doc)
rank.solve()
for index in rank.top_index(5):
print sents[index]
print(sents[index])
keyword_rank = textrank.KeywordTextRank(doc)
keyword_rank.solve()
for w in keyword_rank.top_index(5):
print w
print(w)

0 comments on commit 95a4503

Please sign in to comment.