Merge pull request isnowfy#4 from erning/python3

compatible with python-3.3
kqij2015 · Dec 9, 2013 · 95a4503 · 95a4503
2 parents 5f245d1 + 0d3c994
commit 95a4503
Show file tree

Hide file tree

Showing 14 changed files with 37 additions and 24 deletions.
diff --git a/snownlp/classification/bayes.py b/snownlp/classification/bayes.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
+import sys
 import marshal
 from math import log, exp
 
@@ -19,13 +20,17 @@ def save(self, fname):
         d['d'] = {}
         for k, v in self.d.iteritems():
             d['d'][k] = v.__dict__
+        if sys.version_info.major == 3:
+            fname = fname + '.3'
         marshal.dump(d, open(fname, 'wb'))
 
     def load(self, fname):
+        if sys.version_info.major == 3:
+            fname = fname + '.3'
         d = marshal.load(open(fname, 'rb'))
         self.total = d['total']
         self.d = {}
-        for k, v in d['d'].iteritems():
+        for k, v in d['d'].items():
             self.d[k] = AddOneProb()
             self.d[k].__dict__ = v
 

diff --git a/snownlp/normal/__init__.py b/snownlp/normal/__init__.py
@@ -5,7 +5,7 @@
 import re
 import codecs
 
-import zh
+from . import zh
 
 stop_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'stopwords.txt')
@@ -25,7 +25,7 @@
 
 
 def filter_stop(words):
-    return filter(lambda x: x not in stop, words)
+    return list(filter(lambda x: x not in stop, words))
 
 
 def zh2hans(sent):

diff --git a/snownlp/normal/zh.py b/snownlp/normal/zh.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import print_function
 from __future__ import unicode_literals
 
 zh2hans = {
@@ -3242,4 +3243,4 @@ def transfer(sentence):
 
 
 if __name__ == '__main__':
-    print transfer('飛機飛向藍天')
+    print(transfer('飛機飛向藍天'))
diff --git a/snownlp/seg/__init__.py b/snownlp/seg/__init__.py
@@ -4,7 +4,7 @@
 import os
 import re
 
-import seg as TnTseg
+from . import seg as TnTseg
 
 data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'seg.marshal')

diff --git a/snownlp/seg/seg.marshal.3 b/snownlp/seg/seg.marshal.3
diff --git a/snownlp/seg/seg.py b/snownlp/seg/seg.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-
+from __future__ import print_function
 from __future__ import unicode_literals
 
 import codecs
@@ -46,4 +46,4 @@ def seg(self, sentence):
 if __name__ == '__main__':
     seg = Seg()
     seg.train('data.txt')
-    print ' '.join(seg.seg('主要是用来放置一些简单快速的中文分词和词性标注的程序'))
+    print(' '.join(seg.seg('主要是用来放置一些简单快速的中文分词和词性标注的程序')))
diff --git a/snownlp/sentiment/sentiment.marshal.3 b/snownlp/sentiment/sentiment.marshal.3
diff --git a/snownlp/sim/bm25.py b/snownlp/sim/bm25.py
@@ -25,11 +25,11 @@ def init(self):
                     tmp[word] = 0
                 tmp[word] += 1
             self.f.append(tmp)
-            for k, v in tmp.iteritems():
+            for k, v in tmp.items():
                 if k not in self.df:
                     self.df[k] = 0
                 self.df[k] += 1
-        for k, v in self.df.iteritems():
+        for k, v in self.df.items():
             self.idf[k] = math.log(self.D-v+0.5)-math.log(v+0.5)
 
     def sim(self, doc, index):

diff --git a/snownlp/summary/textrank.py b/snownlp/summary/textrank.py
@@ -43,10 +43,10 @@ def solve(self):
         self.top = sorted(self.top, key=lambda x: x[1], reverse=True)
 
     def top_index(self, limit):
-        return map(lambda x: x[0], self.top)[:limit]
+        return list(map(lambda x: x[0], self.top))[:limit]
 
     def top(self, limit):
-        return map(lambda x: self.docs[x[0]], self.top)
+        return list(map(lambda x: self.docs[x[0]], self.top))
 
 
 class KeywordTextRank(object):
@@ -79,7 +79,7 @@ def solve(self):
         for _ in range(self.max_iter):
             m = {}
             max_diff = 0
-            for k, v in self.words.iteritems():
+            for k, v in self.words.items():
                 m[k] = 1-self.d
                 for j in v:
                     if k == j or len(self.words[j]) == 0:
@@ -90,11 +90,11 @@ def solve(self):
             self.vertex = m
             if max_diff <= self.min_diff:
                 break
-        self.top = list(self.vertex.iteritems())
+        self.top = list(self.vertex.items())
         self.top = sorted(self.top, key=lambda x: x[1], reverse=True)
 
     def top_index(self, limit):
-        return map(lambda x: x[0], self.top)[:limit]
+        return list(map(lambda x: x[0], self.top))[:limit]
 
     def top(self, limit):
-        return map(lambda x: self.docs[x[0]], self.top)
+        return list(map(lambda x: self.docs[x[0]], self.top))
diff --git a/snownlp/tag/tag.marshal.3 b/snownlp/tag/tag.marshal.3
diff --git a/snownlp/utils/frequency.py b/snownlp/utils/frequency.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 
-import good_turing
+from . import good_turing
 
 class BaseProb(object):
-    
+
     def __init__(self):
         self.d = {}
         self.total = 0.0

diff --git a/snownlp/utils/good_turing.py b/snownlp/utils/good_turing.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-
+from __future__ import print_function
 from __future__ import division
 from math import log, exp
 
@@ -48,4 +48,4 @@ def main(dic):
     return nr[0]/total/total, dict(zip(dic.keys(), map(lambda x:prob[rr[x]], dic.values())))
 
 if __name__ == '__main__':
-    print main({1:1,2:1,3:1,4:2,5:2,6:3,7:1,8:2,9:3})
+    print(main({1:1,2:1,3:1,4:2,5:2,6:3,7:1,8:2,9:3}))
diff --git a/snownlp/utils/tnt.py b/snownlp/utils/tnt.py
@@ -3,12 +3,14 @@
 '''
 Implementation of 'TnT - A Statistical Part of Speech Tagger'
 '''
+from __future__ import unicode_literals
 
+import sys
 import heapq
 import marshal
 from math import log
 
-import frequency
+from . import frequency
 
 
 class TnT(object):
@@ -37,11 +39,15 @@ def save(self, fname):
                 d[k] = v.__dict__
             else:
                 d[k] = v
+        if sys.version_info.major == 3:
+            fname = fname + '.3'
         marshal.dump(d, open(fname, 'wb'))
 
     def load(self, fname):
+        if sys.version_info.major == 3:
+            fname = fname + '.3'
         d = marshal.load(open(fname, 'rb'))
-        for k, v in d.iteritems():
+        for k, v in d.items():
             if isinstance(self.__dict__[k], set):
                 self.__dict__[k] = set(v)
             elif hasattr(self.__dict__[k], '__dict__'):
@@ -121,7 +127,7 @@ def tag(self, data):
                     if (pre[0][1], s) not in stage or p > stage[(pre[0][1],
                                                                  s)][0]:
                         stage[(pre[0][1], s)] = (p, pre[2]+[s])
-            stage = map(lambda x: (x[0], x[1][0], x[1][1]), stage.items())
+            stage = list(map(lambda x: (x[0], x[1][0], x[1][1]), stage.items()))
             now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
         now = heapq.nlargest(1, stage, key=lambda x: x[1]+self.geteos(x[0][1]))
         return zip(data, now[0][2])
diff --git a/test.py b/test.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import print_function
 from __future__ import unicode_literals
 
 text = '''
@@ -54,8 +55,8 @@
     rank = textrank.TextRank(doc)
     rank.solve()
     for index in rank.top_index(5):
-        print sents[index]
+        print(sents[index])
     keyword_rank = textrank.KeywordTextRank(doc)
     keyword_rank.solve()
     for w in keyword_rank.top_index(5):
-        print w
+        print(w)