Skip to content

Commit

Permalink
add ignore
Browse files Browse the repository at this point in the history
  • Loading branch information
isnowfy committed Nov 29, 2013
1 parent e90e3f5 commit 4974d0c
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.py[cod]
21 changes: 21 additions & 0 deletions snownlp/normal/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,23 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import os
import codecs

import zh

# Path of the 'stopwords' file that sits in the same directory as this module.
data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'stopwords')
# Set of stop words; every stripped line of the file becomes one entry.
stop = set()
# The context manager closes the handle even if reading or decoding raises,
# which the previous open()/close() pair did not guarantee.
with codecs.open(data_path, 'r', 'utf-8') as fr:
    for word in fr:
        stop.add(word.strip())


def filter_stop(words):
    """Return the items of *words* that are not in the module-level ``stop`` set.

    The result is always a new list in the original order. A list
    comprehension is used instead of ``filter`` so the return value is a
    reusable list on Python 3 as well, where ``filter`` is a one-shot
    iterator.
    """
    return [word for word in words if word not in stop]


def zh2hans(sent):
    """Pass *sent* through ``zh.transfer`` and return whatever it produces."""
    converted = zh.transfer(sent)
    return converted
15 changes: 15 additions & 0 deletions snownlp/seg/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import os

import seg as TnTseg

# Path of 'data.txt', located in the same directory as this module.
data_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'data.txt')
# Create the shared segmenter and train it once, at import time.
segger = TnTseg.Seg()
segger.train(data_path)


def seg(sent):
    """Segment *sent* with the module-level ``segger`` and return a list.

    Whatever iterable ``segger.seg`` yields is materialised into a list
    before being returned.
    """
    pieces = segger.seg(sent)
    return list(pieces)
4 changes: 2 additions & 2 deletions snownlp/seg/seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

import codecs

import tnt
from ..utils.tnt import TnT


class Seg(object):

def __init__(self):
self.segger = tnt.TnT()
self.segger = TnT()

def train(self, file_name):
fr = codecs.open(file_name, 'r', 'utf-8')
Expand Down
Empty file added snownlp/sim/__init__.py
Empty file.
Empty file added snownlp/summary/__init__.py
Empty file.
4 changes: 2 additions & 2 deletions snownlp/summary/textrank.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import bm25
from ..sim.bm25 import BM25


class TextRank(object):

def __init__(self, docs):
self.docs = docs
self.bm25 = bm25.BM25(docs)
self.bm25 = BM25(docs)
self.D = len(docs)
self.d = 0.85
self.weight = []
Expand Down
34 changes: 34 additions & 0 deletions snownlp/tag/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import os
import codecs

from ..utils.tnt import TnT

# Path of '199801.txt', located in the same directory as this module.
data_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '199801.txt')
# Shared TnT instance used by train(), tag_all() and tag() below.
tagger = TnT()


def train(file_name):
    """Train the module-level ``tagger`` from the tagged corpus *file_name*.

    The file is read as UTF-8. Lines that are empty after stripping are
    skipped; every other line is split on whitespace and each token is split
    on ``/``, so ``a/n b/v`` becomes ``[['a', 'n'], ['b', 'v']]``. All
    collected lines are passed to ``tagger.train`` in a single call.
    """
    data = []
    # The context manager closes the file even if reading or decoding raises.
    with codecs.open(file_name, 'r', 'utf-8') as fr:
        for raw in fr:
            line = raw.strip()
            if not line:
                continue
            # Build a real list: on Python 3 ``map`` would hand the tagger a
            # lazy iterator that can only be consumed once.
            data.append([token.split('/') for token in line.split()])
    tagger.train(data)

# Train the shared tagger at import time from the file at data_path.
train(data_path)


def tag_all(words):
    """Run the module-level ``tagger`` on *words* and return its result as is."""
    tagged = tagger.tag(words)
    return tagged


def tag(words):
    """Return a list with element ``[1]`` of every item from ``tag_all(words)``.

    A list comprehension replaces ``map`` so the result is a real list on
    Python 3 as well, where ``map`` returns a one-shot iterator.
    """
    return [pair[1] for pair in tag_all(words)]
Empty file added snownlp/utils/__init__.py
Empty file.

0 comments on commit 4974d0c

Please sign in to comment.