Skip to content

Commit

Permalink
readded Python 3 support after unicode fix
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Dec 10, 2015
1 parent 0a3903b commit 9a8a5a6
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 4 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ Usage

Example:

from __future__ import print_function, unicode_literals #to make this work on Python 2 as well as Python 3

import frog

frog = frog.Frog(frog.FrogOptions(parser=False), "/etc/frog/frog.cfg")
output = frog.process_raw("Dit is een test")
print("RAW OUTPUT=",output)
output = frog.process("Dit is nog een test.")
print("PARSED OUTPUT=",output)


Output:

RAW OUTPUT= 1 Dit dit [dit] VNW(aanw,pron,stan,vol,3o,ev)
Expand Down
2 changes: 2 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import print_function, unicode_literals

import frog

frog = frog.Frog(frog.FrogOptions(parser=False), "/etc/frog/frog.cfg")
Expand Down
11 changes: 7 additions & 4 deletions frog_wrapper.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ from cython import address
from libc.stdint cimport *
from libcpp.utility cimport pair
import os.path
import sys
cimport frog_classes
cimport libfolia_classes

Expand Down Expand Up @@ -131,7 +132,7 @@ cdef class Frog:
cdef frog_classes.Configuration configuration
cdef frog_classes.LogStream logstream

def __init__(self, FrogOptions options, str configurationfile = ""):
def __init__(self, FrogOptions options, configurationfile = ""):
"""Initialises Frog, pass a FrogOptions instance and a configuration file"""

self.options = options
Expand All @@ -149,7 +150,7 @@ cdef class Frog:
"""Invokes Frog on the specified text, the text is considered one document. The raw results from Frog are returned as a string"""
#cdef libfolia_classes.Document * doc = self.capi.tokenizer.tokenizehelper( text.encode('utf-8') )
cdef string result = self.capi.Frogtostring(self._encode_text(text))
r = result.decode('utf-8') if type(text) == unicode else result
r = result.decode('utf-8') #if (sys.version < '3' and type(text) == unicode) or (sys.version > '3' and type(text) == str) else result
return r

def parsecolumns(self, str response):
Expand Down Expand Up @@ -192,7 +193,9 @@ cdef class Frog:
del self.capi

def _encode_text(self, text):
if type(text) == unicode:
if sys.version < '3' and type(text) == unicode:
return text.encode('utf-8')
return text
if sys.version > '3' and type(text) == str:
return text.encode('utf-8')
return text #already was bytes or python2 str

0 comments on commit 9a8a5a6

Please sign in to comment.