-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Changed and added more fields in Oxford dict
- Loading branch information
khuang6
committed
Dec 22, 2017
1 parent
fbb79b7
commit 7d66e51
Showing
3 changed files
with
4,227 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,258 @@ | ||
#-*- coding:utf-8 -*- | ||
try: | ||
import urllib2 | ||
except: | ||
import urllib.request as urllib2 | ||
import json | ||
from aqt.utils import showInfo | ||
from copy import deepcopy | ||
from warnings import filterwarnings | ||
|
||
from bs4 import BeautifulSoup, Tag | ||
from requests import Session | ||
|
||
from .base import WebService, export, register, with_styles | ||
|
||
filterwarnings('ignore') | ||
|
||
|
||
@register(u'牛津学习词典')
class Oxford(WebService):
    """Dictionary service that scrapes Oxford Learner's Dictionaries pages.

    Each ``@export``-decorated method provides one field. The entry page
    for the current ``self.word`` is fetched once, parsed into a
    ``WebWord`` and shared by all exporters.
    """

    _base_url = 'https://www.oxfordlearnersdictionaries.com/definition/english/'

    def __init__(self):
        super(Oxford, self).__init__()
        self.s = Session()
        self.s.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36'
        }
        # Initial request against the base URL; presumably primes the
        # session (cookies) before word lookups -- TODO confirm it is needed.
        self.s.get(self._base_url)
        self._web_word = None
        # Word the cached page belongs to, so a reused service instance
        # never serves the previous word's page.
        self._web_word_key = None

    def query(self, word):
        """Fetch and parse the entry page for ``word``.

        :param word: headword appended to ``_base_url``
        :rtype: WebWord
        :return: the parsed page, or ``None`` when the response status is
            not 200
        """
        _qry_url = self._base_url + word
        rsp = self.s.get(_qry_url)
        if rsp.status_code == 200:
            return WebWord(rsp.content.decode('utf-8'))
        return None

    @property
    def web_word(self):
        """Parsed page for ``self.word``, fetched on first use.

        A failed lookup (``None``) is not cached and is retried on the
        next access.
        """
        if self._web_word is None or self._web_word_key != self.word:
            self._web_word = self.query(self.word)
            self._web_word_key = self.word
        return self._web_word

    def _download_sound(self, url, region):
        """Download one pronunciation clip and return its Anki audio label.

        :param url: clip URL, may be ``None``/empty
        :param region: file name suffix, ``'uk'`` or ``'us'``
        :return: the audio label, or ``''`` when there is no URL or the
            download fails
        """
        filename = u'_oxford_{}_{}.mp3'.format(self.word, region)
        if url and self.download(url, filename):
            return self.get_anki_label(filename, 'audio')
        return ''

    @export(u'音标', 0)
    def phonetic(self):
        """British and North American phonetics, separated by a space."""
        return '{} {}'.format(self.web_word.wd_phon_bre, self.web_word.wd_phon_nam)

    @export(u'词性', 1)
    def pos(self):
        """Part of speech of the entry."""
        return self.web_word.wd_pos

    @export(u'释义', 2)
    @with_styles(cssfile='_oxford.css')
    def ee(self):
        """Definitions as HTML, styled by ``_oxford.css``."""
        return self.web_word.definitions_html

    @export(u'英式发音', 3)
    def sound_bre(self):
        """British pronunciation audio label, or ``''``."""
        return self._download_sound(self.web_word.wd_sound_url_bre, 'uk')

    @export(u'美式发音', 4)
    def sound_ame(self):
        """North American pronunciation audio label, or ``''``."""
        return self._download_sound(self.web_word.wd_sound_url_nam, 'us')

    @export(u'英式发音优先', 5)
    def sound_pri(self):
        """British audio label if available, otherwise the American one.

        Goes through the undecorated helper instead of the exported
        methods: the original compared the bound methods themselves
        (always truthy, never called), and what ``@export`` wraps around
        a result is not visible from this file.
        """
        page = self.web_word
        return (self._download_sound(page.wd_sound_url_bre, 'uk')
                or self._download_sound(page.wd_sound_url_nam, 'us'))
|
||
|
||
class WebWord(object):
    """Read-only view over one Oxford Learner's Dictionaries entry page.

    The page markup is parsed once with BeautifulSoup; the ``tag_*``
    properties locate page elements and the ``wd_*`` / ``definitions*``
    properties extract values from them.
    """

    def __init__(self, markups):
        """
        :param markups: HTML text of the entry page; when empty nothing is
            parsed and ``bs`` stays ``None``
        """
        # Set every attribute up front so an instance built from empty
        # markup still has a complete, predictable shape.
        self.markups = markups
        self.with_html = False
        self._defs = None
        self._defs_html = None
        self.bs = BeautifulSoup(markups, 'lxml') if markups else None

    @staticmethod
    def _cls_dic(class_nm):
        """Build the attribute filter matching ``class="<class_nm>"``."""
        return {'class': class_nm}

    # region Tags
    @property
    def tag_web_top(self):
        """
        Header block of the entry (word - class: h, pos - class: pos).

        :rtype: Tag
        """
        return self.bs.find("div", self._cls_dic('webtop-g'))

    @property
    def tag_pron(self):
        """
        Block holding both pronunciations.

        :rtype: Tag
        """
        return self.bs.find("div", self._cls_dic('pron-gs ei-g'))

    @property
    def tag_phon_bre(self):
        """
        British pronunciation group (``geo="br"``).

        :rtype: Tag
        """
        return self.tag_pron.find('span', self._cls_dic('pron-g'), geo='br')

    @property
    def tag_phon_nam(self):
        """
        North American pronunciation group (``geo="n_am"``).

        :rtype: Tag
        """
        return self.tag_pron.find('span', self._cls_dic('pron-g'), geo='n_am')

    # ---- Explains
    @property
    def tag_explain(self):
        """
        First ``span.sn-gs`` element, the container of the definitions.

        :rtype: Tag
        """
        return self.bs.find('span', self._cls_dic('sn-gs'))

    # endregion

    def _format_phon(self, tag):
        """Render one pronunciation group as ``"<prefix> /<phonetic>/"``.

        :param tag: a pronunciation group (``tag_phon_bre``/``tag_phon_nam``)
        """
        # The phonetic text is taken from the fourth child of the ``phon``
        # span; it may be a nested tag or a bare string.
        phon = tag.find('span', self._cls_dic('phon')).contents[3]
        prefix = tag.find('span', self._cls_dic('prefix')).string
        return "{} {}".format(
            prefix,
            '/{}/'.format(phon.text if isinstance(phon, Tag) else phon)
        )

    @property
    def wd_phon_bre(self):
        """
        :return: British prefix and phonetic transcription
        """
        return self._format_phon(self.tag_phon_bre)

    @property
    def wd_pos(self):
        """Part of speech, or ``''`` when the page does not carry one."""
        try:
            return self.tag_web_top.find("span", 'pos').text
        except AttributeError:
            # Header block or pos span missing (``find`` returned None).
            return ''

    @property
    def wd_phon_nam(self):
        """
        :return: North American prefix and phonetic transcription
        """
        return self._format_phon(self.tag_phon_nam)

    @property
    def wd_sound_url_bre(self):
        """URL of the British mp3 clip, or ``None`` when not found."""
        try:
            return self.tag_phon_bre.find(
                'div', self._cls_dic('sound audio_play_button pron-uk icon-audio')
            )['data-src-mp3']
        except (AttributeError, TypeError, KeyError):
            return None

    @property
    def wd_sound_url_nam(self):
        """URL of the North American mp3 clip, or ``None`` when not found."""
        try:
            # Search the N.Am. group: the ``pron-us`` button is not inside
            # the British group the original looked in.
            return self.tag_phon_nam.find(
                'div', self._cls_dic('sound audio_play_button pron-us icon-audio')
            )['data-src-mp3']
        except (AttributeError, TypeError, KeyError):
            return None

    @property
    def definitions(self):
        """List of definitions, one item per ``<li>`` of the explanation.

        Items are plain text, or HTML fragments while ``with_html`` is
        set. When the explanation has no ``<li>`` the whole explanation is
        the single item; when the page has no explanation the list is
        empty. Both flavours are built in one pass and cached.
        """
        if self.with_html:
            if self._defs_html:
                return self._defs_html
        elif self._defs:
            return self._defs

        tag_exp = self._clean(self.tag_explain)
        if tag_exp is None:
            return []
        items = tag_exp.find_all('li') or [tag_exp]
        self._defs = [item.text for item in items]
        # Each entry is the markup of its own item (not of the whole
        # explanation), so joining the list does not repeat the block.
        self._defs_html = [str(item) for item in items]
        return self._defs_html if self.with_html else self._defs

    @property
    def definitions_html(self):
        """All definitions concatenated into one HTML string."""
        previous = self.with_html
        self.with_html = True
        try:
            return ''.join(self.definitions)
        finally:
            # Restore the caller's mode even if extraction fails.
            self.with_html = previous

    def _clean(self, tg):
        """
        Strip page furniture from ``tg`` in place.

        Elements whose class is one of the listed names are removed from
        the tree, and the ``dpsid``/``id`` attributes are dropped from
        ``tg`` and, recursively, from every descendant tag.

        :type tg: Tag
        :return: the same (mutated) tag, or ``None`` when ``tg`` is ``None``
        """
        if tg is None:
            return tg
        decompose_cls = ['xr-gs', 'sound', 'heading', 'topic', 'collapse', 'oxford3000']

        if tg.attrs and 'class' in tg.attrs:
            for _cls in decompose_cls:
                for _tg in tg.find_all(attrs=self._cls_dic(_cls), recursive=True):
                    _tg.decompose()

        if tg.attrs:
            for _attr in ('dpsid', 'id'):
                tg.attrs.pop(_attr, None)

        # Snapshot the children: the recursive call mutates the subtree.
        for child in list(tg.children):
            if isinstance(child, Tag):
                self._clean(child)
        return tg
Oops, something went wrong.