Skip to content
This repository has been archived by the owner on May 11, 2022. It is now read-only.

Commit

Permalink
[FIX]: Conditional meanings are properly removed
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jason committed Apr 28, 2021
1 parent c3a7b72 commit 434d36d
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 6 deletions.
1 change: 0 additions & 1 deletion 210428_150630_automatic.json

This file was deleted.

1 change: 1 addition & 0 deletions 210428_152653_automatic.json

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions webscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,18 @@ def scrape(self, word):
translations[main_translation].append([])

if main_translation is not None and "(UK)":
if translation.find(attrs = { "class" : "To2" }):
if translation.find(attrs = { "class" : "dsense" }):
continue
for unnecessary_tag in to_word_tag.find_all(["a", "em"]):
unnecessary_tag.decompose()
translations[main_translation][-1].extend(
[re.sub(r"\s(?=\s|$)|/s\w{1,2}", "", synonym).strip() for synonym in
to_word_tag.text.split(",")])


for k, v in translations.items():
translations[k] = list(filter(lambda x: x, v))

return translations
return filter(lambda x: x[1], translations.items())

def parse_manual(answer):
if re.match(r"(\([^)]*?\))?\s*to", answer):
Expand All @@ -75,8 +76,6 @@ def parse_manual(answer):
for i in range(len(split_inds) - 1)]
return list(filter(lambda synonym: not bool(re.search(r"\([^/)]*\)", synonym)), l))

return l

if __name__ == "__main__":
wr = WordReference()

Expand Down

0 comments on commit 434d36d

Please sign in to comment.