Skip to content

Commit

Permalink
Fix missing space between sentences in wikis
Browse files: browse the repository at this point in the history
  • Loading branch information
jncraton committed Jul 22, 2023
1 parent 5e5313c commit 30cc41d
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion languagemodels/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
import requests
import datetime
import json
import re

from languagemodels.config import config
from languagemodels.inference import (
Expand Down Expand Up @@ -317,14 +318,19 @@ def get_wiki(topic: str) -> str:
for page in response["pages"]:
wiki_result = requests.get(
f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts|pageprops&"
f"exintro&explaintext&redirects=1&titles={page['title']}&format=json"
f"exintro&redirects=1&titles={page['title']}&format=json"
).json()

first = wiki_result["query"]["pages"].popitem()[1]
if "disambiguation" in first["pageprops"]:
continue

summary = first["extract"]

summary = re.sub(r"<p>", "\n\n", summary, flags=re.I)
summary = re.sub(r"\s*[\n\r]+\s*", "\n\n", summary, flags=re.I)
summary = re.sub(r"<.*?>", "", summary, flags=re.I)
summary = summary.strip()
return summary
else:
return "No matching wiki page found."
Expand Down

0 comments on commit 30cc41d

Please sign in to comment.