# train.py
import requests
from bs4 import BeautifulSoup
try:
from ..character import set_website_content
except:
from gpt_computer_agent.character import set_website_content
def _tag_string(tag):
    """Return ``tag.string``, or None when the tag is absent from the page."""
    # BeautifulSoup yields None for ``soup.title`` / ``soup.h1`` / ``soup.p``
    # when the document has no such element; guard so one missing element
    # does not abort the whole extraction with an AttributeError.
    return tag.string if tag is not None else None


def train(url: str, timeout: float = 30.0) -> bool:
    """Fetch *url*, summarise the page as text and pass it to ``set_website_content``.

    The summary is a sequence of ``"<key>: <value>\\n"`` lines containing, in
    order: the selected Open Graph ``<meta property=...>`` values found on the
    page, the ``.string`` of the first ``<title>``, ``<h1>`` and ``<p>``
    elements (``None`` if the element is missing or has no single string),
    the full page text, and the URL itself.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        ``True`` on success. On any failure the caught exception object is
        returned instead of being raised (note this differs from the ``bool``
        annotation; kept as-is for existing callers).
    """
    try:
        # Open Graph properties worth keeping from the page's <meta> tags.
        meta_properties = (
            "og:description",
            "og:site_name",
            "og:title",
            "og:type",
            "og:url",
        )

        # Fetch and parse the page.
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # Collect the wanted meta properties in document order.
        data = {}
        for tag in soup.find_all("meta"):
            prop = tag.get("property")
            if prop in meta_properties:
                data[prop] = tag.get("content")

        # Add other useful text from the page; missing elements become None.
        data["title"] = _tag_string(soup.title)
        data["h1"] = _tag_string(soup.h1)
        data["p"] = _tag_string(soup.p)
        data["text"] = soup.get_text(separator="\n", strip=True)
        data["url"] = url

        # Render as "key: value" lines, e.g. "title: Example Domain".
        the_string = "".join(f"{key}: {value}\n" for key, value in data.items())
        set_website_content(the_string)
        return True
    except Exception as e:
        # Best-effort contract: report the failure to the caller as a value.
        return e