# gpt_computer_agent/utils/train.py

import requests
from bs4 import BeautifulSoup

try:
    from ..character import set_website_content
except:
    from gpt_computer_agent.character import set_website_content


def _first_tag_text(soup, name):
    """Return the whitespace-stripped text of the first ``name`` tag, or None.

    Unlike ``tag.string``, this also works when the tag contains nested
    markup (``.string`` is ``None`` in that case), and it does not raise
    when the tag is missing from the document.
    """
    tag = soup.find(name)
    if tag is None:
        return None
    return tag.get_text(separator=" ", strip=True)


def train(url: str, *, timeout: float = 30.0) -> bool:
    """Fetch ``url``, summarize the page, and store it as the website content.

    The summary is a newline-terminated list of ``key: value`` lines made of
    the page's Open Graph meta properties, the text of its first ``<title>``,
    ``<h1>`` and ``<p>`` elements (each only when present), the full visible
    text of the page, and the URL itself. The resulting string is passed to
    ``set_website_content``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, forwarded
            to ``requests.get``. Without it the request could block forever.

    Returns:
        ``True`` on success. On any failure the caught exception instance is
        returned instead of being raised (existing behavior, kept for
        backward compatibility even though the annotation says ``bool``), so
        callers should compare the result against ``True`` explicitly
        rather than relying on truthiness.
    """
    # Open Graph properties copied verbatim into the summary when present.
    meta_properties = (
        "og:description",
        "og:site_name",
        "og:title",
        "og:type",
        "og:url",
    )

    try:
        response = requests.get(url, timeout=timeout)
        # Do not train on the body of an HTTP error page (404, 500, ...).
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        data = {}

        for tag in soup.find_all("meta"):
            prop = tag.get("property")
            if prop in meta_properties:
                data[prop] = tag.get("content")

        # Headline elements are optional: skip the ones the page lacks
        # instead of failing the whole run on a missing tag.
        for name in ("title", "h1", "p"):
            tag_text = _first_tag_text(soup, name)
            if tag_text is not None:
                data[name] = tag_text

        data["text"] = soup.get_text(separator="\n", strip=True)
        data["url"] = url

        # One "key: value" line per collected entry, in insertion order.
        summary = "".join(f"{key}: {value}\n" for key, value in data.items())
        set_website_content(summary)

        return True

    except Exception as e:
        # Deliberate best-effort boundary: hand the error back to the caller.
        return e