Skip to content

Commit

Permalink
Merge branch 'master' into jiito/docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffzwang authored Aug 2, 2023
2 parents dcdfb1d + fd37983 commit cd2ab9d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 144 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ client = Metaphor(api_key="your-api-key")

response = client.search("funny article about tech culture",
num_results=5,
includeDomains: ["nytimes.com", "wsj.com"],
startPublishedDate: "2023-06-12"
include_domains=["nytimes.com", "wsj.com"],
start_published_date="2023-06-12"
)

for result in response.results:
Expand Down
202 changes: 61 additions & 141 deletions metaphor_python/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,104 +3,80 @@
from typing import List, Optional, Dict
from dataclasses import dataclass, field


def snake_to_camel(snake_str: str) -> str:
    """Convert a ``snake_case`` identifier to ``camelCase``.

    The text before the first underscore is kept verbatim; every following
    underscore-separated component gets its first character upper-cased and
    the remainder lower-cased.

    Args:
        snake_str: The identifier to convert, e.g. ``"start_published_date"``.

    Returns:
        The camelCase form, e.g. ``"startPublishedDate"``.
    """
    head, *tail = snake_str.split("_")
    # capitalize() rather than title(): title() treats digits and other
    # non-letters as word boundaries, so "k2nn" would become "K2Nn".
    return head + "".join(part.capitalize() for part in tail)


def to_camel_case(data: dict) -> dict:
    """Return a copy of *data* with camelCase keys, omitting ``None`` values.

    Keys are converted with ``snake_to_camel``; entries whose value is
    ``None`` are left out of the result entirely.
    """
    converted = {}
    for key, value in data.items():
        if value is None:
            continue
        converted[snake_to_camel(key)] = value
    return converted


# An upper-case letter followed by lower-case letters, preceded by any character.
_CAPITALIZED_WORD_RE = re.compile(r"(.)([A-Z][a-z]+)")
# A lower-case letter or digit directly followed by an upper-case letter.
_LOWER_THEN_UPPER_RE = re.compile(r"([a-z0-9])([A-Z])")


def camel_to_snake(camel_str: str) -> str:
    """Convert a ``camelCase`` / ``PascalCase`` identifier to ``snake_case``.

    Two passes insert underscores: first in front of each capitalized word,
    then between a lower-case letter or digit and the upper-case letter that
    follows it. The result is lower-cased.

    Args:
        camel_str: The identifier to convert, e.g. ``"publishedDate"``.

    Returns:
        The snake_case form, e.g. ``"published_date"``.
    """
    partially_split = _CAPITALIZED_WORD_RE.sub(r"\1_\2", camel_str)
    return _LOWER_THEN_UPPER_RE.sub(r"\1_\2", partially_split).lower()


def to_snake_case(data: dict) -> dict:
    """Return a copy of *data* whose keys are converted with ``camel_to_snake``.

    Values are carried over unchanged, including ``None``.
    """
    renamed = {}
    for key, value in data.items():
        renamed[camel_to_snake(key)] = value
    return renamed


# Accepted keyword options for Metaphor.search, mapped to the type each
# value must have.
SEARCH_OPTIONS_TYPES = {
    "query": str,
    "num_results": int,
    "include_domains": list,
    "exclude_domains": list,
    "start_crawl_date": str,
    "end_crawl_date": str,
    "start_published_date": str,
    "end_published_date": str,
    "use_autoprompt": bool,
    "type": str,
}

# Accepted keyword options for Metaphor.find_similar, mapped to the type each
# value must have.
FIND_SIMILAR_OPTIONS_TYPES = {
    "url": str,
    "num_results": int,
    "include_domains": list,
    "exclude_domains": list,
    "start_crawl_date": str,
    "end_crawl_date": str,
    "start_published_date": str,
    "end_published_date": str,
}

# Earlier names of the two tables, kept as aliases so code that still imports
# them continues to work.
VALID_SEARCH_OPTIONS = SEARCH_OPTIONS_TYPES
VALID_FIND_SIMILAR_OPTIONS = FIND_SIMILAR_OPTIONS_TYPES

# List-valued options that must contain at least one entry when supplied.
_NON_EMPTY_LIST_OPTIONS = ("include_domains", "exclude_domains")


def _validate_options(options: Dict[str, Optional[object]], allowed_types: Dict[str, type]) -> None:
    """Check every entry of *options* against the *allowed_types* table.

    Raises:
        ValueError: On an unknown key, a value of the wrong type, or an empty
            include/exclude domain list.
    """
    for key, value in options.items():
        if key not in allowed_types:
            raise ValueError(f"Invalid option: '{key}'")
        expected = allowed_types[key]
        # bool is a subclass of int, so a plain isinstance check would let
        # True/False through for integer options such as num_results.
        is_bool_for_int = expected is int and isinstance(value, bool)
        if is_bool_for_int or not isinstance(value, expected):
            raise ValueError(f"Invalid type for option '{key}': Expected {expected}, got {type(value)}")
        if key in _NON_EMPTY_LIST_OPTIONS and not value:
            raise ValueError(f"Invalid value for option '{key}': cannot be an empty list")


def validate_search_options(options: Dict[str, Optional[object]]) -> None:
    """Validate the options of a search request.

    Args:
        options: Option names mapped to the values supplied by the caller.

    Raises:
        ValueError: If a key is not in ``SEARCH_OPTIONS_TYPES``, a value has
            the wrong type, or a domain list is empty.
    """
    _validate_options(options, SEARCH_OPTIONS_TYPES)


def validate_find_similar_options(options: Dict[str, Optional[object]]) -> None:
    """Validate the options of a find-similar request.

    Args:
        options: Option names mapped to the values supplied by the caller.

    Raises:
        ValueError: If a key is not in ``FIND_SIMILAR_OPTIONS_TYPES``, a value
            has the wrong type, or a domain list is empty.
    """
    _validate_options(options, FIND_SIMILAR_OPTIONS_TYPES)

@dataclass
class Result:
    """
    The Result class represents a search result from the Metaphor API.

    Attributes:
        title (str): The title of the document.
        url (str): The URL of the document.
        id (str): The unique identifier of the document.
        score (Optional[float], default=None): The relevance score of the document for the search query.
        published_date (Optional[str], default=None): The date the document was published.
        author (Optional[str], default=None): The author of the document.
        extract (Optional[str], default=None): beta field. returned when findSimilar_and_get_contents is called
    """

    title: str
    url: str
    id: str
    score: Optional[float] = None
    published_date: Optional[str] = None
    author: Optional[str] = None
    extract: Optional[str] = None

    def __init__(self, title, url, id, score=None, published_date=None, author=None, extract=None, **kwargs):
        # @dataclass does not replace an __init__ defined in the class body, so
        # every declared field has to be assigned here by hand. Unknown keys
        # are accepted through **kwargs and ignored.
        self.title = title
        self.url = url
        self.id = id
        self.score = score
        self.published_date = published_date
        self.author = author
        # Previously `extract` fell into **kwargs and was silently discarded.
        self.extract = extract


@dataclass
class DocumentContent:
id: str
Expand All @@ -114,119 +90,63 @@ def __init__(self, id, url, title, extract, **kwargs):
self.title = title
self.extract = extract


@dataclass
class GetContentsResponse:
    """Container for the documents returned by a contents request."""

    contents: List[DocumentContent]


@dataclass
class SearchResponse:
    """Results of a search or find-similar request.

    ``api`` is not a constructor argument; it defaults to ``None`` and is
    meant to be set to the client that produced the response so that
    ``get_contents`` can issue the follow-up request.
    """

    results: List[Result]
    api: Optional["Metaphor"] = field(default=None, init=False)

    def get_contents(self):
        """Fetch the contents of every result through the attached client.

        Raises:
            Exception: If no client has been attached to this response.
        """
        if self.api is not None:
            document_ids = [entry.id for entry in self.results]
            return self.api.get_contents(document_ids)
        raise Exception("API client is not set. This method should be called on a SearchResponse returned by the 'search' method of 'Metaphor'.")


class Metaphor:
    """Client for the Metaphor API.

    Every request goes to ``base_url`` and carries the API key in the
    ``x-api-key`` header.
    """

    def __init__(self, api_key: str):
        self.base_url = "https://api.metaphor.systems"
        self.headers = {"x-api-key": api_key}

    @staticmethod
    def _drop_unset(options: dict) -> dict:
        """Return *options* without the entries whose value is ``None``."""
        return {key: value for key, value in options.items() if value is not None}

    @staticmethod
    def _check_status(response) -> None:
        """Raise ``Exception`` unless *response* has HTTP status 200."""
        if response.status_code != 200:
            raise Exception(f"Request failed with status code {response.status_code}. Message: {response.text}")

    def _post_for_results(self, endpoint: str, request: dict) -> "SearchResponse":
        """POST *request* as JSON to *endpoint* and wrap the returned results."""
        response = requests.post(f"{self.base_url}/{endpoint}", json=request, headers=self.headers)
        self._check_status(response)
        results = [Result(**to_snake_case(result)) for result in response.json()["results"]]
        search_response = SearchResponse(results=results)
        # Attach the client so SearchResponse.get_contents can call back into it.
        search_response.api = self
        return search_response

    def search(self, query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None,
               exclude_domains: Optional[List[str]] = None, start_crawl_date: Optional[str] = None,
               end_crawl_date: Optional[str] = None, start_published_date: Optional[str] = None,
               end_published_date: Optional[str] = None, use_autoprompt: Optional[bool] = None,
               type: Optional[str] = None) -> "SearchResponse":
        """
        This function performs a search on the Metaphor API.

        Args:
            query (str): The search query.
            num_results (int): The number of search results to return.
            include_domains (list): A list of domains to include in the search.
            exclude_domains (list): A list of domains to exclude from the search.
            start_crawl_date (str): The start date for the crawl (in YYYY-MM-DD format).
            end_crawl_date (str): The end date for the crawl (in YYYY-MM-DD format).
            start_published_date (str): The start date for when the document was published (in YYYY-MM-DD format).
            end_published_date (str): The end date for when the document was published (in YYYY-MM-DD format).
            use_autoprompt (bool): Whether to use autoprompt for the search.
            type (str): The type of document to search for.

        Returns:
            SearchResponse: A dataclass containing the search results.

        Raises:
            ValueError: If an option fails validation.
            Exception: If the API responds with a status other than 200.
        """
        # Spelled out rather than taken from locals(), so a local variable
        # added later cannot leak into the request. Order matches the signature.
        options = self._drop_unset({
            "query": query,
            "num_results": num_results,
            "include_domains": include_domains,
            "exclude_domains": exclude_domains,
            "start_crawl_date": start_crawl_date,
            "end_crawl_date": end_crawl_date,
            "start_published_date": start_published_date,
            "end_published_date": end_published_date,
            "use_autoprompt": use_autoprompt,
            "type": type,
        })
        validate_search_options(options)
        request = {"query": query}
        request.update(to_camel_case(options))
        return self._post_for_results("search", request)

    def find_similar(self, url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None,
                     exclude_domains: Optional[List[str]] = None, start_crawl_date: Optional[str] = None,
                     end_crawl_date: Optional[str] = None, start_published_date: Optional[str] = None,
                     end_published_date: Optional[str] = None) -> "SearchResponse":
        """
        This function finds documents similar to the given URL using the Metaphor API.

        Args:
            url (str): The URL of the document to find similar documents to.
            num_results (int): The number of search results to return.
            include_domains (list): A list of domains to include in the search.
            exclude_domains (list): A list of domains to exclude from the search.
            start_crawl_date (str): The start date for the crawl (in YYYY-MM-DD format).
            end_crawl_date (str): The end date for the crawl (in YYYY-MM-DD format).
            start_published_date (str): The start date for when the document was published (in YYYY-MM-DD format).
            end_published_date (str): The end date for when the document was published (in YYYY-MM-DD format).

        Returns:
            SearchResponse: A dataclass containing the search results.

        Raises:
            ValueError: If an option fails validation.
            Exception: If the API responds with a status other than 200.
        """
        options = self._drop_unset({
            "url": url,
            "num_results": num_results,
            "include_domains": include_domains,
            "exclude_domains": exclude_domains,
            "start_crawl_date": start_crawl_date,
            "end_crawl_date": end_crawl_date,
            "start_published_date": start_published_date,
            "end_published_date": end_published_date,
        })
        validate_find_similar_options(options)
        request = {"url": url}
        request.update(to_camel_case(options))
        return self._post_for_results("findSimilar", request)

    def get_contents(self, ids: List[str]) -> "GetContentsResponse":
        """
        This function retrieves the contents of the documents with the given IDs using the Metaphor API.

        Args:
            ids (List[str]): A list of document IDs to retrieve the contents for.

        Returns:
            GetContentsResponse: A dataclass containing the contents of the requested documents.

        Raises:
            ValueError: If ``ids`` is empty.
            Exception: If the API responds with a status other than 200.
        """
        if not ids:
            raise ValueError("ids cannot be empty")
        response = requests.get(f"{self.base_url}/contents", params=to_camel_case({"ids": ids}), headers=self.headers)
        self._check_status(response)
        return GetContentsResponse([DocumentContent(**to_snake_case(document)) for document in response.json()["contents"]])
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='metaphor-python',
version='0.1.10',
version='0.1.11',
description='A Python package for the Metaphor API.',
author='Metaphor',
author_email='[email protected]',
Expand Down

0 comments on commit cd2ab9d

Please sign in to comment.