Merge branch 'master' into jiito/docs

zckly · Aug 2, 2023 · cd2ab9d · cd2ab9d
2 parents dcdfb1d + fd37983
commit cd2ab9d
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 144 deletions.
diff --git a/README.md b/README.md
@@ -24,8 +24,8 @@ client = Metaphor(api_key="your-api-key")
 
 response = client.search("funny article about tech culture",
     num_results=5,
-    includeDomains: ["nytimes.com", "wsj.com"],
-    startPublishedDate: "2023-06-12"
+    include_domains=["nytimes.com", "wsj.com"],
+    start_published_date="2023-06-12"
 )
 
 for result in response.results:

diff --git a/metaphor_python/api.py b/metaphor_python/api.py
@@ -3,104 +3,80 @@
 from typing import List, Optional, Dict
 from dataclasses import dataclass, field
 
-
 def snake_to_camel(snake_str: str) -> str:
     components = snake_str.split("_")
     return components[0] + "".join(x.title() for x in components[1:])
 
-
 def to_camel_case(data: dict) -> dict:
     return {snake_to_camel(k): v for k, v in data.items() if v is not None}
 
-
 def camel_to_snake(camel_str: str) -> str:
     snake_str = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", camel_str)
     return re.sub("([a-z0-9])([A-Z])", r"\1_\2", snake_str).lower()
 
-
 def to_snake_case(data: dict) -> dict:
     return {camel_to_snake(k): v for k, v in data.items()}
 
-
-VALID_SEARCH_OPTIONS = {
-    "num_results": int,
-    "include_domains": list,
-    "exclude_domains": list,
-    "start_crawl_date": str,
-    "end_crawl_date": str,
-    "start_published_date": str,
-    "end_published_date": str,
-    "use_autoprompt": bool,
-    "type": str,
+SEARCH_OPTIONS_TYPES = {
+    'query': str,
+    'num_results': int,
+    'include_domains': list,
+    'exclude_domains': list,
+    'start_crawl_date': str,
+    'end_crawl_date': str,
+    'start_published_date': str,
+    'end_published_date': str,
+    'use_autoprompt': bool,
+    'type': str
 }
 
-VALID_FIND_SIMILAR_OPTIONS = {
-    "num_results": int,
-    "include_domains": list,
-    "exclude_domains": list,
-    "start_crawl_date": str,
-    "end_crawl_date": str,
-    "start_published_date": str,
-    "end_published_date": str,
+FIND_SIMILAR_OPTIONS_TYPES = {
+    'url': str,
+    'num_results': int,
+    'include_domains': list,
+    'exclude_domains': list,
+    'start_crawl_date': str,
+    'end_crawl_date': str,
+    'start_published_date': str,
+    'end_published_date': str,
 }
 
-
 def validate_search_options(options: Dict[str, Optional[object]]) -> None:
     for key, value in options.items():
-        if key not in VALID_SEARCH_OPTIONS:
+        if key not in SEARCH_OPTIONS_TYPES:
             raise ValueError(f"Invalid option: '{key}'")
-        if not isinstance(value, VALID_SEARCH_OPTIONS[key]):
-            raise ValueError(
-                f"Invalid type for option '{key}': Expected {VALID_SEARCH_OPTIONS[key]}, got {type(value)}"
-            )
-
+        if not isinstance(value, SEARCH_OPTIONS_TYPES[key]):
+            raise ValueError(f"Invalid type for option '{key}': Expected {SEARCH_OPTIONS_TYPES[key]}, got {type(value)}")
+        if key in ['include_domains', 'exclude_domains'] and not value:
+            raise ValueError(f"Invalid value for option '{key}': cannot be an empty list")
 
 def validate_find_similar_options(options: Dict[str, Optional[object]]) -> None:
     for key, value in options.items():
-        if key not in VALID_FIND_SIMILAR_OPTIONS:
+        if key not in FIND_SIMILAR_OPTIONS_TYPES:
             raise ValueError(f"Invalid option: '{key}'")
-        if not isinstance(value, VALID_FIND_SIMILAR_OPTIONS[key]):
-            raise ValueError(
-                f"Invalid type for option '{key}': Expected {VALID_FIND_SIMILAR_OPTIONS[key]}, got {type(value)}"
-            )
-
+        if not isinstance(value, FIND_SIMILAR_OPTIONS_TYPES[key]):
+            raise ValueError(f"Invalid type for option '{key}': Expected {FIND_SIMILAR_OPTIONS_TYPES[key]}, got {type(value)}")
+        if key in ['include_domains', 'exclude_domains'] and not value:
+            raise ValueError(f"Invalid value for option '{key}': cannot be an empty list")
 
 @dataclass
 class Result:
-    """
-    The Result class represents a search result from the Metaphor API.
-
-    Attributes:
-        title (str): The title of the document.
-        url (str): The URL of the document.
-        id (str): The unique identifier of the document.
-        score (Optional[float], default=None): The relevance score of the document for the search query.
-        published_date (Optional[str], default=None): The date the document was published.
-        author (Optional[str], default=None): The author of the document.
-        extract (Optional[str], default=None): beta field. returned when findSimilar_and_get_contents is called
-    """
-
     title: str
     url: str
     id: str
     score: Optional[float] = None
     published_date: Optional[str] = None
     author: Optional[str] = None
-    extract: Optional[
-        str
-    ] = None  # beta field. returned when findSimilar_and_get_contents is called
+    extract: Optional[str] = None
 
-    def __init__(
-        self, title, url, id, score=None, published_date=None, author=None, **kwargs
-    ):
+    def __init__(self, title, url, id, score=None, published_date=None, author=None, **kwargs):
         self.title = title
         self.url = url
         self.score = score
         self.id = id
         self.published_date = published_date
         self.author = author
 
-
 @dataclass
 class DocumentContent:
     id: str
@@ -114,119 +90,63 @@ def __init__(self, id, url, title, extract, **kwargs):
         self.title = title
         self.extract = extract
 
-
 @dataclass
 class GetContentsResponse:
     contents: List[DocumentContent]
 
-
 @dataclass
 class SearchResponse:
     results: List[Result]
-    api: Optional["Metaphor"] = field(default=None, init=False)
+    api: Optional['Metaphor'] = field(default=None, init=False)
 
     def get_contents(self):
         if self.api is None:
-            raise Exception(
-                "API client is not set. This method should be called on a SearchResponse returned by the 'search' method of 'Metaphor'."
-            )
+            raise Exception("API client is not set. This method should be called on a SearchResponse returned by the 'search' method of 'Metaphor'.")
         ids = [result.id for result in self.results]
         return self.api.get_contents(ids)
 
-
 class Metaphor:
     def __init__(self, api_key: str):
         self.base_url = "https://api.metaphor.systems"
         self.headers = {"x-api-key": api_key}
 
-    def search(self, query: str, **options) -> SearchResponse:
-        """
-        This function performs a search on the Metaphor API.
-
-        Args:
-            query (str): The search query.
-            **options: Additional search options. Valid options are:
-                - num_results (int): The number of search results to return.
-                - include_domains (list): A list of domains to include in the search.
-                - exclude_domains (list): A list of domains to exclude from the search.
-                - start_crawl_date (str): The start date for the crawl (in YYYY-MM-DD format).
-                - end_crawl_date (str): The end date for the crawl (in YYYY-MM-DD format).
-                - start_published_date (str): The start date for when the document was published (in YYYY-MM-DD format).
-                - end_published_date (str): The end date for when the document was published (in YYYY-MM-DD format).
-                - use_autoprompt (bool): Whether to use autoprompt for the search.
-                - type (str): The type of document to search for.
-
-        Returns:
-            SearchResponse: A dataclass containing the search results.
-        """
-
+    def search(self, query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None,
+               exclude_domains: Optional[List[str]] = None, start_crawl_date: Optional[str] = None,
+               end_crawl_date: Optional[str] = None, start_published_date: Optional[str] = None,
+               end_published_date: Optional[str] = None, use_autoprompt: Optional[bool] = None,
+               type: Optional[str] = None) -> SearchResponse:
+        options = {k: v for k, v in locals().items() if k != 'self' and v is not None}
         validate_search_options(options)
-        request = {"query": query}
+        request = {'query': query}
         request.update(to_camel_case(options))
-        response = requests.post(
-            f"{self.base_url}/search", json=request, headers=self.headers
-        )
-        response.raise_for_status()
-        results = [
-            Result(**to_snake_case(result)) for result in response.json()["results"]
-        ]
+        response = requests.post(f"{self.base_url}/search", json=request, headers=self.headers)
+        if response.status_code != 200:
+            raise Exception(f"Request failed with status code {response.status_code}. Message: {response.text}")
+        results = [Result(**to_snake_case(result)) for result in response.json()["results"]]
         search_response = SearchResponse(results=results)
         search_response.api = self
         return search_response
 
-    def find_similar(self, url: str, **options) -> SearchResponse:
-        """
-        This function finds documents similar to the given URL using the Metaphor API.
-
-        Args:
-            url (str): The URL of the document to find similar documents to.
-            **options: Additional search options. Valid options are:
-                - num_results (int): The number of search results to return.
-                - include_domains (list): A list of domains to include in the search.
-                - exclude_domains (list): A list of domains to exclude from the search.
-                - start_crawl_date (str): The start date for the crawl (in YYYY-MM-DD format).
-                - end_crawl_date (str): The end date for the crawl (in YYYY-MM-DD format).
-                - start_published_date (str): The start date for when the document was published (in YYYY-MM-DD format).
-                - end_published_date (str): The end date for when the document was published (in YYYY-MM-DD format).
-
-        Returns:
-            SearchResponse: A dataclass containing the search results.
-        """
-
+    def find_similar(self, url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None,
+                     exclude_domains: Optional[List[str]] = None, start_crawl_date: Optional[str] = None,
+                     end_crawl_date: Optional[str] = None, start_published_date: Optional[str] = None,
+                     end_published_date: Optional[str] = None) -> SearchResponse:
+        options = {k: v for k, v in locals().items() if k != 'self' and v is not None}
         validate_find_similar_options(options)
-        request = {"url": url}
+        request = {'url': url}
         request.update(to_camel_case(options))
-        response = requests.post(
-            f"{self.base_url}/findSimilar", json=request, headers=self.headers
-        )
-        response.raise_for_status()
-        results = [
-            Result(**to_snake_case(result)) for result in response.json()["results"]
-        ]
+        response = requests.post(f"{self.base_url}/findSimilar", json=request, headers=self.headers)
+        if response.status_code != 200:
+            raise Exception(f"Request failed with status code {response.status_code}. Message: {response.text}")
+        results = [Result(**to_snake_case(result)) for result in response.json()["results"]]
         find_similar_response = SearchResponse(results=results)
         find_similar_response.api = self
         return find_similar_response
 
     def get_contents(self, ids: List[str]) -> GetContentsResponse:
-        """
-        This function retrieves the contents of the documents with the given IDs using the Metaphor API.
-
-        Args:
-            ids (List[str]): A list of document IDs to retrieve the contents for.
-
-        Returns:
-            GetContentsResponse: A dataclass containing the contents of the requested documents.
-        """
-
-        response = requests.get(
-            f"{self.base_url}/contents",
-            params=to_camel_case({"ids": ids}),
-            headers=self.headers,
-        )
-        response.raise_for_status()
-        return GetContentsResponse(
-            [
-                DocumentContent(**to_snake_case(document))
-                for document in response.json()["contents"]
-            ]
-        )
+        if len(ids) == 0:
+            raise ValueError("ids cannot be empty")
+        response = requests.get(f"{self.base_url}/contents", params=to_camel_case({"ids": ids}), headers=self.headers)
+        if response.status_code != 200:
+            raise Exception(f"Request failed with status code {response.status_code}. Message: {response.text}")
+        return GetContentsResponse([DocumentContent(**to_snake_case(document)) for document in response.json()["contents"]])
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='metaphor-python',
-    version='0.1.10',
+    version='0.1.11',
     description='A Python package for the Metaphor API.',
     author='Metaphor',
     author_email='[email protected]',