forked from langchain-ai/langchain
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add MosaicML inference endpoints (langchain-ai#4607)
# Add MosaicML inference endpoints This PR adds support in LangChain for MosaicML inference endpoints. We both serve a select few open-source models and allow customers to deploy their own models using our inference service. Docs are here (https://docs.mosaicml.com/en/latest/inference.html), and the sign-up form is here (https://forms.mosaicml.com/demo?utm_source=langchain). I'm not intimately familiar with the details of LangChain or the contribution process, so please let me know if anything needs fixing or if this is the wrong way to submit a new integration — thanks! I'm also not sure what the procedure is for integration tests; I have tested locally with my API key. ## Who can review? @hwchase17 --------- Co-authored-by: Harrison Chase <[email protected]>
- Loading branch information
Showing
8 changed files
with
665 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# MosaicML\n", | ||
"\n", | ||
"[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n", | ||
"\n", | ||
"This example goes over how to use LangChain to interact with MosaicML Inference for text completion." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n", | ||
"\n", | ||
"from getpass import getpass\n", | ||
"\n", | ||
"MOSAICML_API_TOKEN = getpass()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"\n", | ||
"os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.llms import MosaicML\n", | ||
"from langchain import PromptTemplate, LLMChain" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"template = \"\"\"Question: {question}\"\"\"\n", | ||
"\n", | ||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"llm = MosaicML(inject_instruction_format=True, model_kwargs={'do_sample': False})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"llm_chain = LLMChain(prompt=prompt, llm=llm)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"question = \"What is one good reason why you should train a large language model on domain specific data?\"\n", | ||
"\n", | ||
"llm_chain.run(question)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
109 changes: 109 additions & 0 deletions
109
docs/modules/models/text_embedding/examples/mosaicml.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# MosaicML embeddings\n", | ||
"\n", | ||
"[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n", | ||
"\n", | ||
"This example goes over how to use LangChain to interact with MosaicML Inference for text embedding." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n", | ||
"\n", | ||
"from getpass import getpass\n", | ||
"\n", | ||
"MOSAICML_API_TOKEN = getpass()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"\n", | ||
"os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.embeddings import MosaicMLInstructorEmbeddings" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"embeddings = MosaicMLInstructorEmbeddings(\n", | ||
" query_instruction=\"Represent the query for retrieval: \"\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"query_text = \"This is a test query.\"\n", | ||
"query_result = embeddings.embed_query(query_text)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"document_text = \"This is a test document.\"\n", | ||
"document_result = embeddings.embed_documents([document_text])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"\n", | ||
"query_numpy = np.array(query_result)\n", | ||
"document_numpy = np.array(document_result[0])\n", | ||
"similarity = np.dot(query_numpy, document_numpy) / (np.linalg.norm(query_numpy)*np.linalg.norm(document_numpy))\n", | ||
"print(f\"Cosine similarity between document and query: {similarity}\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
"""Wrapper around MosaicML APIs.""" | ||
from typing import Any, Dict, List, Mapping, Optional, Tuple | ||
|
||
import requests | ||
from pydantic import BaseModel, Extra, root_validator | ||
|
||
from langchain.embeddings.base import Embeddings | ||
from langchain.utils import get_from_dict_or_env | ||
|
||
|
||
class MosaicMLInstructorEmbeddings(BaseModel, Embeddings):
    """Wrapper around MosaicML's embedding inference service.

    To use, you should have the
    environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass
    it as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain.embeddings import MosaicMLInstructorEmbeddings
            endpoint_url = (
                "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict"
            )
            mosaic_embeddings = MosaicMLInstructorEmbeddings(
                endpoint_url=endpoint_url,
                mosaicml_api_token="my-api-key"
            )
    """

    endpoint_url: str = (
        "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict"
    )
    """Endpoint URL to use."""
    embed_instruction: str = "Represent the document for retrieval: "
    """Instruction used to embed documents."""
    query_instruction: str = (
        "Represent the question for retrieving supporting documents: "
    )
    """Instruction used to embed the query."""
    retry_sleep: float = 1.0
    """How long (in seconds) to sleep before retrying once after a rate limit."""

    mosaicml_api_token: Optional[str] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API token is provided directly or via environment."""
        mosaicml_api_token = get_from_dict_or_env(
            values, "mosaicml_api_token", "MOSAICML_API_TOKEN"
        )
        values["mosaicml_api_token"] = mosaicml_api_token
        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"endpoint_url": self.endpoint_url}

    def _embed(
        self, input_texts: List[Tuple[str, str]], is_retry: bool = False
    ) -> List[List[float]]:
        """POST ``(instruction, text)`` pairs to the endpoint and return embeddings.

        Args:
            input_texts: The ``(instruction, text)`` pairs to embed.
                (Renamed from ``input`` to avoid shadowing the builtin;
                internal callers pass it positionally.)
            is_retry: Whether this call is already the rate-limit retry;
                at most one retry is attempted.

        Returns:
            One embedding (list of floats) per input pair, taken from the
            response's ``data`` key.

        Raises:
            ValueError: If the HTTP request fails, the response is not valid
                JSON, or the API reports an error.
        """
        payload = {"input_strings": input_texts}

        # HTTP headers for authorization
        headers = {
            "Authorization": f"{self.mosaicml_api_token}",
            "Content-Type": "application/json",
        }

        # send request
        try:
            response = requests.post(self.endpoint_url, headers=headers, json=payload)
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Error raised by inference endpoint: {e}") from e

        try:
            parsed_response = response.json()

            if "error" in parsed_response:
                # if we get rate limited, sleep once and retry a single time
                if (
                    not is_retry
                    and "rate limit exceeded" in parsed_response["error"].lower()
                ):
                    # lazy import: only needed on the rate-limited path
                    import time

                    time.sleep(self.retry_sleep)

                    return self._embed(input_texts, is_retry=True)

                raise ValueError(
                    f"Error raised by inference API: {parsed_response['error']}"
                )

            if "data" not in parsed_response:
                raise ValueError(
                    f"Error raised by inference API, no key data: {parsed_response}"
                )
            embeddings = parsed_response["data"]
        except requests.exceptions.JSONDecodeError as e:
            raise ValueError(
                f"Error raised by inference API: {e}.\nResponse: {response.text}"
            ) from e

        return embeddings

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using a MosaicML deployed instructor embedding model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        instruction_pairs = [(self.embed_instruction, text) for text in texts]
        embeddings = self._embed(instruction_pairs)
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a MosaicML deployed instructor embedding model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        instruction_pair = (self.query_instruction, text)
        embedding = self._embed([instruction_pair])[0]
        return embedding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.