Merge branch 'master' of github.com:interval-network/langchain

langchain-ai · Jan 19, 2025 · c28e1b1 · c28e1b1
2 parents 95f655a + 9e4e053
commit c28e1b1
Show file tree

Hide file tree

Showing 28 changed files with 1,140 additions and 73 deletions.
diff --git a/.github/scripts/prep_api_docs_build.py b/.github/scripts/prep_api_docs_build.py
@@ -82,6 +82,12 @@ def main():
             and p["repo"] != "langchain-ai/langchain"
         ])
 
+        # Delete partner directories that do not have a pyproject.toml
+        for partner in Path("langchain/libs/partners").iterdir():
+            if partner.is_dir() and not (partner / "pyproject.toml").exists():
+                print(f"Removing {partner} as it does not have a pyproject.toml")
+                shutil.rmtree(partner)
+
         print("Library sync completed successfully!")
 
     except Exception as e:

diff --git a/docs/docs/concepts/output_parsers.mdx b/docs/docs/concepts/output_parsers.mdx
@@ -27,7 +27,7 @@ LangChain has lots of different types of output parsers. This is a list of outpu
 | Name                                                                                                                                                                                                                                    | Supports Streaming | Has Format Instructions | Calls LLM | Input Type         | Output Type          | Description                                                                                                                                                                                                                                              |
 |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|-------------------------|-----------|--------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | [Str](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.string.StrOutputParser.html)                                                                                                         | ✅                  |                         |           | `str` \| `Message` | String                | Parses texts from message objects. Useful for handling variable formats of message content (e.g., extracting text from content blocks).                                                                                                                |
-| [JSON](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.json.JSONOutputParser.html#langchain_core.output_parsers.json.JSONOutputParser)                                                     | ✅                  | ✅                       |           | `str` \| `Message` | JSON object          | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling.                                    |
+| [JSON](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html)                                                     | ✅                  | ✅                       |           | `str` \| `Message` | JSON object          | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling.                                    |
 | [XML](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html#langchain_core.output_parsers.xml.XMLOutputParser)                                                          | ✅                  | ✅                       |           | `str` \| `Message` | `dict`               | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's).                                                                                                                            |
 | [CSV](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.list.CommaSeparatedListOutputParser.html#langchain_core.output_parsers.list.CommaSeparatedListOutputParser)                          | ✅                  | ✅                       |           | `str` \| `Message` | `List[str]`          | Returns a list of comma separated values.                                                                                                                                                                                                                |
 | [OutputFixing](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser)                                                |                    |                         | ✅         | `str` \| `Message` |                      | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output.                                                                                              |

diff --git a/docs/docs/concepts/vectorstores.mdx b/docs/docs/concepts/vectorstores.mdx
@@ -151,10 +151,10 @@ Many vectorstores support [the `k`](/docs/integrations/vectorstores/pinecone/#qu
 ### Metadata filtering
 
 While vectorstore implement a search algorithm to efficiently search over *all* the embedded documents to find the most similar ones, many also support filtering on metadata.
-This allows structured filters to reduce the size of the similarity search space. These two concepts work well together:
+Metadata filtering narrows the search by applying structured conditions, such as restricting results to documents from a particular source or date range. These two concepts work well together:
 
-1. **Semantic search**: Query the unstructured data directly, often using via embedding or keyword similarity.
-2. **Metadata search**: Apply structured query to the metadata, filering specific documents.
+1. **Semantic search**: Query the unstructured data directly, often via embedding or keyword similarity.
+2. **Metadata search**: Apply a structured query to the metadata, filtering for specific documents.
 
 Vector store support for metadata filtering is typically dependent on the underlying vector store implementation.
 

diff --git a/docs/docs/how_to/qa_per_user.ipynb b/docs/docs/how_to/qa_per_user.ipynb
@@ -228,7 +228,7 @@
     "# highlight-next-line\n",
     "def retrieve(state: State, config: RunnableConfig):\n",
     "    # highlight-next-line\n",
-    "    retrieved_docs = configurable_retriever.invoke(state[\"question\"])\n",
+    "    retrieved_docs = configurable_retriever.invoke(state[\"question\"], config)\n",
     "    return {\"context\": retrieved_docs}\n",
     "\n",
     "\n",

diff --git a/docs/docs/integrations/graphs/amazon_neptune_open_cypher.ipynb b/docs/docs/integrations/graphs/amazon_neptune_open_cypher.ipynb
@@ -70,9 +70,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Austin airport has 98 outgoing routes.\n"
+     ]
+    }
+   ],
    "source": [
     "from langchain_aws import ChatBedrockConverse\n",
     "from langchain_aws.chains import create_neptune_opencypher_qa_chain\n",
@@ -83,21 +91,169 @@
     "    temperature=0,\n",
     ")\n",
     "\n",
-    "chain = create_neptune_opencypher_qa_chain(\n",
-    "    llm=llm,\n",
-    "    graph=graph,\n",
-    ")\n",
+    "chain = create_neptune_opencypher_qa_chain(llm=llm, graph=graph)\n",
+    "\n",
+    "result = chain.invoke(\"How many outgoing routes does the Austin airport have?\")\n",
+    "print(result[\"result\"].content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Adding Message History\n",
+    "\n",
+    "The Neptune openCypher QA chain can be wrapped by [`RunnableWithMessageHistory`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory). This adds message history to the chain, allowing us to create a chatbot that retains conversation state across multiple invocations.\n",
+    "\n",
+    "To start, we need a way to store and load the message history. For this purpose, each thread will be created as an instance of [`InMemoryChatMessageHistory`](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.InMemoryChatMessageHistory.html), and stored into a dictionary for repeated access.\n",
+    "\n",
+    "(Also see: https://python.langchain.com/docs/versions/migrating_memory/chat_history/#chatmessagehistory)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.chat_history import InMemoryChatMessageHistory\n",
+    "\n",
+    "chats_by_session_id = {}\n",
+    "\n",
+    "\n",
+    "def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:\n",
+    "    chat_history = chats_by_session_id.get(session_id)\n",
+    "    if chat_history is None:\n",
+    "        chat_history = InMemoryChatMessageHistory()\n",
+    "        chats_by_session_id[session_id] = chat_history\n",
+    "    return chat_history"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, the QA chain and message history storage can be used to create the new `RunnableWithMessageHistory`. Note that we must set `query` as the input key to match the format expected by the base chain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
+    "\n",
+    "runnable_with_history = RunnableWithMessageHistory(\n",
+    "    chain,\n",
+    "    get_chat_history,\n",
+    "    input_messages_key=\"query\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before invoking the chain, a unique `session_id` needs to be generated for the conversation that the new `InMemoryChatMessageHistory` will remember."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import uuid\n",
     "\n",
-    "result = chain.invoke(\n",
-    "    {\"query\": \"How many outgoing routes does the Austin airport have?\"}\n",
+    "session_id = uuid.uuid4()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, invoke the message history enabled chain with the `session_id`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "You can fly directly to 98 destinations from Austin airport.\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = runnable_with_history.invoke(\n",
+    "    {\"query\": \"How many destinations can I fly to directly from Austin airport?\"},\n",
+    "    config={\"configurable\": {\"session_id\": session_id}},\n",
+    ")\n",
+    "print(result[\"result\"].content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As the chain continues to be invoked with the same `session_id`, responses will be returned in the context of previous queries in the conversation.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "You can fly directly to 4 destinations in Europe from Austin airport.\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = runnable_with_history.invoke(\n",
+    "    {\"query\": \"Out of those destinations, how many are in Europe?\"},\n",
+    "    config={\"configurable\": {\"session_id\": session_id}},\n",
+    ")\n",
+    "print(result[\"result\"].content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The four European destinations you can fly to directly from Austin airport are:\n",
+      "- AMS (Amsterdam Airport Schiphol)\n",
+      "- FRA (Frankfurt am Main)\n",
+      "- LGW (London Gatwick)\n",
+      "- LHR (London Heathrow)\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = runnable_with_history.invoke(\n",
+    "    {\"query\": \"Give me the codes and names of those airports.\"},\n",
+    "    config={\"configurable\": {\"session_id\": session_id}},\n",
     ")\n",
     "print(result[\"result\"].content)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -111,7 +267,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,