forked from tylerprogramming/ai
Commit
adding new service for asking questions to a pdf with langchain, vectordb, and autogen
1 parent 7794410 · commit da7857d
Showing 4 changed files with 97 additions and 0 deletions.
@@ -0,0 +1,4 @@
OPENAI_API_KEY=sk-1111
BASE_URL=http://localhost:1234/v1
AUTOGEN_USE_DOCKER=False
TOKENIZERS_PARALLELISM=false
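The sk-1111 key is a dummy placeholder, and BASE_URL points at a local OpenAI-compatible server (port 1234 is commonly the LM Studio default). A minimal sketch of how the script below consumes these values, assuming only python-dotenv and the standard library:

import os
import dotenv

dotenv.load_dotenv()                    # read the key/value pairs from .env
api_key = os.getenv("OPENAI_API_KEY")   # "sk-1111" placeholder for local use
base_url = os.getenv("BASE_URL")        # local OpenAI-compatible endpoint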
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,93 @@
import os

import autogen
import dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAI
from typing_extensions import Annotated

dotenv.load_dotenv()

# load the pdf file from directory
loaders = [PyPDFLoader('./ancient_rome.pdf')]
docs = []
for file in loaders:
    docs.extend(file.load())
# split text to chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(docs)
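# note: chunk_overlap is left at the library default (200 characters at the
# time of writing), so neighboring chunks share some text across boundaries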

# pages = loaders[0].load_and_split()
# print(pages[0])

# create a vectorstore
vectorstore = Chroma(
    collection_name="full_documents",
    embedding_function=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                             model_kwargs={'device': 'cpu'})
)
vectorstore.add_documents(docs)
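# note: no persist_directory is passed, so this collection lives in memory
# and the embeddings are recomputed on every run; Chroma accepts a
# persist_directory argument if the index should survive restarts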

qa = ConversationalRetrievalChain.from_llm(
    OpenAI(temperature=0),
    vectorstore.as_retriever(),
    memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True)
)
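# the chain condenses each new question against chat_history, retrieves the
# most similar chunks from the vectorstore, and has the temperature-0 OpenAI
# model answer from them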

# set config for autogen
config_list = [
    {
        # "base_url": os.getenv("BASE_URL"),
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "gpt-3.5-turbo"
    }
]
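# uncommenting base_url above would route requests to the local endpoint
# from .env (http://localhost:1234/v1) instead of the hosted OpenAI API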

# set autogen user agent and assistant agent with function calling
llm_config = {
    "timeout": 600,
    "seed": 42,
    "config_list": config_list,
    "temperature": 0.7,
}

# create an AssistantAgent instance "assistant"
assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config=llm_config,
)

# create a UserProxyAgent instance "user_proxy"
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=1,
    code_execution_config=False,
    llm_config=llm_config,
    system_message="""Reply TERMINATE if the task has been solved at full satisfaction.
Otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
)
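# human_input_mode="NEVER" plus max_consecutive_auto_reply=1 limits the run
# to one automatic round trip; the system message tells the proxy to reply
# TERMINATE once the answer is satisfactory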


@user_proxy.register_for_execution()
@assistant.register_for_llm(description="PDF Assistant.")
def chat_docs(question: Annotated[str, "The question to be asked"]) -> str:
    response = qa({"question": question})
    return response["answer"]
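# the decorator pair wires the function in as a tool: register_for_llm
# advertises chat_docs to the assistant for function calling, and
# register_for_execution lets the user proxy actually run it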


user_proxy.initiate_chat(
    assistant,
    message="""
    Find the answer to the question below from ancient_rome.pdf and do not write any code.
    1. Can you give me a multiple choice question from the pdf with 4 choices and 1 of them being the answer? Also give the reason for the answer and the page it was found on.
    Start the work now.
    """
)
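With the .env file in place and ancient_rome.pdf next to the script, the service runs as a plain Python program. A sketch of the setup, assuming the script is saved as main.py (the commit does not show its path) and installing the packages the imports require:

pip install pyautogen langchain langchain-community langchain-openai chromadb sentence-transformers pypdf python-dotenv
python main.py

The user proxy then routes the prompt to the assistant, which calls chat_docs to query the PDF and returns the multiple-choice question it built.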