-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #214 from polywrap/dev
merge dev
- Loading branch information
Showing
11 changed files
with
1,797 additions
and
1,906 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 3 additions & 8 deletions
11
workers/fund_public_goods/lib/strategy/utils/fetch_matching_projects.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,13 @@ | ||
from fund_public_goods.db.entities import Projects | ||
from fund_public_goods.db.tables.projects import get_all_projects_lightweight, get_projects_by_ids | ||
from fund_public_goods.db.tables.projects import get_projects_by_ids | ||
from fund_public_goods.lib.strategy.models.answer import Answer | ||
from fund_public_goods.lib.strategy.utils.get_top_matching_projects import get_top_matching_projects | ||
from fund_public_goods.lib.strategy.utils.utils import get_latest_project_per_website | ||
|
||
|
||
def fetch_matching_projects(prompt: str) -> list[tuple[Projects, list[Answer]]]:
    """Return the projects matching ``prompt`` together with their answers.

    The first ten results of ``get_top_matching_projects(prompt)`` are kept,
    and the records for their ids are loaded via ``get_projects_by_ids``.

    Args:
        prompt: Free-text query used to find matching projects.

    Returns:
        Whatever ``get_projects_by_ids`` yields for the matched ids; per the
        annotation, a list of ``(project, answers)`` tuples.
    """
    # Only the prompt is passed on: the candidate projects are no longer
    # loaded and de-duplicated inside this function.
    top_matches = get_top_matching_projects(prompt)[:10]
    matched_ids = [project.id for project in top_matches]

    return get_projects_by_ids(matched_ids)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from fund_public_goods.db.app_db import load_env | ||
from fund_public_goods.lib.strategy.utils.utils import get_latest_project_per_website | ||
from langchain.text_splitter import CharacterTextSplitter | ||
from fund_public_goods.db.tables.projects import get_all_projects_lightweight | ||
from langchain_openai import OpenAIEmbeddings | ||
from langchain_pinecone import Pinecone | ||
from langchain.schema import Document | ||
|
||
|
||
# Environment settings; `run` reads `pinecone_index` and `pinecone_key` from it.
env = load_env()


def run() -> None:
    """Rebuild the Pinecone index from the current project descriptions.

    Loads all projects, de-duplicates them with
    ``get_latest_project_per_website``, splits each description into
    token-sized chunks, clears the existing index contents and uploads one
    document per chunk. Every document carries the project id in its
    metadata under the ``"id"`` key.
    """
    projects = get_all_projects_lightweight()
    deduplicated_projects = get_latest_project_per_website(projects)

    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=200,
        chunk_overlap=10,
        separator=" ",
        keep_separator=True,
    )

    documents: list[Document] = []

    for project in deduplicated_projects:
        # NOTE(review): assumes a description may be missing or empty; such
        # projects have nothing to embed, so skip them rather than crash
        # inside the splitter. Confirm against the Projects entity.
        if not project.description:
            continue

        documents.extend(
            Document(page_content=description_chunk, metadata={"id": project.id})
            for description_chunk in text_splitter.split_text(project.description)
        )

    vectorstore = Pinecone(
        index_name=env.pinecone_index,
        embedding=OpenAIEmbeddings(),
        pinecone_api_key=env.pinecone_key,
    )

    try:
        vectorstore.delete(delete_all=True)
    except Exception as error:
        # Best-effort: the original treated any failure here as "index is
        # empty". Keep going, but do not trap KeyboardInterrupt/SystemExit
        # and show the real cause so auth/network failures are visible.
        print(f"Pinecone deletion failed (index may be empty). Skipping deletion: {error!r}")

    vectorstore.add_documents(documents)
Oops, something went wrong.