Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release Feb 21st #211

Merged
merged 9 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions workers/fund_public_goods/db/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,6 @@ class Projects(BaseModel):
website: str
logo: Optional[str] = None
twitter: Optional[str] = None
keywords: list[str] = []
categories: list[str] = []
impact_funding_report: Optional[str] = Field(..., alias="impactFundingReport")
impact: Optional[float] = None
funding_needed: Optional[float] = Field(..., alias="fundingNeeded")
Expand Down
83 changes: 54 additions & 29 deletions workers/fund_public_goods/db/tables/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ def upsert(
"website": row.website,
"twitter": row.twitter,
"short_description": row.short_description,
"keywords": row.keywords,
"categories": row.categories,
"logo": row.logo,
"funding_needed": row.funding_needed,
"impact_funding_report": row.impact_funding_report,
Expand All @@ -37,8 +35,6 @@ def upsert_multiple(
"website": row.website,
"twitter": row.twitter,
"short_description": row.short_description,
"keywords": row.keywords,
"categories": row.categories,
"logo": row.logo,
"funding_needed": row.funding_needed,
"impact_funding_report": row.impact_funding_report,
Expand Down Expand Up @@ -69,8 +65,6 @@ def sanitize_projects_information(projects: list[dict[str, Any]]) -> list[tuple[
website=project_data.get("website", ""),
twitter=project_data.get("twitter", ""),
logo=project_data.get("logo", ""),
keywords=project_data.get("keywords", []),
categories=project_data.get("categories", []),
short_description=project_data.get("short_description", None),
funding_needed=project_data.get("funding_needed", None),
impact=project_data.get("impact", None),
Expand All @@ -82,35 +76,66 @@ def sanitize_projects_information(projects: list[dict[str, Any]]) -> list[tuple[
return projects_with_answers


def get_unique_categories() -> list[str]:
def get_projects_lightweight(range_from: int, range_to: int) -> PostgrestAPIResponse[dict[str, Any]]:
db = create_admin()
response: PostgrestAPIResponse[list[dict[str, str]]] = (
db.table("unique_categories_views").select("*").execute()
return (
db.table("projects")
.select(
"id, title, website, updated_at, description"
)
.range(range_from, range_to)
.execute()
)
if not response.data:
return []

categories = []

for row in response.data:
categories.append(row["category"]) # type: ignore

return categories

def fetch_projects_by_category(categories: list[str]) -> list[tuple[Projects, list[Answer]]]:
results = get_projects_from_description(categories).data
sanitized_projects = sanitize_projects_information(results)
return sanitized_projects

def get_projects_from_description(categories: list[str]):

def get_projects_by_ids(ids: list[str]) -> list[tuple[Projects, list[Answer]]]:
db = create_admin()
request = (
results = (
db.table("projects")
.select(
"* applications(id, recipient, round, answers)"
"*, applications(id, recipient, round, answers)"
)
.ov("categories", categories)
.in_('id', ids)
.execute()
)

return sanitize_projects_information(results.data)


def get_all_projects_lightweight() -> list[Projects]:
    """Load every project page by page and return them as ``Projects`` models.

    Pages are requested through ``get_projects_lightweight`` until a page
    comes back with fewer than ``page_size`` rows. Each row is stripped of
    ``None`` values before being turned into a ``Projects`` instance, so
    missing columns fall back to the defaults given below.

    Returns:
        One ``Projects`` instance per fetched row, in fetch order.
    """
    page_size = 999
    rows: list[dict[str, Any]] = []
    offset = 0
    fetching = True

    while fetching:
        # NOTE(review): whether the upper bound of the range is inclusive or
        # exclusive depends on the database client version - confirm that
        # consecutive pages neither overlap nor skip a row.
        page = get_projects_lightweight(offset, offset + page_size).data
        rows.extend(page)

        # A short page means there is nothing left to fetch.
        fetching = len(page) >= page_size
        offset += page_size

    # Drop None values so the ``.get`` defaults below apply to them as well.
    cleaned_rows = (
        {key: value for key, value in row.items() if value is not None}
        for row in rows
    )

    return [
        Projects(
            id=data.get("id", ""),
            updated_at=data.get("updated_at", ""),
            title=data.get("title", ""),
            description=data.get("description", ""),
            website=data.get("website", ""),
            twitter=data.get("twitter", ""),
            logo=data.get("logo", ""),
            short_description=data.get("short_description", None),
            funding_needed=data.get("funding_needed", None),
            impact=data.get("impact", None),
            impact_funding_report=data.get("impact_funding_report", None),
        )
        for data in cleaned_rows
    ]

return request
47 changes: 0 additions & 47 deletions workers/fund_public_goods/lib/strategy/utils/categorize_project.py

This file was deleted.

53 changes: 0 additions & 53 deletions workers/fund_public_goods/lib/strategy/utils/categorize_prompt.py

This file was deleted.

102 changes: 0 additions & 102 deletions workers/fund_public_goods/lib/strategy/utils/constants.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
from fund_public_goods.db.entities import Projects
from fund_public_goods.db.tables.projects import fetch_projects_by_category, get_unique_categories
from fund_public_goods.db.tables.projects import get_all_projects_lightweight, get_projects_by_ids
from fund_public_goods.lib.strategy.models.answer import Answer
from fund_public_goods.lib.strategy.utils.categorize_prompt import categorize_prompt
from fund_public_goods.lib.strategy.utils.get_top_matching_projects import get_top_matching_projects
from fund_public_goods.lib.strategy.utils.utils import get_latest_project_per_website


def fetch_matching_projects(prompt: str) -> list[tuple[Projects, list[Answer]]]:
prompt_categories = categorize_prompt(prompt, get_unique_categories())
fetched_projects = fetch_projects_by_category(prompt_categories)

answers_by_id = { project.id: answers for (project, answers) in fetched_projects }
projects = [project for (project, _) in fetched_projects]
projects_to_rank = get_all_projects_lightweight()

deduplicated_projects = get_latest_project_per_website(projects)
deduplicated_projects = get_latest_project_per_website(projects_to_rank)
matching_projects = get_top_matching_projects(prompt, deduplicated_projects)[:10]

matching_projects_with_answers = [(project, answers_by_id[project.id]) for project in matching_projects]
matched_ids = [p.id for p in matching_projects]

matching_projects_with_answers = get_projects_by_ids(matched_ids)

return matching_projects_with_answers
33 changes: 33 additions & 0 deletions workers/fund_public_goods/lib/strategy/utils/generate_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import CommaSeparatedListOutputParser


# System prompt used to ask the chat model for search queries.
# The ``{n}`` and ``{prompt}`` placeholders are filled in when the chain is invoked.
queries_prompt_template = """
Your goal is to provide a list of queries that will be used to perform
an embeddings search over different project descriptions and get the ones
that best match the user's interests. All projects are public goods funding
projects in the crypto ecosystem.

Provide a maximum of {n} queries.

This is the user's interest: {prompt}

Respond strictly with a comma-separated list of queries, without quotes
"""


def generate_queries(prompt: str, n) -> list[str]:
    """Ask the chat model for search queries derived from a user's interest.

    Args:
        prompt: The user's interest, substituted into ``{prompt}`` in
            ``queries_prompt_template``.
        n: Maximum number of queries to request, substituted into ``{n}``.

    Returns:
        The model's reply as parsed by ``CommaSeparatedListOutputParser``.
    """
    chat_model = ChatOpenAI(model="gpt-4-1106-preview")  # type: ignore
    system_prompt = ChatPromptTemplate.from_messages(
        [("system", queries_prompt_template)]
    )

    # Pipeline: prompt template -> chat model -> comma-separated list parser.
    chain = system_prompt | chat_model | CommaSeparatedListOutputParser()

    return chain.invoke({"prompt": prompt, "n": n})
Loading
Loading