Skip to content

Commit

Permalink
chore: improve deduplicate algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
cbrzn committed Feb 21, 2024
1 parent 498c0d9 commit 9f2c9dc
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions workers/fund_public_goods/lib/strategy/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,21 @@


def get_latest_project_per_website(projects: list[Projects]) -> list[Projects]:
    """Deduplicate projects, keeping the most recently updated of each group.

    Deduplication runs in two passes:

    1. Projects are grouped by ``sanitize_url(project.website)`` and only the
       one with the greatest ``updated_at`` in each group is kept.
    2. The survivors are grouped by ``project.twitter`` and, again, only the
       one with the greatest ``updated_at`` in each group is kept.

    Args:
        projects: Projects to deduplicate. Each must expose ``website``,
            ``twitter`` and a mutually comparable ``updated_at``.

    Returns:
        The surviving projects, ordered by the first appearance of their
        group in ``projects``. An empty input yields an empty list.
    """
    # Pass 1: latest project per sanitized website.
    latest_by_website: dict[str, Projects] = {}
    for project in projects:
        website_key = sanitize_url(project.website)
        current = latest_by_website.get(website_key)
        if current is None or project.updated_at > current.updated_at:
            latest_by_website[website_key] = project

    # Pass 2: latest project per Twitter handle.
    # Keys are tagged tuples so a website-based fallback key can never collide
    # with a real Twitter handle.
    latest_by_identity: dict[tuple[str, str], Projects] = {}
    for website_key, project in latest_by_website.items():
        if project.twitter:
            identity = ("twitter", project.twitter)
        else:
            # NOTE(review): assumes `twitter` may be None/empty. Such projects
            # are unrelated to each other, so they must not be merged under a
            # single shared key; fall back to the (already unique) website key.
            identity = ("website", website_key)

        # Compare against the entry stored for *this* identity. The previous
        # code looked up the website map with a stale `project_website` left
        # over from the first loop, so the wrong timestamps were compared.
        current = latest_by_identity.get(identity)
        if current is None or project.updated_at > current.updated_at:
            latest_by_identity[identity] = project

    return list(latest_by_identity.values())

def get_project_text(project_with_answers: tuple[Projects, list[Answer]]) -> str:
(project, answers) = project_with_answers
Expand Down

0 comments on commit 9f2c9dc

Please sign in to comment.