Skip to content

Commit

Permalink
Merge pull request #212 from polywrap/chore/improve-deduplicate-algo
Browse files Browse the repository at this point in the history
chore: improve deduplicate algorithm
  • Loading branch information
cbrzn authored Feb 23, 2024
2 parents 6765161 + 9a05ba3 commit f185a9e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
2 changes: 1 addition & 1 deletion workers/fund_public_goods/db/tables/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_projects_lightweight(range_from: int, range_to: int) -> PostgrestAPIResp
return (
db.table("projects")
.select(
"id, title, website, updated_at, description"
"id, title, website, updated_at, description, twitter"
)
.range(range_from, range_to)
.execute()
Expand Down
18 changes: 13 additions & 5 deletions workers/fund_public_goods/lib/strategy/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,23 @@


def get_latest_project_per_website(projects: list[Projects]) -> list[Projects]:
    """Deduplicate projects by website, then by Twitter handle.

    Two passes are made:

    1. Projects are grouped by ``sanitize_url(project.website)`` and only
       the entry with the greatest ``updated_at`` is kept for each website.
    2. The survivors are grouped by ``project.twitter`` when it is truthy;
       projects without a handle fall back to their sanitized website as
       the grouping key. Again only the entry with the greatest
       ``updated_at`` is kept for each key.

    Args:
        projects: Candidate projects. Each must expose ``website``,
            ``twitter`` and a mutually comparable ``updated_at``.

    Returns:
        One project per distinct key, ordered by the first appearance of
        that key in ``projects``.
    """
    latest_by_website: dict[str, Projects] = {}
    for project in projects:
        website = sanitize_url(project.website)
        current = latest_by_website.get(website)
        if current is None or project.updated_at > current.updated_at:
            latest_by_website[website] = project

    # Keys are tagged with their kind so that a website string can never
    # collide with (and silently overwrite) a Twitter handle of equal text.
    deduplicated: dict[tuple[str, str], Projects] = {}
    for website, project in latest_by_website.items():
        if project.twitter:
            key = ("twitter", project.twitter)
        else:
            key = ("website", website)

        current = deduplicated.get(key)
        # Keep the most recently updated project for a shared handle rather
        # than whichever one happened to be seen first.
        if current is None or project.updated_at > current.updated_at:
            deduplicated[key] = project

    return list(deduplicated.values())

def get_project_text(project_with_answers: tuple[Projects, list[Answer]]) -> str:
(project, answers) = project_with_answers
Expand Down

0 comments on commit f185a9e

Please sign in to comment.