Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Organic query selection revision #192

Open
wants to merge 34 commits into
base: production
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
015228a
Revert "Feat: Make synth data generation more customiseable (#67)"
tripathiarpan20 Oct 17, 2024
4fcc073
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 5, 2024
66572cb
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 6, 2024
c5e8614
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 13, 2024
c598855
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 13, 2024
3452271
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 14, 2024
7e30ab0
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 14, 2024
2ba59ba
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 15, 2024
ca3c561
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 18, 2024
5c28570
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 19, 2024
3028e03
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 20, 2024
b4e3c7b
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 20, 2024
5486f20
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 21, 2024
efbe10a
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 21, 2024
4a9b87a
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 21, 2024
c430cd3
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 25, 2024
0bc088a
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 27, 2024
26ae982
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 27, 2024
0b5d595
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 28, 2024
6168ab9
Merge branch 'namoray:production' into production
tripathiarpan20 Nov 30, 2024
3d92303
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 4, 2024
fb3905e
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 5, 2024
54a6cb0
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 6, 2024
39ef8c1
Merge branch 'production' of https://github.com/tripathiarpan20/ninet…
tripathiarpan20 Dec 8, 2024
b5ff5cf
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 8, 2024
fe1d09e
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 9, 2024
4721a64
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 9, 2024
1db5819
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 9, 2024
99f1666
Merge branch 'production' of https://github.com/tripathiarpan20/ninet…
tripathiarpan20 Dec 11, 2024
eddbe82
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 12, 2024
b7b7ca7
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 12, 2024
e583a0c
Merge branch 'namoray:production' into production
tripathiarpan20 Dec 13, 2024
fe0424a
updated organic contender selection
tripathiarpan20 Dec 13, 2024
f94b8b3
shuffle chosen top_x contenders
tripathiarpan20 Dec 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 22 additions & 27 deletions validator/db/src/sql/contenders.py
Original file line number Diff line number Diff line change
@@ -2,7 +2,6 @@

from asyncpg import Connection
import random

from validator.db.src.database import PSQLDB
from validator.models import Contender, PeriodScore, calculate_period_score
from validator.utils.database import database_constants as dcst
@@ -160,12 +159,13 @@ async def get_contenders_for_organic_task(psql_db: PSQLDB, task: str, top_x: int
async with await psql_db.connection() as connection:
rows = await connection.fetch(
f"""
WITH ranked_contenders AS (
WITH filtered_contenders AS (
SELECT DISTINCT ON (c.{dcst.NODE_HOTKEY}, c.{dcst.TASK})
c.{dcst.CONTENDER_ID}, c.{dcst.NODE_HOTKEY}, c.{dcst.NODE_ID}, c.{dcst.TASK},
c.{dcst.RAW_CAPACITY}, c.{dcst.CAPACITY_TO_SCORE}, c.{dcst.CONSUMED_CAPACITY},
c.{dcst.TOTAL_REQUESTS_MADE}, c.{dcst.REQUESTS_429}, c.{dcst.REQUESTS_500},
c.{dcst.CAPACITY}, c.{dcst.PERIOD_SCORE}, c.{dcst.NETUID}, s.{dcst.COLUMN_NORMALISED_NET_SCORE}
c.{dcst.CAPACITY}, c.{dcst.PERIOD_SCORE}, c.{dcst.NETUID}, s.{dcst.COLUMN_NORMALISED_NET_SCORE},
(c.{dcst.TOTAL_REQUESTS_MADE} - c.{dcst.REQUESTS_429} - c.{dcst.REQUESTS_500})::FLOAT / NULLIF(c.{dcst.TOTAL_REQUESTS_MADE}, 0) AS success_ratio
FROM {dcst.CONTENDERS_TABLE} c
JOIN {dcst.NODES_TABLE} n ON c.{dcst.NODE_ID} = n.{dcst.NODE_ID} AND c.{dcst.NETUID} = n.{dcst.NETUID}
JOIN {dcst.CONTENDERS_WEIGHTS_STATS_TABLE} s ON c.{dcst.NODE_HOTKEY} = s.{dcst.NODE_HOTKEY}
@@ -175,37 +175,32 @@ async def get_contenders_for_organic_task(psql_db: PSQLDB, task: str, top_x: int
AND n.{dcst.SYMMETRIC_KEY_UUID} IS NOT NULL
AND s.{dcst.COLUMN_NORMALISED_NET_SCORE} IS NOT NULL
AND s.{dcst.COLUMN_NORMALISED_NET_SCORE} > 0
ORDER BY c.{dcst.NODE_HOTKEY}, c.{dcst.TASK}, s.{dcst.COLUMN_NORMALISED_NET_SCORE} DESC, s.{dcst.CREATED_AT} DESC
),
percentiles AS (
SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY {dcst.COLUMN_NORMALISED_NET_SCORE}) AS score_75th
FROM filtered_contenders
),
top_contenders AS (
SELECT f.*
FROM filtered_contenders f, percentiles p
WHERE f.{dcst.COLUMN_NORMALISED_NET_SCORE} >= p.score_75th
ORDER BY success_ratio DESC, f.{dcst.CONSUMED_CAPACITY}
LIMIT $2
)
SELECT *
FROM ranked_contenders
FROM top_contenders
""",
task,
top_x
)
logger.debug(f"Number of valid contenders for task {task} for organic query : {len(rows)}")

contenders = [Contender(**{k: v for k, v in row.items() if k != dcst.COLUMN_NORMALISED_NET_SCORE}) for row in rows]
scores = {c.node_id: row[dcst.COLUMN_NORMALISED_NET_SCORE] for c, row in zip(contenders, rows)}

if contenders:
if len(contenders) > top_x:
top_x_percent = contenders[:max(1, int(gcst.ORGANIC_SELECT_CONTENDER_LOW_POURC * len(contenders)))] # top 75%
best_top_x_percent = top_x_percent[:max(1, int(gcst.ORGANIC_TOP_POURC * len(top_x_percent)))] # top 25% within the top 75%
remaining_top_x_percent = top_x_percent[len(best_top_x_percent):]

best_top_x_weights = [gcst.ORGANIC_TOP_POURC_FACTOR / (len(top_x_percent) + 1) for _ in range(len(best_top_x_percent))] # higher weight for top 25%
remaining_top_x_weights = [1 / (len(top_x_percent) + 1) for _ in range(len(remaining_top_x_percent))]

combined_contenders = best_top_x_percent + remaining_top_x_percent
combined_weights = best_top_x_weights + remaining_top_x_weights
logger.debug(f"Number of valid contenders for task {task} for organic query : {len(rows)}")

selected_contenders = random.choices(combined_contenders, weights=combined_weights, k=min(top_x, len(combined_contenders)))
selected_contenders = sorted(selected_contenders, key=lambda x: scores[x.node_id], reverse=True)
logger.debug(f"Selected contenders for task {task} : {selected_contenders}")
return selected_contenders
else:
logger.debug(f"Number of contenders ({len(contenders)}) < top_x ({top_x}). Returning all contenders")
return contenders
if rows:
contenders = [Contender(**{k: v for k, v in row.items() if k != dcst.COLUMN_NORMALISED_NET_SCORE}) for row in rows]
random.shuffle(contenders)
logger.debug(f"Selected contenders for task {task}: {contenders}")
return contenders
else:
logger.debug(f"Contenders selection for organic queries with task {task} yielded nothing (probably statistiques table is empty), falling back to synthetic queries logic.")
return await get_contenders_for_synthetic_task(psql_db, task, top_x)
Expand Down