From 2ae3fb39858f92adcc818fd9bb541d2a7883ee19 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 7 Aug 2024 09:50:56 +0200 Subject: [PATCH] fix(progress): check for resolved conditions in dataset count Signed-off-by: David Wallace --- rdmo/projects/progress.py | 97 ++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index 39c4cd8a06..15c88e1f18 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -1,49 +1,62 @@ from collections import defaultdict from itertools import chain +from typing import Dict, Set -from django.db.models import Exists, OuterRef, Q +from django.db.models import Exists, OuterRef, Q, QuerySet from rdmo.conditions.models import Condition from rdmo.core.utils import markdown2html +from rdmo.projects.models import Value from rdmo.questions.models import Page, Question, QuestionSet -def resolve_conditions(project, values): +def get_conditions_from_catalog(catalog): # get all conditions for this catalog - pages_conditions_subquery = Page.objects.filter_by_catalog(project.catalog) \ + pages_conditions_subquery = Page.objects.filter_by_catalog(catalog) \ .filter(conditions=OuterRef('pk')) - questionsets_conditions_subquery = QuestionSet.objects.filter_by_catalog(project.catalog) \ + questionsets_conditions_subquery = QuestionSet.objects.filter_by_catalog(catalog) \ .filter(conditions=OuterRef('pk')) - questions_conditions_subquery = Question.objects.filter_by_catalog(project.catalog) \ + questions_conditions_subquery = Question.objects.filter_by_catalog(catalog) \ .filter(conditions=OuterRef('pk')) - catalog_conditions = Condition.objects.annotate(has_page=Exists(pages_conditions_subquery)) \ - .annotate(has_questionset=Exists(questionsets_conditions_subquery)) \ - .annotate(has_question=Exists(questions_conditions_subquery)) \ - .filter(Q(has_page=True) | Q(has_questionset=True) | Q(has_question=True)) \ - .distinct().select_related('source', 'target_option') + return Condition.objects.annotate( + has_page=Exists(pages_conditions_subquery), + has_questionset=Exists(questionsets_conditions_subquery), + has_question=Exists(questions_conditions_subquery) + ).filter( + Q(has_page=True) | Q(has_questionset=True) | Q(has_question=True) + ).distinct().select_related('source', 'target_option') - # evaluate conditions - conditions = set() - for condition in catalog_conditions: - if condition.resolve(values): - conditions.add(condition.id) +def evaluate_conditions(conditions: QuerySet, values: QuerySet[Value]) -> Dict[int, Set[int]]: + # evaluate conditions and # return all true conditions for this project - return conditions + condition_true_ids = defaultdict(set) + for condition in conditions: + resolved_value_set_indexes = {value.set_index for value in values if condition.resolve([value])} + if resolved_value_set_indexes: + condition_true_ids[condition.id].update(resolved_value_set_indexes) + return condition_true_ids + + +def compute_sets_from_values(values): + # compute sets from values (including empty values) + sets = defaultdict(lambda: defaultdict(list)) + for attribute, set_prefix, set_index in values.distinct_list(): + sets[attribute][set_prefix].append(set_index) + return sets def compute_navigation(section, project, snapshot=None): # get all values for this project and snapshot values = project.values.filter(snapshot=snapshot).select_related('attribute', 'option') + # get conditions from project.catalog + conditions = get_conditions_from_catalog(project.catalog) # get true conditions - conditions = resolve_conditions(project, values) - + condition_true_ids = evaluate_conditions(conditions, values) # compute sets from values (including empty values) - sets = defaultdict(lambda: defaultdict(list)) - for attribute, set_prefix, set_index in values.distinct_list(): - sets[attribute][set_prefix].append(set_index) + sets = compute_sets_from_values(values) # query distinct, non empty set values values_list = values.exclude_empty().distinct_list() @@ -62,11 +75,11 @@ def compute_navigation(section, project, snapshot=None): navigation_section['pages'] = [] for page in catalog_section.elements: - pages_conditions = {page.id for page in page.conditions.all()} - show = bool(not pages_conditions or pages_conditions.intersection(conditions)) + pages_condition_ids = {page.id for page in page.conditions.all()} + show = bool(not pages_condition_ids or pages_condition_ids.intersection(condition_true_ids)) # count the total number of questions, taking sets and conditions into account - counts = count_questions(page, sets, conditions) + counts = count_questions(page, sets, condition_true_ids) # filter the values_list for the attributes, and compute the total sum of counts count = len(tuple(filter(lambda value: value[0] in counts.keys(), values_list))) @@ -94,20 +107,18 @@ def compute_progress(project, snapshot=None): # get all values for this project and snapshot values = project.values.filter(snapshot=snapshot).select_related('attribute', 'option') + # get conditions for project and catalog + conditions = get_conditions_from_catalog(project.catalog) # get true conditions - conditions = resolve_conditions(project, values) - + condition_true_ids = evaluate_conditions(conditions, values) # compute sets from values (including empty values) - sets = defaultdict(lambda: defaultdict(list)) - for attribute, set_prefix, set_index in values.distinct_list(): - sets[attribute][set_prefix].append(set_index) + sets = compute_sets_from_values(values) # query distinct, non empty set values values_list = values.exclude_empty().distinct_list() - # count the total number of questions, taking sets and conditions into account - counts = count_questions(project.catalog, sets, conditions) + counts = count_questions(project.catalog, sets, condition_true_ids) # filter the values_list for the attributes, and compute the total sum of counts count = len(tuple(filter(lambda value: value[0] in counts.keys(), values_list))) @@ -141,6 +152,7 @@ def count_questions(element, sets, conditions): set_count = len(counted_sets) else: + counted_sets = set() set_count = 1 # loop over all children of this element @@ -149,21 +161,30 @@ def count_questions(element, sets, conditions): if isinstance(child, (Page, QuestionSet, Question)): child_conditions = {condition.id for condition in child.conditions.all()} else: - child_conditions = [] + child_conditions = set() if not child_conditions or child_conditions.intersection(conditions): - if isinstance(child, Question): + if isinstance(child, Question) and child.attribute: # for regular questions add the set_count to the counts dict, since the # question should be answered in every set # for optional questions add just the number of present answers, so that # only answered questions count for the progress/navigation # use the max function, since the same attribute could appear twice in the tree - if child.attribute is not None: - if child.is_optional: - child_count = sum(len(set_indexes) for set_indexes in sets[child.attribute.id].values()) - counts[child.attribute.id] = max(counts[child.attribute.id], child_count) + if child.is_optional: + child_count = sum(len(set_indexes) for set_indexes in sets[child.attribute.id].values()) + counts[child.attribute.id] = max(counts[child.attribute.id], child_count) + else: + resolved_condition_sets = set() + condition_intersection = list(child_conditions.intersection(conditions)) + # update the set_count for the current child element + # check for the sets that have conditions resolved to true + for child_condition in condition_intersection: + resolved_condition_sets.update(conditions[child_condition]) + if condition_intersection: + current_set_count = len(counted_sets & resolved_condition_sets) else: - counts[child.attribute.id] = max(counts[child.attribute.id], set_count) + current_set_count = set_count + counts[child.attribute.id] = max(counts[child.attribute.id], current_set_count) else: # for everything else, call this function recursively counts.update(count_questions(child, sets, conditions))