From 4e99174dbb2df72936b43ab380e3a292f0bb3e8d Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 7 Aug 2024 15:41:11 +0200 Subject: [PATCH 1/7] fix(progress): add check for resolved conditions in dataset count (#1103) Signed-off-by: David Wallace --- rdmo/projects/progress.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index 39c4cd8a06..1d804f4a34 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -24,10 +24,11 @@ def resolve_conditions(project, values): .distinct().select_related('source', 'target_option') # evaluate conditions - conditions = set() + conditions = defaultdict(set) for condition in catalog_conditions: if condition.resolve(values): - conditions.add(condition.id) + resolved_value_set_indexes = {value.set_index for value in values if condition.resolve([value])} + conditions[condition.id].update(resolved_value_set_indexes) # return all true conditions for this project return conditions @@ -141,6 +142,7 @@ def count_questions(element, sets, conditions): set_count = len(counted_sets) else: + counted_sets = set() set_count = 1 # loop over all children of this element @@ -149,7 +151,7 @@ def count_questions(element, sets, conditions): if isinstance(child, (Page, QuestionSet, Question)): child_conditions = {condition.id for condition in child.conditions.all()} else: - child_conditions = [] + child_conditions = set() if not child_conditions or child_conditions.intersection(conditions): if isinstance(child, Question): @@ -163,7 +165,17 @@ def count_questions(element, sets, conditions): child_count = sum(len(set_indexes) for set_indexes in sets[child.attribute.id].values()) counts[child.attribute.id] = max(counts[child.attribute.id], child_count) else: - counts[child.attribute.id] = max(counts[child.attribute.id], set_count) + resolved_condition_sets = set() + condition_intersection = list(child_conditions.intersection(conditions)) + # update the set_count for the current child element + # check for the sets that have conditions resolved to true + for child_condition in condition_intersection: + resolved_condition_sets.update(conditions[child_condition]) + if condition_intersection: + current_set_count = len(counted_sets & resolved_condition_sets) + else: + current_set_count = set_count + counts[child.attribute.id] = max(counts[child.attribute.id], current_set_count) else: # for everything else, call this function recursively counts.update(count_questions(child, sets, conditions)) From 9ab99d1d7fda642ea42a6c0b550780b83461df05 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 7 Aug 2024 16:18:22 +0200 Subject: [PATCH 2/7] Some renaming, small changes and add refactoring again --- rdmo/projects/progress.py | 83 ++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index 1d804f4a34..31b1090f15 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -8,43 +8,49 @@ from rdmo.questions.models import Page, Question, QuestionSet -def resolve_conditions(project, values): - # get all conditions for this catalog - pages_conditions_subquery = Page.objects.filter_by_catalog(project.catalog) \ +def get_catalog_conditions(catalog): + pages_conditions_subquery = Page.objects.filter_by_catalog(catalog) \ .filter(conditions=OuterRef('pk')) - questionsets_conditions_subquery = QuestionSet.objects.filter_by_catalog(project.catalog) \ + questionsets_conditions_subquery = QuestionSet.objects.filter_by_catalog(catalog) \ .filter(conditions=OuterRef('pk')) - questions_conditions_subquery = Question.objects.filter_by_catalog(project.catalog) \ + questions_conditions_subquery = Question.objects.filter_by_catalog(catalog) \ .filter(conditions=OuterRef('pk')) - catalog_conditions = Condition.objects.annotate(has_page=Exists(pages_conditions_subquery)) \ - .annotate(has_questionset=Exists(questionsets_conditions_subquery)) \ - .annotate(has_question=Exists(questions_conditions_subquery)) \ - .filter(Q(has_page=True) | Q(has_questionset=True) | Q(has_question=True)) \ - .distinct().select_related('source', 'target_option') + return Condition.objects.annotate( + has_page=Exists(pages_conditions_subquery), + has_questionset=Exists(questionsets_conditions_subquery), + has_question=Exists(questions_conditions_subquery) + ).filter( + Q(has_page=True) | Q(has_questionset=True) | Q(has_question=True) + ).distinct().select_related('source', 'target_option') - # evaluate conditions - conditions = defaultdict(set) - for condition in catalog_conditions: - if condition.resolve(values): - resolved_value_set_indexes = {value.set_index for value in values if condition.resolve([value])} - conditions[condition.id].update(resolved_value_set_indexes) - # return all true conditions for this project +def resolve_conditions(catalog, values): + # resolve conditions and return for each condition the set_indexes which resolved true + conditions = defaultdict(set) + for condition in get_catalog_conditions(catalog): + resolved_set_indexes = {value.set_index for value in values if condition.resolve([value])} + conditions[condition.id].update(resolved_set_indexes) return conditions +def compute_sets(values): + # compute sets from values (including empty values) + sets = defaultdict(lambda: defaultdict(list)) + for attribute, set_prefix, set_index in values.distinct_list(): + sets[attribute][set_prefix].append(set_index) + return sets + + def compute_navigation(section, project, snapshot=None): # get all values for this project and snapshot values = project.values.filter(snapshot=snapshot).select_related('attribute', 'option') - # get true conditions - conditions = resolve_conditions(project, values) + # resolve all conditions to get a dict mapping conditions to set_indexes + conditions = resolve_conditions(project.catalog, values) # compute sets from values (including empty values) - sets = defaultdict(lambda: defaultdict(list)) - for attribute, set_prefix, set_index in values.distinct_list(): - sets[attribute][set_prefix].append(set_index) + sets = compute_sets(values) # query distinct, non empty set values values_list = values.exclude_empty().distinct_list() @@ -95,13 +101,11 @@ def compute_progress(project, snapshot=None): # get all values for this project and snapshot values = project.values.filter(snapshot=snapshot).select_related('attribute', 'option') - # get true conditions - conditions = resolve_conditions(project, values) + # resolve all conditions to get a dict mapping conditions to set_indexes + conditions = resolve_conditions(project.catalog, values) # compute sets from values (including empty values) - sets = defaultdict(lambda: defaultdict(list)) - for attribute, set_prefix, set_index in values.distinct_list(): - sets[attribute][set_prefix].append(set_index) + sets = compute_sets(values) # query distinct, non empty set values values_list = values.exclude_empty().distinct_list() @@ -153,7 +157,11 @@ def count_questions(element, sets, conditions): else: child_conditions = set() - if not child_conditions or child_conditions.intersection(conditions): + # compute the intersection of the condition of this child with the full set of conditions + condition_intersection = child_conditions.intersection(conditions) + + # check if the element either has no condition or its conditions intersect with the full set of conditions + if not child_conditions or condition_intersection: if isinstance(child, Question): # for regular questions add the set_count to the counts dict, since the # question should be answered in every set @@ -165,17 +173,18 @@ def count_questions(element, sets, conditions): child_count = sum(len(set_indexes) for set_indexes in sets[child.attribute.id].values()) counts[child.attribute.id] = max(counts[child.attribute.id], child_count) else: - resolved_condition_sets = set() - condition_intersection = list(child_conditions.intersection(conditions)) - # update the set_count for the current child element - # check for the sets that have conditions resolved to true - for child_condition in condition_intersection: - resolved_condition_sets.update(conditions[child_condition]) if condition_intersection: - current_set_count = len(counted_sets & resolved_condition_sets) + # update the set_count for the current child element + # count only the sets that have conditions resolved to true + resolved_set_indexes = set() + for condition in condition_intersection: + resolved_set_indexes.update(conditions[condition]) + + current_count = len(counted_sets & resolved_set_indexes) else: - current_set_count = set_count - counts[child.attribute.id] = max(counts[child.attribute.id], current_set_count) + current_count = set_count + + counts[child.attribute.id] = max(counts[child.attribute.id], current_count) else: # for everything else, call this function recursively counts.update(count_questions(child, sets, conditions)) From 9de51ee0b6de1db97ae15d38dc7665ac650599c4 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 7 Aug 2024 17:05:28 +0200 Subject: [PATCH 3/7] fix(progress): add if block to resolve_conditions Signed-off-by: David Wallace --- rdmo/projects/progress.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index 31b1090f15..dc8ba37b81 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -29,8 +29,9 @@ def resolve_conditions(catalog, values): # resolve conditions and return for each condition the set_indexes which resolved true conditions = defaultdict(set) for condition in get_catalog_conditions(catalog): - resolved_set_indexes = {value.set_index for value in values if condition.resolve([value])} - conditions[condition.id].update(resolved_set_indexes) + if condition.resolve(values): + resolved_value_set_indexes = {value.set_index for value in values if condition.resolve([value])} + conditions[condition.id].update(resolved_value_set_indexes) return conditions From 0cb9c8421917a5913f5f2ee2271ecffbe759897a Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 7 Aug 2024 19:29:10 +0200 Subject: [PATCH 4/7] Consider set_prefix and set_index in resolve_conditions and fix show computation --- rdmo/projects/progress.py | 41 ++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index dc8ba37b81..38cd09df23 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -25,13 +25,18 @@ def get_catalog_conditions(catalog): ).distinct().select_related('source', 'target_option') -def resolve_conditions(catalog, values): - # resolve conditions and return for each condition the set_indexes which resolved true +def resolve_conditions(catalog, values, sets): + # resolve all conditions and return for each condition the set_prefix and set_index for which it resolved true conditions = defaultdict(set) for condition in get_catalog_conditions(catalog): - if condition.resolve(values): - resolved_value_set_indexes = {value.set_index for value in values if condition.resolve([value])} - conditions[condition.id].update(resolved_value_set_indexes) + conditions[condition.id] = { + (set_prefix, set_index) + for set_prefixes in sets.values() + for set_prefix, set_indexes in set_prefixes.items() + for set_index in set_indexes + if condition.resolve(values, set_prefix=set_prefix, set_index=set_index) + } + return conditions @@ -47,12 +52,12 @@ def compute_navigation(section, project, snapshot=None): # get all values for this project and snapshot values = project.values.filter(snapshot=snapshot).select_related('attribute', 'option') - # resolve all conditions to get a dict mapping conditions to set_indexes - conditions = resolve_conditions(project.catalog, values) - # compute sets from values (including empty values) sets = compute_sets(values) + # resolve all conditions to get a dict mapping conditions to set_indexes + conditions = resolve_conditions(project.catalog, values, sets) + # query distinct, non empty set values values_list = values.exclude_empty().distinct_list() @@ -71,7 +76,18 @@ def compute_navigation(section, project, snapshot=None): for page in catalog_section.elements: pages_conditions = {page.id for page in page.conditions.all()} - show = bool(not pages_conditions or pages_conditions.intersection(conditions)) + + # show only pages with resolved conditions, but show all pages without conditions + if pages_conditions: + # check if any valuesets for set_prefix = '' resolved + # for non collection pages restrict further to set_index = 0 + show = any( + (set_prefix == '') and (page.is_collection or set_index == 0) + for page_condition in pages_conditions + for set_prefix, set_index in conditions[page_condition] + ) + else: + show = True # count the total number of questions, taking sets and conditions into account counts = count_questions(page, sets, conditions) @@ -102,16 +118,15 @@ def compute_progress(project, snapshot=None): # get all values for this project and snapshot values = project.values.filter(snapshot=snapshot).select_related('attribute', 'option') - # resolve all conditions to get a dict mapping conditions to set_indexes - conditions = resolve_conditions(project.catalog, values) - # compute sets from values (including empty values) sets = compute_sets(values) + # resolve all conditions to get a dict mapping conditions to set_indexes + conditions = resolve_conditions(project.catalog, values, sets) + # query distinct, non empty set values values_list = values.exclude_empty().distinct_list() - # count the total number of questions, taking sets and conditions into account counts = count_questions(project.catalog, sets, conditions) From b1a97cfd082bb4dba60c678e081428f0aa3c1e66 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 7 Aug 2024 21:58:27 +0200 Subject: [PATCH 5/7] Refactor count questions to properly consider page/questionset conditions --- rdmo/projects/progress.py | 91 +++++++++++++++----------- rdmo/projects/tests/test_navigation.py | 22 +++---- rdmo/projects/tests/test_progress.py | 2 +- 3 files changed, 66 insertions(+), 49 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index 38cd09df23..2e5bcb0d8d 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -28,23 +28,22 @@ def get_catalog_conditions(catalog): def resolve_conditions(catalog, values, sets): # resolve all conditions and return for each condition the set_prefix and set_index for which it resolved true conditions = defaultdict(set) - for condition in get_catalog_conditions(catalog): - conditions[condition.id] = { - (set_prefix, set_index) - for set_prefixes in sets.values() - for set_prefix, set_indexes in set_prefixes.items() - for set_index in set_indexes - if condition.resolve(values, set_prefix=set_prefix, set_index=set_index) - } + if sets: + for condition in get_catalog_conditions(catalog): + conditions[condition.id] = { + (set_prefix, set_index) + for set_prefix, set_index in chain.from_iterable(sets.values()) + if condition.resolve(values, set_prefix=set_prefix, set_index=set_index) + } return conditions def compute_sets(values): # compute sets from values (including empty values) - sets = defaultdict(lambda: defaultdict(list)) + sets = defaultdict(set) for attribute, set_prefix, set_index in values.distinct_list(): - sets[attribute][set_prefix].append(set_index) + sets[attribute].add((set_prefix, set_index)) return sets @@ -140,44 +139,66 @@ def compute_progress(project, snapshot=None): def count_questions(element, sets, conditions): counts = defaultdict(int) - # obtain the maximum number of set-distinct values for the questions in this page or - # question set this number is how often each question is displayed and we will use - # this number to determine how often a question needs to be counted - if isinstance(element, (Page, QuestionSet)) and element.is_collection: - counted_sets = set() + if isinstance(element, (Page, QuestionSet)): + # obtain the maximum number of set-distinct values for the questions in this page or + # question set. this number is how often each question is displayed and we will use + # this number to determine how often a question needs to be counted + element_sets = set() # count the sets for the id attribute of the page or question if element.attribute is not None: - # nested loop over the separate set_index lists in sets[element.attribute.id] - for set_index in chain.from_iterable(sets[element.attribute.id].values()): - counted_sets.add(set_index) + # nested loop over the separate set_prefix, set_index lists in sets[element.attribute.id] + for set_prefix, set_index in sets[element.attribute.id]: + element_sets.add((set_prefix, set_index)) # count the sets for the questions in the page or question for child in element.elements: if isinstance(child, Question): if child.attribute is not None: - # nested loop over the separate set_index lists in sets[element.attribute.id] - for set_index in chain.from_iterable(sets[child.attribute.id].values()): - counted_sets.add(set_index) + # nested loop over the separate set_prefix, set_index lists in sets[element.attribute.id] + for set_prefix, set_index in sets[child.attribute.id]: + element_sets.add((set_prefix, set_index)) - set_count = len(counted_sets) - else: - counted_sets = set() - set_count = 1 + # look for the elements conditions + element_conditions = {condition.id for condition in element.conditions.all()} + + # if this element has conditions: check if those conditions resolve for the found sets + if element_conditions: + # compute the intersection of the conditions of this child with the full set of conditions + element_condition_intersection = { + (set_prefix, set_index) + for condition_id, condition in conditions.items() + for set_prefix, set_index in conditions[condition_id] + if condition_id in element_conditions + } + + resolved_sets = element_sets.intersection(element_condition_intersection) + if resolved_sets: + set_count = len(resolved_sets) if element.is_collection else 1 + else: + # return immediately and do not consider children, this page/questionset is hidden + return counts + else: + set_count = len(element_sets) if element.is_collection else 1 # loop over all children of this element for child in element.elements: - # look for the elements conditions + # look for the child element's conditions if isinstance(child, (Page, QuestionSet, Question)): child_conditions = {condition.id for condition in child.conditions.all()} else: child_conditions = set() - # compute the intersection of the condition of this child with the full set of conditions - condition_intersection = child_conditions.intersection(conditions) + # compute the intersection of the conditions of this child with the full set of conditions + child_condition_intersection = { + (set_prefix, set_index) + for condition_id, condition in conditions.items() + for set_prefix, set_index in conditions[condition_id] + if condition_id in child_conditions + } # check if the element either has no condition or its conditions intersect with the full set of conditions - if not child_conditions or condition_intersection: + if not child_conditions or child_condition_intersection: if isinstance(child, Question): # for regular questions add the set_count to the counts dict, since the # question should be answered in every set @@ -186,17 +207,13 @@ def count_questions(element, sets, conditions): # use the max function, since the same attribute could appear twice in the tree if child.attribute is not None: if child.is_optional: - child_count = sum(len(set_indexes) for set_indexes in sets[child.attribute.id].values()) + child_count = len(sets[child.attribute.id]) counts[child.attribute.id] = max(counts[child.attribute.id], child_count) else: - if condition_intersection: - # update the set_count for the current child element + if child_condition_intersection: + # update the set_count for the current question (child element) # count only the sets that have conditions resolved to true - resolved_set_indexes = set() - for condition in condition_intersection: - resolved_set_indexes.update(conditions[condition]) - - current_count = len(counted_sets & resolved_set_indexes) + current_count = len(element_sets.intersection(child_condition_intersection)) else: current_count = set_count diff --git a/rdmo/projects/tests/test_navigation.py b/rdmo/projects/tests/test_navigation.py index 0af6e30f10..6128a02e7f 100644 --- a/rdmo/projects/tests/test_navigation.py +++ b/rdmo/projects/tests/test_navigation.py @@ -24,27 +24,27 @@ 'http://example.com/terms/questions/catalog/set/individual-collection': (9, 10, True), 'http://example.com/terms/questions/catalog/set/collection-single': (14, 18, True), 'http://example.com/terms/questions/catalog/set/collection-collection': (16, 20, True), - 'http://example.com/terms/questions/catalog/conditions': (15, 23), + 'http://example.com/terms/questions/catalog/conditions': (7, 15), 'http://example.com/terms/questions/catalog/conditions/input': (2, 2, True), 'http://example.com/terms/questions/catalog/conditions/text_contains': (1, 1, True), - 'http://example.com/terms/questions/catalog/conditions/text_empty': (1, 1, False), + 'http://example.com/terms/questions/catalog/conditions/text_empty': (0, 0, False), 'http://example.com/terms/questions/catalog/conditions/text_equal': (1, 1, True), - 'http://example.com/terms/questions/catalog/conditions/text_greater_than': (1, 1, False), - 'http://example.com/terms/questions/catalog/conditions/text_greater_than_equal': (1, 1, False), - 'http://example.com/terms/questions/catalog/conditions/text_lesser_than': (1, 1, False), - 'http://example.com/terms/questions/catalog/conditions/text_lesser_than_equal': (1, 1, False), + 'http://example.com/terms/questions/catalog/conditions/text_greater_than': (0, 0, False), + 'http://example.com/terms/questions/catalog/conditions/text_greater_than_equal': (0, 0, False), + 'http://example.com/terms/questions/catalog/conditions/text_lesser_than': (0, 0, False), + 'http://example.com/terms/questions/catalog/conditions/text_lesser_than_equal': (0, 0, False), 'http://example.com/terms/questions/catalog/conditions/text_not_empty': (1, 1, True), - 'http://example.com/terms/questions/catalog/conditions/text_not_equal': (1, 1, False), - 'http://example.com/terms/questions/catalog/conditions/option_empty': (1, 1, False), + 'http://example.com/terms/questions/catalog/conditions/text_not_equal': (0, 0, False), + 'http://example.com/terms/questions/catalog/conditions/option_empty': (0, 0, False), 'http://example.com/terms/questions/catalog/conditions/option_equal': (1, 1, True), 'http://example.com/terms/questions/catalog/conditions/option_not_empty': (1, 1, True), - 'http://example.com/terms/questions/catalog/conditions/option_not_equal': (1, 1, False), + 'http://example.com/terms/questions/catalog/conditions/option_not_equal': (0, 0, False), 'http://example.com/terms/questions/catalog/conditions/set': (0, 2, True), 'http://example.com/terms/questions/catalog/conditions/set_set': (0, 2, True), 'http://example.com/terms/questions/catalog/conditions/optionset': (0, 2, True), 'http://example.com/terms/questions/catalog/conditions/text_set': (0, 2, True), - 'http://example.com/terms/questions/catalog/blocks': (9, 12), - 'http://example.com/terms/questions/catalog/blocks/set': (9, 12, True), + 'http://example.com/terms/questions/catalog/blocks': (9, 18), + 'http://example.com/terms/questions/catalog/blocks/set': (9, 18, True), } diff --git a/rdmo/projects/tests/test_progress.py b/rdmo/projects/tests/test_progress.py index 2b653dc4cd..d153df624b 100644 --- a/rdmo/projects/tests/test_progress.py +++ b/rdmo/projects/tests/test_progress.py @@ -6,7 +6,7 @@ projects = [1, 11] results_map = { - 1: (58, 88), + 1: (58, 94), 11: (0, 36) } From aa2846a17b8f75e7ef13bce74c5b7fdbd010c1cc Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 7 Aug 2024 22:35:54 +0200 Subject: [PATCH 6/7] Fix tests --- rdmo/projects/tests/test_viewset_project_progress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdmo/projects/tests/test_viewset_project_progress.py b/rdmo/projects/tests/test_viewset_project_progress.py index 45f900d68f..5d7529261f 100644 --- a/rdmo/projects/tests/test_viewset_project_progress.py +++ b/rdmo/projects/tests/test_viewset_project_progress.py @@ -119,7 +119,7 @@ def test_progress_post_unchanged(db, client): project = Project.objects.get(id=1) project.progress_count = progress_count = 58 # the progress in the fixture is not up-to-date - project.progress_total = progress_total = 88 + project.progress_total = progress_total = 94 project.save() project.refresh_from_db() project_updated = project.updated From 514a43f8a4305a14ebbcdbf02462320f37ce808f Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 8 Aug 2024 09:44:21 +0200 Subject: [PATCH 7/7] refactor(progress): simplify count of counts definition Signed-off-by: David Wallace --- rdmo/projects/progress.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdmo/projects/progress.py b/rdmo/projects/progress.py index 2e5bcb0d8d..efe7421369 100644 --- a/rdmo/projects/progress.py +++ b/rdmo/projects/progress.py @@ -92,7 +92,7 @@ def compute_navigation(section, project, snapshot=None): counts = count_questions(page, sets, conditions) # filter the values_list for the attributes, and compute the total sum of counts - count = len(tuple(filter(lambda value: value[0] in counts.keys(), values_list))) + count = sum(1 for value in values_list if value[0] in counts) total = sum(counts.values()) navigation_section['count'] += count @@ -130,7 +130,7 @@ def compute_progress(project, snapshot=None): counts = count_questions(project.catalog, sets, conditions) # filter the values_list for the attributes, and compute the total sum of counts - count = len(tuple(filter(lambda value: value[0] in counts.keys(), values_list))) + count = sum(1 for value in values_list if value[0] in counts) total = sum(counts.values()) return count, total