Skip to content

Commit

Permalink
[FEATURE
Browse files Browse the repository at this point in the history
] Keyword evaluation behavior toggleable
  • Loading branch information
jmbannon committed Nov 26, 2024
1 parent 8c3f852 commit 53b2474
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,13 @@ contains
:description:
Returns True if ``contains`` is in ``string``. False otherwise.

contains_all
~~~~~~~~~~~~
:spec: ``contains_all(string: String, contains_array: Array) -> Boolean``

:description:
Returns True if all elements in ``contains_array`` are in ``string``. False otherwise.

contains_any
~~~~~~~~~~~~
:spec: ``contains_any(string: String, contains_array: Array) -> Boolean``
Expand Down
17 changes: 13 additions & 4 deletions docs/source/prebuilt_presets/helpers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ Filter Keywords

``Filter Keywords`` can include or exclude media with any of the listed keywords. Both keywords and title/description are lower-cased before filtering.

The default keyword evaluation is ``ANY``, meaning the filter will succeed if any of the keywords are present. Set the respective ``_eval`` variable to ``ANY`` or ``ALL`` to choose whether any or all of the keywords must be present.

Supports the following override variables:

* ``title_include_keywords``
* ``title_exclude_keywords``
* ``description_include_keywords``
* ``description_exclude_keywords``
* ``title_include_keywords``, ``title_include_eval``
* ``title_exclude_keywords``, ``title_exclude_eval``
* ``description_include_keywords``, ``description_include_eval``
* ``description_exclude_keywords``, ``description_exclude_eval``

.. tip::

Expand All @@ -61,6 +63,13 @@ Supports the following override variables:
title_include_keywords:
- "To Catch a Smuggler"
= Sports:
"~Maple Leafs Highlights":
url: "https://www.youtube.com/@NHL"
title_include_eval: "ALL"
title_include_keywords:
- "maple leafs"
- "highlights"
Chunk Downloads
---------------
Expand Down
34 changes: 27 additions & 7 deletions src/ytdl_sub/prebuilt_presets/helpers/filtering.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ presets:
description_include_keywords: "{ [] }"
description_exclude_keywords: "{ [] }"

title_include_eval: "ANY"
title_exclude_eval: "ANY"
description_include_eval: "ANY"
description_exclude_eval: "ANY"

"%ensure_string": >-
{
%assert_then(
Expand All @@ -32,21 +37,36 @@ presets:
)
}
# Checks the keyword list against the variable using the requested eval type.
# Calls %contains_any when the eval type equals 'any', %contains_all when it
# equals 'all', and throws 'Keyword eval must be either ANY or ALL' otherwise.
# $0 - var to evaluate
# $1 - keyword list
# $2 - eval type (passed through %ensure_string before being compared)
"%contains_keywords_inner": >-
{
%elif(
%eq(%ensure_string($2), 'any'),
%contains_any( $0, $1 ),
%eq(%ensure_string($2), 'all'),
%contains_all( $0, $1 ),
%throw('Keyword eval must be either ANY or ALL')
)
}
# $0 - var to evaluate
# $1 - keyword list
# $2 - variable name for error messages
# $3 - default return if keyword list is empty
# $3 - keyword eval
# $4 - default return if keyword list is empty
"%contains_keywords": >-
{
%if(
%bool( $1 ),
%contains_any( %lower($0), %ensure_lower_array($1, $2) ),
$3
%contains_keywords_inner( %lower($0), %ensure_lower_array($1, $2), $3 ),
$4
)
}
filter_exclude:
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords',false) }"
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', title_include_eval, true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', description_include_eval, true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', title_exclude_eval, false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords', description_exclude_eval, false) }"
14 changes: 14 additions & 0 deletions src/ytdl_sub/script/functions/string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ def contains_any(string: String, contains_array: Array) -> Boolean:
)
)

@staticmethod
def contains_all(string: String, contains_array: Array) -> Boolean:
    """
    :description:
      Returns True if all elements in ``contains_array`` are in ``string``. False otherwise.
      Elements that are not a String, Integer, Boolean, or Float are skipped.
    """
    scalar_types = (String, Integer, Boolean, Float)
    # Lazily drop non-scalar elements; only the remaining ones take part in the check.
    scalars = (element for element in contains_array.value if isinstance(element, scalar_types))
    return Boolean(all(str(element) in string.value for element in scalars))

@staticmethod
def slice(string: String, start: Integer, end: Optional[Integer] = None) -> String:
"""
Expand Down
102 changes: 99 additions & 3 deletions tests/integration/prebuilt_presets/test_filter_keywords.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import re

import pytest
from expected_transaction_log import assert_transaction_log_matches

from ytdl_sub.script.utils.exceptions import UserThrownRuntimeError
from ytdl_sub.subscriptions.subscription import Subscription
from ytdl_sub.utils.exceptions import ValidationException


@pytest.fixture
Expand Down Expand Up @@ -77,6 +74,39 @@ def test_title(
transaction_log_summary_file_name=f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt",
)

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_title_all(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    filter_mode: str,
):
    """
    Dry-run a subscription whose title keywords use the 'all' eval type and
    compare the transaction log with the stored summary for ``filter_mode``.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"title_{filter_mode}_eval"] = "all"
    overrides[f"title_{filter_mode}_keywords"] = ["MOCK", "ENTRY", "20-3"]

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    mocked_entries = mock_download_collection_entries(
        is_youtube_channel=False, num_urls=1, is_dry_run=True
    )
    with mocked_entries:
        transaction_log = subscription.download(dry_run=True)

    summary_file = f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt"
    assert_transaction_log_matches(
        output_directory=output_directory,
        transaction_log=transaction_log,
        transaction_log_summary_file_name=summary_file,
    )

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_description(
self,
Expand Down Expand Up @@ -108,6 +138,38 @@ def test_description(
transaction_log_summary_file_name=f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt",
)

@pytest.mark.parametrize("filter_mode", ["include", "exclude"])
def test_description_all(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    filter_mode: str,
):
    """
    Dry-run a subscription whose description keywords use the 'ALL' eval type and
    compare the transaction log with the stored summary for ``filter_mode``.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"description_{filter_mode}_eval"] = "ALL"
    overrides[f"description_{filter_mode}_keywords"] = ["descr", "iption"]

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    mocked_entries = mock_download_collection_entries(
        is_youtube_channel=False, num_urls=1, is_dry_run=True
    )
    with mocked_entries:
        transaction_log = subscription.download(dry_run=True)

    summary_file = f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt"
    assert_transaction_log_matches(
        output_directory=output_directory,
        transaction_log=transaction_log,
        transaction_log_summary_file_name=summary_file,
    )

@pytest.mark.parametrize(
"keyword_variable",
[
Expand Down Expand Up @@ -169,3 +231,37 @@ def test_error_not_string_keyword(
pytest.raises(UserThrownRuntimeError, match="filter keywords must be strings"),
):
_ = subscription.download(dry_run=True)

@pytest.mark.parametrize(
    "keyword_variable",
    [
        "title_include",
        "title_exclude",
        "description_include",
        "description_exclude",
    ],
)
def test_error_not_correct_eval(
    self,
    config,
    filter_subscription_dict,
    output_directory,
    subscription_name,
    mock_download_collection_entries,
    keyword_variable,
):
    """
    Set an eval override that is neither ANY nor ALL and expect the dry-run
    download to raise the user-thrown "Keyword eval must be either ANY or ALL" error.
    """
    overrides = filter_subscription_dict["overrides"]
    overrides[f"{keyword_variable}_keywords"] = ["hmm"]
    overrides[f"{keyword_variable}_eval"] = "LOL"

    subscription = Subscription.from_dict(
        config=config,
        preset_name=subscription_name,
        preset_dict=filter_subscription_dict,
    )

    with (
        mock_download_collection_entries(is_youtube_channel=False, num_urls=1, is_dry_run=True),
        pytest.raises(UserThrownRuntimeError, match="Keyword eval must be either ANY or ALL"),
    ):
        _ = subscription.download(dry_run=True)
8 changes: 8 additions & 0 deletions tests/unit/script/functions/test_string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ def test_contains_any(self, value, expected_output):
output = single_variable_output(f"{{%contains_any('a brown dog', {value})}}")
assert output == expected_output

@pytest.mark.parametrize(
    "value, expected_output",
    [
        ("['a', 'b', 'c']", False),
        ("['nope', [], {}]", False),
        ("['a', 'dog']", True),
    ],
)
def test_contains_all(self, value, expected_output):
    """Evaluate ``%contains_all`` against 'a brown dog' for each candidate array."""
    script = f"{{%contains_all('a brown dog', {value})}}"
    assert single_variable_output(script) == expected_output

@pytest.mark.parametrize(
"input_string, split, max_split, expected_output",
[
Expand Down

0 comments on commit 53b2474

Please sign in to comment.