From 626fff46074d4de761b7d9140c9685e3ed809e2c Mon Sep 17 00:00:00 2001
From: Kevin Klein <7267523+kklein@users.noreply.github.com>
Date: Wed, 5 Jun 2024 10:09:34 +0200
Subject: [PATCH] Add example for usage of `optuna` (#149)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com>
---
 docs/examples/example_optuna.rst | 396 +++++++++++++++++++++++++++++++
 docs/examples/index.rst          |   1 +
 2 files changed, 397 insertions(+)
 create mode 100644 docs/examples/example_optuna.rst

diff --git a/docs/examples/example_optuna.rst b/docs/examples/example_optuna.rst
new file mode 100644
index 00000000..9ec532e3
--- /dev/null
+++ b/docs/examples/example_optuna.rst
@@ -0,0 +1,396 @@
+.. _example-optuna:
+
+=========================================
+ Example: Model selection with optuna
+=========================================
+
+Motivation
+----------
+
+We know that model selection and/or hyperparameter optimization (HPO)
+can have a massive impact on prediction quality in regular Machine
+Learning. Model selection and HPO are of substantial importance for
+CATE estimation with MetaLearners, too -- see e.g. `Machlanski et al. `_.
+
+However, model selection and HPO for MetaLearners look quite different
+from what we're used to from e.g. simple supervised learning problems.
+Concretely,
+
+* In terms of a MetaLearner's option space, there are several levels
+  to optimize over:
+
+  1. The MetaLearner architecture, e.g. R-Learner vs DR-Learner
+  2. The model to choose per base estimator of said MetaLearner
+     architecture, e.g. ``LogisticRegression`` vs ``LGBMClassifier``
+  3. The model hyperparameters per base model
+
+* On a conceptual level, it is not clear how to measure model quality
+  for MetaLearners. As a proxy for the underlying quantity of
+  interest, one might look into base model performance, the R-Loss of
+  the CATE estimates or some more elaborate approaches alluded to by
+  `Machlanski et al. `_.
+
+We think that HPO approaches can be divided into two camps:
+
+* Exploration of (hyperparameter, metric evaluation) pairs where the
+  pairs do not influence each other (e.g. grid search, random search)
+
+* Exploration of (hyperparameter, metric evaluation) pairs where the
+  pairs do influence each other (e.g. Bayesian optimization,
+  evolutionary algorithms); in other words, there is a feedback loop:
+  the results of earlier samples inform which samples are drawn next
+  (see the sketch below)
+
+In this example, we will illustrate the latter camp based on an
+application of `optuna `_ -- a popular framework for HPO -- in
+interplay with ``metalearners``.
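+
+In ``optuna``, this distinction maps onto the choice of sampler. The
+following minimal sketch -- using a made-up toy objective which is
+unrelated to MetaLearners -- contrasts the two camps:
+
+.. code-block:: python
+
+    import optuna
+
+    # Toy objective, purely for illustration: minimize (x - 2)^2.
+    def toy_objective(trial):
+        x = trial.suggest_float("x", -10, 10)
+        return (x - 2) ** 2
+
+    # First camp: every sample is drawn independently of all previous
+    # (hyperparameter, metric evaluation) pairs.
+    random_study = optuna.create_study(
+        direction="minimize", sampler=optuna.samplers.RandomSampler(seed=0)
+    )
+    random_study.optimize(toy_objective, n_trials=20)
+
+    # Second camp: optuna's default TPE sampler conditions new samples
+    # on the pairs observed so far, i.e. there is a feedback loop.
+    tpe_study = optuna.create_study(
+        direction="minimize", sampler=optuna.samplers.TPESampler(seed=0)
+    )
+    tpe_study.optimize(toy_objective, n_trials=20)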
+
+Installation
+------------
+
+In order to use ``optuna``, we first need to install the package.
+We can do so either via conda and conda-forge
+
+.. code-block:: console
+
+    $ conda install optuna -c conda-forge
+
+or via pip and PyPI
+
+.. code-block:: console
+
+    $ pip install optuna
+
+Usage
+-----
+
+Loading the data
+""""""""""""""""
+
+Just like in our :ref:`example on estimating CATEs with a MetaLearner
+`, we will first load some experiment data:
+
+.. code-block:: python
+
+    import pandas as pd
+    from git_root import git_root
+
+    df = pd.read_csv(git_root("data/learning_mindset.zip"))
+    outcome_column = "achievement_score"
+    treatment_column = "intervention"
+    feature_columns = [
+        column
+        for column in df.columns
+        if column not in [outcome_column, treatment_column]
+    ]
+    categorical_feature_columns = [
+        "ethnicity",
+        "gender",
+        "frst_in_family",  # spellchecker:disable-line
+        "school_urbanicity",
+        "schoolid",
+    ]
+    # Note that explicitly setting the dtype of these features to category
+    # allows both lightgbm as well as shap plots to
+    # 1. Operate on features which are not of type int, bool or float
+    # 2. Interpret categoricals with int values as categoricals,
+    #    as opposed to as ordinals/numericals.
+    for categorical_feature_column in categorical_feature_columns:
+        df[categorical_feature_column] = df[categorical_feature_column].astype(
+            "category"
+        )
+
+Now that we've loaded the experiment data, we can split it up into
+train and validation data:
+
+.. code-block:: python
+
+    from sklearn.model_selection import train_test_split
+
+    X_train, X_validation, y_train, y_validation, w_train, w_validation = train_test_split(
+        df[feature_columns], df[outcome_column], df[treatment_column], test_size=0.25
+    )
+
+
+Optimizing base model hyperparameters
+"""""""""""""""""""""""""""""""""""""
+
+Let's say that we want to work with an
+:class:`~metalearners.rlearner.RLearner` and ``LightGBM`` estimators
+for base models. We will seek to optimize three hyperparameters of
+our base models:
+
+* The number of estimators ``n_estimators`` of our outcome model.
+* The max depth ``max_depth`` of our outcome model.
+* The number of estimators ``n_estimators`` of our treatment effect
+  model.
+
+We can mold this ambition into the following simple script creating an
+``optuna`` ``study``:
+
+.. code-block:: python
+
+    import optuna
+    from metalearners import RLearner
+    from lightgbm import LGBMRegressor, LGBMClassifier
+
+    def objective(trial):
+        n_estimators_nuisance = trial.suggest_int("n_estimators_nuisance", 5, 250)
+        max_depth_nuisance = trial.suggest_int("max_depth_nuisance", 3, 30)
+        n_estimators_treatment = trial.suggest_int("n_estimators_treatment", 5, 100)
+
+        rlearner = RLearner(
+            nuisance_model_factory=LGBMRegressor,
+            nuisance_model_params={
+                "n_estimators": n_estimators_nuisance,
+                "max_depth": max_depth_nuisance,
+                "verbosity": -1,
+            },
+            propensity_model_factory=LGBMClassifier,
+            propensity_model_params={"n_estimators": 5, "verbosity": -1},
+            treatment_model_factory=LGBMRegressor,
+            treatment_model_params={
+                "n_estimators": n_estimators_treatment,
+                "verbosity": -1,
+            },
+            is_classification=False,
+            n_variants=2,
+        )
+
+        rlearner.fit(X=X_train, y=y_train, w=w_train)
+
+        return rlearner.evaluate(
+            X=X_validation,
+            y=y_validation,
+            w=w_validation,
+            is_oos=True,
+        )["r_loss_1_vs_0"]
+
+    study = optuna.create_study(direction="minimize")
+    study.optimize(objective, n_trials=100)
+
+Note that the metric to be optimized is the R-Loss here. We can obtain
+it -- among other metrics -- via the
+:class:`~metalearners.rlearner.RLearner`'s
+:meth:`~metalearners.rlearner.RLearner.evaluate` method. We can see it
+evolve as follows across the 100 trials:
+
+.. code-block:: console
+
+    [I 2024-06-03 19:20:13,029] A new study created in memory with name: no-name-342fd485-82a8-403b-9201-c98d01274024
+    [I 2024-06-03 19:20:18,481] Trial 0 finished with value: 0.8058146296845027 and parameters: {'n_estimators_nuisance': 78, 'max_depth_nuisance': 7, 'n_estimators_treatment': 28}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:20:25,886] Trial 1 finished with value: 0.8158949566481672 and parameters: {'n_estimators_nuisance': 110, 'max_depth_nuisance': 5, 'n_estimators_treatment': 90}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:20:31,482] Trial 2 finished with value: 0.8059918319487219 and parameters: {'n_estimators_nuisance': 108, 'max_depth_nuisance': 24, 'n_estimators_treatment': 10}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:20:37,337] Trial 3 finished with value: 0.8202956725566805 and parameters: {'n_estimators_nuisance': 16, 'max_depth_nuisance': 10, 'n_estimators_treatment': 99}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:20:47,279] Trial 4 finished with value: 0.8289376079649482 and parameters: {'n_estimators_nuisance': 168, 'max_depth_nuisance': 16, 'n_estimators_treatment': 58}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:20:58,013] Trial 5 finished with value: 0.8301790091587447 and parameters: {'n_estimators_nuisance': 194, 'max_depth_nuisance': 26, 'n_estimators_treatment': 64}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:21:09,459] Trial 6 finished with value: 0.8295207764791498 and parameters: {'n_estimators_nuisance': 239, 'max_depth_nuisance': 27, 'n_estimators_treatment': 39}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:21:17,956] Trial 7 finished with value: 0.81843425756809 and parameters: {'n_estimators_nuisance': 195, 'max_depth_nuisance': 4, 'n_estimators_treatment': 93}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:21:28,087] Trial 8 finished with value: 0.8281067745873746 and parameters: {'n_estimators_nuisance': 143, 'max_depth_nuisance': 23, 'n_estimators_treatment': 100}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:21:36,216] Trial 9 finished with value: 0.8174537107742403 and parameters: {'n_estimators_nuisance': 82, 'max_depth_nuisance': 15, 'n_estimators_treatment': 87}. Best is trial 0 with value: 0.8058146296845027.
+    [I 2024-06-03 19:21:39,475] Trial 10 finished with value: 0.7953483370042609 and parameters: {'n_estimators_nuisance': 37, 'max_depth_nuisance': 10, 'n_estimators_treatment': 14}. Best is trial 10 with value: 0.7953483370042609.
+    [I 2024-06-03 19:21:42,757] Trial 11 finished with value: 0.7952682807538822 and parameters: {'n_estimators_nuisance': 37, 'max_depth_nuisance': 10, 'n_estimators_treatment': 13}. Best is trial 11 with value: 0.7952682807538822.
+    [I 2024-06-03 19:21:44,104] Trial 12 finished with value: 0.8249424971850418 and parameters: {'n_estimators_nuisance': 9, 'max_depth_nuisance': 11, 'n_estimators_treatment': 5}. Best is trial 11 with value: 0.7952682807538822.
+    [I 2024-06-03 19:21:48,302] Trial 13 finished with value: 0.7984298284740843 and parameters: {'n_estimators_nuisance': 38, 'max_depth_nuisance': 12, 'n_estimators_treatment': 26}. Best is trial 11 with value: 0.7952682807538822.
+    ...
+    [I 2024-06-03 19:27:47,641] Trial 97 finished with value: 0.7884468426483208 and parameters: {'n_estimators_nuisance': 135, 'max_depth_nuisance': 3, 'n_estimators_treatment': 12}. Best is trial 73 with value: 0.7839636658643124.
+    [I 2024-06-03 19:27:50,443] Trial 98 finished with value: 0.7880500972663745 and parameters: {'n_estimators_nuisance': 133, 'max_depth_nuisance': 3, 'n_estimators_treatment': 12}. Best is trial 73 with value: 0.7839636658643124.
+    [I 2024-06-03 19:27:53,078] Trial 99 finished with value: 0.7869700632283656 and parameters: {'n_estimators_nuisance': 139, 'max_depth_nuisance': 3, 'n_estimators_treatment': 8}. Best is trial 73 with value: 0.7839636658643124.
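+
+Once the study has concluded, ``optuna``'s ``Study`` object gives us
+access to its results via ``best_value`` and ``best_params``. As a
+minimal sketch, we might retrieve the best R-Loss observed and refit
+an R-Learner with the winning hyperparameters:
+
+.. code-block:: python
+
+    # Lowest R-Loss observed across all trials and the corresponding
+    # hyperparameter assignment.
+    print(study.best_value)
+    print(study.best_params)
+
+    best = study.best_params
+    best_rlearner = RLearner(
+        nuisance_model_factory=LGBMRegressor,
+        nuisance_model_params={
+            "n_estimators": best["n_estimators_nuisance"],
+            "max_depth": best["max_depth_nuisance"],
+            "verbosity": -1,
+        },
+        propensity_model_factory=LGBMClassifier,
+        propensity_model_params={"n_estimators": 5, "verbosity": -1},
+        treatment_model_factory=LGBMRegressor,
+        treatment_model_params={
+            "n_estimators": best["n_estimators_treatment"],
+            "verbosity": -1,
+        },
+        is_classification=False,
+        n_variants=2,
+    )
+    best_rlearner.fit(X=X_train, y=y_train, w=w_train)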
+
+Alternatively, if we'd like to optimize a base model in light of its
+individual metric -- in this case the RMSE on the observed outcomes of
+the outcome model -- we can easily do that, too:
+
+.. code-block:: python
+
+    from sklearn.metrics import root_mean_squared_error
+
+    def objective_individual(trial):
+        n_estimators_nuisance = trial.suggest_int("n_estimators_nuisance", 5, 250)
+        max_depth_nuisance = trial.suggest_int("max_depth_nuisance", 3, 30)
+
+        rlearner = RLearner(
+            nuisance_model_factory=LGBMRegressor,
+            nuisance_model_params={
+                "n_estimators": n_estimators_nuisance,
+                "max_depth": max_depth_nuisance,
+                "verbosity": -1,
+            },
+            propensity_model_factory=LGBMClassifier,
+            treatment_model_factory=LGBMRegressor,
+            is_classification=False,
+            n_variants=2,
+        )
+
+        # We only fit and evaluate the outcome nuisance model here;
+        # no other base model is trained.
+        rlearner.fit_nuisance(
+            X=X_train, y=y_train, model_kind="outcome_model", model_ord=0
+        )
+
+        outcome_predictions = rlearner.predict_nuisance(
+            X=X_validation, model_kind="outcome_model", model_ord=0, is_oos=True
+        )
+
+        return root_mean_squared_error(y_validation, outcome_predictions)
+
+    study_individual = optuna.create_study(direction="minimize")
+    study_individual.optimize(objective_individual, n_trials=100)
+
+
+Leading to the following output:
+
+.. code-block:: console
+
+    [I 2024-06-03 19:35:14,137] A new study created in memory with name: no-name-94241529-da38-41bd-a486-040308b1f023
+    [I 2024-06-03 19:35:18,853] Trial 0 finished with value: 0.8203088574399058 and parameters: {'n_estimators_nuisance': 96, 'max_depth_nuisance': 28}. Best is trial 0 with value: 0.8203088574399058.
+    [I 2024-06-03 19:35:27,959] Trial 1 finished with value: 0.8383203265114683 and parameters: {'n_estimators_nuisance': 236, 'max_depth_nuisance': 13}. Best is trial 0 with value: 0.8203088574399058.
+    [I 2024-06-03 19:35:32,507] Trial 2 finished with value: 0.821097910375138 and parameters: {'n_estimators_nuisance': 100, 'max_depth_nuisance': 19}. Best is trial 0 with value: 0.8203088574399058.
+    [I 2024-06-03 19:35:35,020] Trial 3 finished with value: 0.8069517664852458 and parameters: {'n_estimators_nuisance': 227, 'max_depth_nuisance': 3}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:35:37,044] Trial 4 finished with value: 0.8108314861878544 and parameters: {'n_estimators_nuisance': 36, 'max_depth_nuisance': 7}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:35:45,037] Trial 5 finished with value: 0.8324009294214451 and parameters: {'n_estimators_nuisance': 189, 'max_depth_nuisance': 23}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:35:51,032] Trial 6 finished with value: 0.8255894318735717 and parameters: {'n_estimators_nuisance': 134, 'max_depth_nuisance': 23}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:35:57,481] Trial 7 finished with value: 0.8295098178376358 and parameters: {'n_estimators_nuisance': 160, 'max_depth_nuisance': 26}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:36:04,078] Trial 8 finished with value: 0.8301142921842086 and parameters: {'n_estimators_nuisance': 165, 'max_depth_nuisance': 19}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:36:12,468] Trial 9 finished with value: 0.8353268112420604 and parameters: {'n_estimators_nuisance': 213, 'max_depth_nuisance': 23}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:36:12,604] Trial 10 finished with value: 0.8941068693906029 and parameters: {'n_estimators_nuisance': 5, 'max_depth_nuisance': 3}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:36:12,968] Trial 11 finished with value: 0.8195432140897054 and parameters: {'n_estimators_nuisance': 21, 'max_depth_nuisance': 3}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:36:15,676] Trial 12 finished with value: 0.8120941736598022 and parameters: {'n_estimators_nuisance': 51, 'max_depth_nuisance': 9}. Best is trial 3 with value: 0.8069517664852458.
+    [I 2024-06-03 19:36:19,044] Trial 13 finished with value: 0.8157459078823713 and parameters: {'n_estimators_nuisance': 64, 'max_depth_nuisance': 8}. Best is trial 3 with value: 0.8069517664852458.
+    ...
+    [I 2024-06-03 19:40:05,394] Trial 97 finished with value: 0.8121024415590181 and parameters: {'n_estimators_nuisance': 55, 'max_depth_nuisance': 6}. Best is trial 25 with value: 0.8022308112279057.
+    [I 2024-06-03 19:40:06,124] Trial 98 finished with value: 0.8039230485915543 and parameters: {'n_estimators_nuisance': 47, 'max_depth_nuisance': 3}. Best is trial 25 with value: 0.8022308112279057.
+    [I 2024-06-03 19:40:07,671] Trial 99 finished with value: 0.8056511905287118 and parameters: {'n_estimators_nuisance': 67, 'max_depth_nuisance': 4}. Best is trial 25 with value: 0.8022308112279057.
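+
+To get a feeling for how either search progressed, we can rely on
+``optuna``'s built-in visualizations -- note that
+``optuna.visualization.plot_optimization_history`` requires ``plotly``
+to be installed:
+
+.. code-block:: python
+
+    from optuna.visualization import plot_optimization_history
+
+    # Interactive plot of the objective value of each trial as well as
+    # the best value observed so far.
+    fig = plot_optimization_history(study_individual)
+    fig.show()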
+
+Optimizing over architectures
+"""""""""""""""""""""""""""""
+
+``optuna``'s flexibility allows not only for searching over classical
+hyperparameters of a given estimator but also for iterating over the
+choice of base estimator architecture. Pushing it a step further, one
+can even optimize over the space of MetaLearner architectures.
+
+In the following example we will attempt to optimize over the
+following search space:
+
+1. MetaLearner: R-Learner vs DR-Learner
+2. Nuisance model: ``LGBMRegressor`` vs ``Ridge``
+3. Hyperparameter: ``n_estimators`` if ``LGBMRegressor`` and ``alpha``
+   if ``Ridge``
+
+Note that the choice of the base learner in the second step should be
+conditioned on the choice in the first step. In other words, we do not
+want to update our beliefs about outcome learners for the R-Learner by
+observing outcome learners for the DR-Learner. The same idea applies
+to the interplay between steps two and three. This conditioning
+becomes apparent in the source code below via the suffixes of the
+sampled parameter names: e.g. ``nuisance_r`` is only sampled (and
+thereby updated) if we are using an R-Learner, and ``nuisance_dr`` is
+only sampled (and thereby updated) if we are using a DR-Learner.
+
+.. code-block:: python
+
+    import optuna
+    from metalearners.utils import metalearner_factory, simplify_output
+    from sklearn.linear_model import Ridge
+    from metalearners.rlearner import r_loss
+
+
+    # Arbitrary models to produce the outcome estimates and propensity
+    # scores needed for the R-Loss.
+    outcome_estimates = LGBMRegressor().fit(X_train, y_train).predict(X_validation)
+    # Note that we use predict_proba rather than predict: the R-Loss
+    # expects propensity scores, i.e. probabilities, not class labels.
+    propensity_scores = LGBMClassifier().fit(X_train, w_train).predict_proba(X_validation)[:, 1]
+
+    def objective_overall(trial):
+
+        ### SAMPLING
+
+        # Highest level of granularity: we sample the MetaLearner architecture.
+        architecture = trial.suggest_categorical("architecture", ["R", "DR"])
+
+        # We distinguish cases because we do not want a DR-Learner run to influence
+        # the optimizing process of R-Learner-related parameters.
+        if architecture == "R":
+
+            # Second level of granularity: we sample the nuisance base model.
+            # Note that this is conditioned on using the R-Learner.
+            nuisance_r = trial.suggest_categorical("nuisance_r", ["LGBMRegressor", "Ridge"])
+            nuisance_dr = None
+            nuisance_dr_lin_reg_alpha = None
+            nuisance_dr_lgbm_n_estimators = None
+
+            if nuisance_r == "LGBMRegressor":
+
+                # Lowest level of granularity: we sample the nuisance base model hyperparameters.
+                nuisance_r_lgbm_n_estimators = trial.suggest_int("nuisance_r_lgbm_n_estimators", 5, 250)
+                nuisance_r_lin_reg_alpha = None
+
+                nuisance_params = {"n_estimators": nuisance_r_lgbm_n_estimators, "verbosity": -1}
+            else:
+                nuisance_r_lin_reg_alpha = trial.suggest_float("nuisance_r_lin_reg_alpha", 0, 10)
+                nuisance_r_lgbm_n_estimators = None
+
+                nuisance_params = {"alpha": nuisance_r_lin_reg_alpha}
+
+        else:
+            nuisance_dr = trial.suggest_categorical("nuisance_dr", ["LGBMRegressor", "Ridge"])
+            nuisance_r = None
+            nuisance_r_lin_reg_alpha = None
+            nuisance_r_lgbm_n_estimators = None
+
+            if nuisance_dr == "LGBMRegressor":
+
+                # Lowest level of granularity: we sample the nuisance base model hyperparameters.
+                nuisance_dr_lgbm_n_estimators = trial.suggest_int("nuisance_dr_lgbm_n_estimators", 5, 250)
+                nuisance_dr_lin_reg_alpha = None
+
+                nuisance_params = {"n_estimators": nuisance_dr_lgbm_n_estimators, "verbosity": -1}
+
+            else:
+                nuisance_dr_lin_reg_alpha = trial.suggest_float("nuisance_dr_lin_reg_alpha", 0, 10)
+                nuisance_dr_lgbm_n_estimators = None
+
+                nuisance_params = {"alpha": nuisance_dr_lin_reg_alpha}
+
+        ### LEARNING
+
+        _metalearner_factory = metalearner_factory(architecture)
+        # We know that exactly one of nuisance_r and nuisance_dr is not
+        # None, therefore we can use or.
+        nuisance_model_type = nuisance_r or nuisance_dr
+        metalearner = _metalearner_factory(
+            nuisance_model_factory=LGBMRegressor if nuisance_model_type == "LGBMRegressor" else Ridge,
+            nuisance_model_params=nuisance_params,
+            propensity_model_factory=LGBMClassifier,
+            propensity_model_params={"n_estimators": 5, "max_depth": 5, "verbosity": -1},
+            treatment_model_factory=LGBMRegressor,
+            treatment_model_params={"n_estimators": 5, "max_depth": 5, "verbosity": -1},
+            is_classification=False,
+            n_variants=2,
+        )
+        metalearner.fit(X_train, y_train, w_train)
+
+        ### EVALUATING
+
+        cate_estimates = simplify_output(metalearner.predict(X_validation, is_oos=True))
+
+        return r_loss(
+            cate_estimates=cate_estimates,
+            outcome_estimates=outcome_estimates,
+            propensity_scores=propensity_scores,
+            outcomes=y_validation,
+            treatments=w_validation,
+        )
+
+    study_overall = optuna.create_study(direction="minimize")
+    study_overall.optimize(objective_overall, n_trials=100)
+
+Leading to the following output:
+
+.. code-block:: console
+
+    [I 2024-06-03 18:58:24,270] A new study created in memory with name: no-name-1bd16fa2-dee8-4d20-9505-1e272af0f8f9
+    [I 2024-06-03 18:58:24,919] Trial 0 finished with value: 0.8147798597769813 and parameters: {'architecture': 'DR', 'nuisance_dr': 'Ridge', 'nuisance_dr_lin_reg_alpha': 2.2902267246714603}. Best is trial 0 with value: 0.8147798597769813.
+    [I 2024-06-03 18:58:25,526] Trial 1 finished with value: 0.8133275607687053 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 4.637862092794302}. Best is trial 1 with value: 0.8133275607687053.
+    [I 2024-06-03 18:58:35,532] Trial 2 finished with value: 0.8147846754978962 and parameters: {'architecture': 'R', 'nuisance_r': 'LGBMRegressor', 'nuisance_r_lgbm_n_estimators': 225}. Best is trial 1 with value: 0.8133275607687053.
+    [I 2024-06-03 18:58:53,424] Trial 3 finished with value: 0.8142647285796442 and parameters: {'architecture': 'DR', 'nuisance_dr': 'LGBMRegressor', 'nuisance_dr_lgbm_n_estimators': 217}. Best is trial 1 with value: 0.8133275607687053.
+    [I 2024-06-03 18:58:54,052] Trial 4 finished with value: 0.8132955713927359 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 9.719207974019277}. Best is trial 4 with value: 0.8132955713927359.
+    [I 2024-06-03 18:58:54,696] Trial 5 finished with value: 0.8133872323810601 and parameters: {'architecture': 'DR', 'nuisance_dr': 'Ridge', 'nuisance_dr_lin_reg_alpha': 2.9620885067837213}. Best is trial 4 with value: 0.8132955713927359.
+    [I 2024-06-03 18:58:55,317] Trial 6 finished with value: 0.8131459701081047 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 7.8677812175017605}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:58:55,966] Trial 7 finished with value: 0.8143004222591679 and parameters: {'architecture': 'DR', 'nuisance_dr': 'Ridge', 'nuisance_dr_lin_reg_alpha': 8.706319710803129}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:58:56,596] Trial 8 finished with value: 0.814613404338131 and parameters: {'architecture': 'DR', 'nuisance_dr': 'Ridge', 'nuisance_dr_lin_reg_alpha': 0.33666864458317125}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:59:12,479] Trial 9 finished with value: 0.8141717774602024 and parameters: {'architecture': 'DR', 'nuisance_dr': 'LGBMRegressor', 'nuisance_dr_lgbm_n_estimators': 178}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:59:13,099] Trial 10 finished with value: 0.8139938540534846 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 9.451770473441274}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:59:13,710] Trial 11 finished with value: 0.8131742628076631 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 9.829822261109065}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:59:14,335] Trial 12 finished with value: 0.8132605930727189 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 7.76020864716155}. Best is trial 6 with value: 0.8131459701081047.
+    [I 2024-06-03 18:59:14,958] Trial 13 finished with value: 0.8130793186549206 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 0.5322693901044904}. Best is trial 13 with value: 0.8130793186549206.
+
+    ...
+    [I 2024-06-03 19:01:16,545] Trial 97 finished with value: 0.8130319461209982 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 2.591358482723367}. Best is trial 69 with value: 0.8124600668314831.
+    [I 2024-06-03 19:01:20,357] Trial 98 finished with value: 0.8144984717668121 and parameters: {'architecture': 'R', 'nuisance_r': 'LGBMRegressor', 'nuisance_r_lgbm_n_estimators': 64}. Best is trial 69 with value: 0.8124600668314831.
+    [I 2024-06-03 19:01:20,977] Trial 99 finished with value: 0.8125431242342713 and parameters: {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': 8.490782741739888}. Best is trial 69 with value: 0.8124600668314831.
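+
+Analogously to before, ``study_overall.best_params`` now tells us not
+only the winning hyperparameters but also the winning architecture and
+base model. The following sketch -- reusing only names defined above --
+shows how one might rebuild the overall winner from it:
+
+.. code-block:: python
+
+    best = study_overall.best_params
+    # best_params only contains the parameters which were actually
+    # sampled in the best trial, e.g.
+    # {'architecture': 'R', 'nuisance_r': 'Ridge', 'nuisance_r_lin_reg_alpha': ...}.
+    print(best)
+
+    _metalearner_factory = metalearner_factory(best["architecture"])
+
+    # Exactly one of the two nuisance keys was sampled in the best trial.
+    prefix = "nuisance_r" if "nuisance_r" in best else "nuisance_dr"
+    if best[prefix] == "LGBMRegressor":
+        nuisance_model_factory = LGBMRegressor
+        nuisance_params = {"n_estimators": best[f"{prefix}_lgbm_n_estimators"], "verbosity": -1}
+    else:
+        nuisance_model_factory = Ridge
+        nuisance_params = {"alpha": best[f"{prefix}_lin_reg_alpha"]}
+
+    best_metalearner = _metalearner_factory(
+        nuisance_model_factory=nuisance_model_factory,
+        nuisance_model_params=nuisance_params,
+        propensity_model_factory=LGBMClassifier,
+        propensity_model_params={"n_estimators": 5, "max_depth": 5, "verbosity": -1},
+        treatment_model_factory=LGBMRegressor,
+        treatment_model_params={"n_estimators": 5, "max_depth": 5, "verbosity": -1},
+        is_classification=False,
+        n_variants=2,
+    )
+    best_metalearner.fit(X_train, y_train, w_train)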
diff --git a/docs/examples/index.rst b/docs/examples/index.rst
index 7173da8d..4bdb1403 100644
--- a/docs/examples/index.rst
+++ b/docs/examples/index.rst
@@ -9,4 +9,5 @@ Examples
     Reusing base models
     Explainability: Lime plots of MetaLearners
     Explainability: Feature importance and SHAP values
+    Model selection with optuna
     Generating data