Skip to content

Commit

Permalink
add integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ablaom committed Jan 12, 2024
1 parent 0f49f1f commit 0eb8c52
Show file tree
Hide file tree
Showing 4 changed files with 229 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ jobs:
arch:
- x64
steps:
- name: Set integration test flag
run: |
julia -e 'ENV["MLJ_TEST_INTEGRATION"]="true"'
if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }})
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ MLJEnsembles = "0.4"
MLJFlow = "0.3"
MLJIteration = "0.6"
MLJModels = "0.16"
MLJTestIntegration = "0.5.0"
MLJTuning = "0.8"
OpenML = "0.2,0.3"
Pkg = "<0.0.1, 1"
Expand Down
214 changes: 214 additions & 0 deletions test/integration.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
using MLJTestIntegration, MLJModels, MLJ, Test, Markdown
import MLJTestIntegration as MTI
import Pkg.TOML as TOML

const JULIA_TEST_LEVEL = 1
const OTHER_TEST_LEVEL = 1


# # RECORD OF OUTSTANDING ISSUES

FILTER_GIVEN_ISSUE = Dict(
"https://github.com/JuliaAI/CatBoost.jl/pull/28 (waiting for 0.3.3 release)" =>
model -> model.name == "CatBoostRegressor",
"LOCIDetector too slow to train!" =>
model -> model.name == "LOCIDetector",
"https://github.com/JuliaML/LIBSVM.jl/issues/98" =>
model -> model.name == "LinearSVC" &&
model.package_name == "LIBSVM",
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl/issues/4" =>
model -> model.name == "CDDetector" &&
model.package_name == "OutlierDetectionPython",
"https://github.com/JuliaAI/CatBoost.jl/issues/22" =>
model -> model.name == "CatBoostClassifier",
"https://github.com/sylvaticus/BetaML.jl/issues/65" =>
model -> model.name in ["KMeans", "KMedoids"] &&
model.package_name == "BetaML",
"https://github.com/JuliaAI/MLJTSVDInterface.jl/pull/17" =>
model -> model.name == "TSVDTransformer",
"https://github.com/alan-turing-institute/MLJ.jl/issues/1074" =>
model -> model.name == "AutoEncoderMLJ",
"https://github.com/sylvaticus/BetaML.jl/issues/64" =>
model -> model.name =="GaussianMixtureClusterer" && model.package_name=="BetaML",
"https://github.com/rikhuijzer/SIRUS.jl/issues/78" =>
model -> model.package_name == "SIRUS",
"https://github.com/lalvim/PartialLeastSquaresRegressor.jl/issues/29 "*
"(still need release > 2.2.0)" =>
model -> model.package_name == "PartialLeastSquaresRegressor",
"MLJScikitLearnInterface - multiple issues, hangs tests, WIP" =>
model -> model.package_name == "MLJScikitLearnInterface",
)

# # LOG OUTSTANDING ISSUES TO STDOUT

const MODELS= models();
const JULIA_MODELS = filter(m->m.is_pure_julia, MODELS);
const OTHER_MODELS = setdiff(MODELS, JULIA_MODELS);

const EXCLUDED_BY_ISSUE = filter(MODELS) do model
any([p(model) for p in values(FILTER_GIVEN_ISSUE)])
end;

affected_packages = unique([m.package_name for m in EXCLUDED_BY_ISSUE])
n_excluded = length(EXCLUDED_BY_ISSUE)
report = """
# Integration Tests
Currently, $n_excluded models are excluded from integration tests because of outstanding
issues. When fixed, update `FILTER_GIVEN_ISSUE` in /test/integration.jl.
If an issue is related to model traits (aka metadata), then the MLJ Model Registry may
need to be updated to resolve the integration test failures. See the `MLJModels.@update`
document string for how to do that.
## Oustanding issues
""";
for issue in keys(FILTER_GIVEN_ISSUE)
global report *= "\n- $issue\n"
end;
report *= "\n## Affected packages\n"
for pkg in affected_packages
global report *= "\n- $pkg"
end;
report_md = Markdown.parse(report);

n_excluded > 0 && begin
show(stdout, MIME("text/plain"), report_md)
println()
println()
sleep(1)
end


# # FLAG MODELS THAT DON'T HAVE COMPATIBLE DATASETS FOR TESTING

# We use the version of `MLJTestIntegration.test` that infers appropriate datasets. The
# datasets provided by MLJTestIntegration.jl are not yet comprehensive, so we exclude
# models from testing when no compatible dataset can be found.
WITHOUT_DATASETS = filter(MODELS) do model
# multi-target datasets:
model.target_scitype <: Union{Table, AbstractMatrix} ||
# https://github.com/JuliaAI/MLJTestInterface.jl/issues/19
model.package_name == "MLJText" ||
# univariate transformers:
model.input_scitype <: AbstractVector ||
# image data:
model.input_scitype <: AbstractVector{<:Image} ||
# other data:
(model.name == "BernoulliNBClassifier" &&
model.package_name == "MLJScikitLearnInterface") ||
(model.name == "MultinomialNBClassifier" &&
model.package_name == "NaiveBayes") ||
(model.name == "OneRuleClassifier" &&
model.package_name == "OneRule") ||
(model.name == "ComplementNBClassifier" &&
model.package_name == "MLJScikitLearnInterface") ||
(model.name == "MultinomialNBClassifier" &&
model.package_name == "MLJScikitLearnInterface") ||
(model.name == "SMOTEN" &&
model.package_name == "Imbalance")
end;

# To remove any warning issued below, update `WITHOUT_DATASETS` defined above:
for model in WITHOUT_DATASETS
!isempty(MLJTestIntegration.datasets(model)) &&
@warn "The model `$(model.name)` from `$(model.package_name)` "*
"is currently excluded "*
"from integration tests even though a compatible dataset appears "*
"to be available now. "
end

# Additionally exclude some models for which the inferred datasets have a model-specific
# pathololgy that prevents a valid test:

PATHOLOGIES = filter(MODELS) do model
# in the subsampling occuring in stacking, we get a Cholesky
# factorization fail (`PosDefException`):
(model.name=="GaussianNBClassifier" && model.package_name=="NaiveBayes") ||
# https://github.com/JuliaStats/MultivariateStats.jl/issues/224
(model.name =="ICA" && model.package_name=="MultivariateStats") ||
# in tuned_pipe_evaluation C library gives "Incorrect parameter: specified nu is
# infeasible":
(model.name in ["NuSVC", "ProbabilisticNuSVC"] &&
model.package_name == "LIBSVM")
end

WITHOUT_DATASETS = vcat(WITHOUT_DATASETS, PATHOLOGIES)


# # CHECK PROJECT FILE INCLUDES ALL MODEL-PROVIDING PACKAGES

# helper; `project_lines` are lines from a Project.toml file:
function pkgs(project_lines)
project = TOML.parse(join(project_lines, "\n"))
headings = Set(keys(project)) ["deps", "extras"]
return vcat(collect.(keys.([project[h] for h in headings]))...)
end

# identify missing pkgs:
project_path = joinpath(@__DIR__, "..", "Project.toml")
project_lines = open(project_path) do io
readlines(io)
end
pkgs_in_project = pkgs(project_lines)
registry_project_lines = MLJModels.Registry.registry_project()
pkgs_in_registry = pkgs(registry_project_lines)
missing_pkgs = setdiff(pkgs_in_registry, pkgs_in_project)

# throw error if there are any:
isempty(missing_pkgs) || error(
"Integration tests cannot proceed because the following packages are "*
"missing from the [extras] section of the MLJ Project.toml file: "*
join(missing_pkgs, ", ")
)

# # LOAD ALL MODEL CODE

# Load all the model providing packages with a broad level=1 test:
MLJTestIntegration.test(MODELS, (nothing, ), level=1, throw=true, verbosity=0);


# # JULIA TESTS

options = (
level = JULIA_TEST_LEVEL,
verbosity = 0, # bump to 2 to debug
throw = true,
)
@testset "level 4 tests" begin
println()
for model in JULIA_MODELS

# exclusions:
model in WITHOUT_DATASETS && continue
model in EXCLUDED_BY_ISSUE && continue

print("\rTesting $(model.name) ($(model.package_name)) ")
@test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
end
end


# # NON-JULIA TESTS

options = (
level = OTHER_TEST_LEVEL,
verbosity = 0, # bump to 2 to debug
throw = true,
)
@testset "level 3 tests" begin
println()
for model in OTHER_MODELS

# exclusions:
model in WITHOUT_DATASETS && continue
model in EXCLUDED_BY_ISSUE && continue

print("\rTesting $(model.name) ($(model.package_name)) ")
@test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
end
end

true
10 changes: 10 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,13 @@ end
@testset "scitypes" begin
@test include("scitypes.jl")
end

if get(ENV, "MLJ_TEST_INTEGRATION", "false") == "true"
@testset "integration" begin
@test include("integration.jl")
end
else
@info "Integration tests skipped. Set environment variable "*
"MLJ_TEST_INTEGRATION = \"true\" to include them.\n"*
"Integration tests take at least one hour. "
end

0 comments on commit 0eb8c52

Please sign in to comment.