diff --git a/src/InferOpt.jl b/src/InferOpt.jl index 4d62096..c7f3e1a 100644 --- a/src/InferOpt.jl +++ b/src/InferOpt.jl @@ -28,30 +28,32 @@ include("utils/isotonic_regression/isotonic_l2.jl") include("utils/isotonic_regression/isotonic_kl.jl") include("utils/isotonic_regression/projection.jl") -include("simple/interpolation.jl") -include("simple/identity.jl") - -include("regularized/abstract_regularized.jl") -include("regularized/soft_argmax.jl") -include("regularized/sparse_argmax.jl") -include("regularized/soft_rank.jl") -include("regularized/regularized_frank_wolfe.jl") - -include("perturbed/abstract_perturbed.jl") -include("perturbed/additive.jl") -include("perturbed/multiplicative.jl") -include("perturbed/perturbed_oracle.jl") - -include("imitation/spoplus_loss.jl") -include("imitation/ssvm_loss.jl") -include("imitation/fenchel_young_loss.jl") -include("imitation/imitation_loss.jl") -include("imitation/zero_one_loss.jl") +# Layers +include("layers/simple/interpolation.jl") +include("layers/simple/identity.jl") + +include("layers/perturbed/abstract_perturbed.jl") +include("layers/perturbed/additive.jl") +include("layers/perturbed/multiplicative.jl") +include("layers/perturbed/perturbed_oracle.jl") + +include("layers/regularized/abstract_regularized.jl") +include("layers/regularized/soft_argmax.jl") +include("layers/regularized/sparse_argmax.jl") +include("layers/regularized/soft_rank.jl") +include("layers/regularized/regularized_frank_wolfe.jl") if !isdefined(Base, :get_extension) include("../ext/InferOptFrankWolfeExt.jl") end +# Losses +include("losses/fenchel_young_loss.jl") +include("losses/spoplus_loss.jl") +include("losses/ssvm_loss.jl") +include("losses/zero_one_loss.jl") +include("losses/imitation_loss.jl") + export half_square_norm export shannon_entropy, negative_shannon_entropy export one_hot_argmax, ranking diff --git a/src/perturbed/abstract_perturbed.jl b/src/layers/perturbed/abstract_perturbed.jl similarity index 100% rename from src/perturbed/abstract_perturbed.jl rename to src/layers/perturbed/abstract_perturbed.jl diff --git a/src/perturbed/additive.jl b/src/layers/perturbed/additive.jl similarity index 100% rename from src/perturbed/additive.jl rename to src/layers/perturbed/additive.jl diff --git a/src/perturbed/multiplicative.jl b/src/layers/perturbed/multiplicative.jl similarity index 100% rename from src/perturbed/multiplicative.jl rename to src/layers/perturbed/multiplicative.jl diff --git a/src/perturbed/perturbed_oracle.jl b/src/layers/perturbed/perturbed_oracle.jl similarity index 100% rename from src/perturbed/perturbed_oracle.jl rename to src/layers/perturbed/perturbed_oracle.jl diff --git a/src/regularized/abstract_regularized.jl b/src/layers/regularized/abstract_regularized.jl similarity index 100% rename from src/regularized/abstract_regularized.jl rename to src/layers/regularized/abstract_regularized.jl diff --git a/src/regularized/regularized_frank_wolfe.jl b/src/layers/regularized/regularized_frank_wolfe.jl similarity index 100% rename from src/regularized/regularized_frank_wolfe.jl rename to src/layers/regularized/regularized_frank_wolfe.jl diff --git a/src/regularized/soft_argmax.jl b/src/layers/regularized/soft_argmax.jl similarity index 100% rename from src/regularized/soft_argmax.jl rename to src/layers/regularized/soft_argmax.jl diff --git a/src/regularized/soft_rank.jl b/src/layers/regularized/soft_rank.jl similarity index 100% rename from src/regularized/soft_rank.jl rename to src/layers/regularized/soft_rank.jl diff --git a/src/regularized/sparse_argmax.jl b/src/layers/regularized/sparse_argmax.jl similarity index 100% rename from src/regularized/sparse_argmax.jl rename to src/layers/regularized/sparse_argmax.jl diff --git a/src/simple/identity.jl b/src/layers/simple/identity.jl similarity index 100% rename from src/simple/identity.jl rename to src/layers/simple/identity.jl diff --git a/src/simple/interpolation.jl b/src/layers/simple/interpolation.jl similarity index 100% rename from src/simple/interpolation.jl rename to src/layers/simple/interpolation.jl diff --git a/src/imitation/fenchel_young_loss.jl b/src/losses/fenchel_young_loss.jl similarity index 100% rename from src/imitation/fenchel_young_loss.jl rename to src/losses/fenchel_young_loss.jl diff --git a/src/imitation/imitation_loss.jl b/src/losses/imitation_loss.jl similarity index 100% rename from src/imitation/imitation_loss.jl rename to src/losses/imitation_loss.jl diff --git a/src/imitation/spoplus_loss.jl b/src/losses/spoplus_loss.jl similarity index 100% rename from src/imitation/spoplus_loss.jl rename to src/losses/spoplus_loss.jl diff --git a/src/imitation/ssvm_loss.jl b/src/losses/ssvm_loss.jl similarity index 100% rename from src/imitation/ssvm_loss.jl rename to src/losses/ssvm_loss.jl diff --git a/src/imitation/zero_one_loss.jl b/src/losses/zero_one_loss.jl similarity index 100% rename from src/imitation/zero_one_loss.jl rename to src/losses/zero_one_loss.jl diff --git a/test/interface.jl b/test/abstract_regularized_interface.jl similarity index 84% rename from test/interface.jl rename to test/abstract_regularized_interface.jl index 0e6b8b6..a6d1687 100644 --- a/test/interface.jl +++ b/test/abstract_regularized_interface.jl @@ -5,4 +5,5 @@ @test RI.check_interface_implemented(AbstractRegularized, RegularizedFrankWolfe) @test RI.check_interface_implemented(AbstractRegularized, SoftArgmax) @test RI.check_interface_implemented(AbstractRegularized, SparseArgmax) + @test RI.check_interface_implemented(AbstractRegularized, SoftRank) end diff --git a/test/argmax.jl b/test/argmax.jl deleted file mode 100644 index 4075c51..0000000 --- a/test/argmax.jl +++ /dev/null @@ -1,274 +0,0 @@ -@testitem "Argmax - imit - SPO+ (θ)" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitationθ(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=SPOPlusLoss(one_hot_argmax), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - SPO+ (θ & y)" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitationθy(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=SPOPlusLoss(one_hot_argmax), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - SSVM" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=InferOpt.ZeroOneStructuredSVMLoss(), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - MSE SparseArgmax" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=SparseArgmax(), - loss=mse_kw, - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - MSE SoftArgmax" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=SoftArgmax(), - loss=mse_kw, - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - MSE PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=PerturbedAdditive(one_hot_argmax; ε=1.0, nb_samples=10), - loss=mse_kw, - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - MSE PerturbedMultiplicative" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=PerturbedMultiplicative(one_hot_argmax; ε=1.0, nb_samples=10), - loss=mse_kw, - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - MSE RegularizedFrankWolfe" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=RegularizedFrankWolfe( - one_hot_argmax; - Ω=half_square_norm, - Ω_grad=identity_kw, - frank_wolfe_kwargs=(; max_iteration=10, line_search=FrankWolfe.Agnostic()), - ), - loss=mse_kw, - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - FYL SparseArgmax" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=FenchelYoungLoss(SparseArgmax()), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - FYL SoftArgmax" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=FenchelYoungLoss(SoftArgmax()), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - FYL PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=FenchelYoungLoss(PerturbedAdditive(one_hot_argmax; ε=1.0, nb_samples=5)), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - FYL PerturbedMultiplicative" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=FenchelYoungLoss(PerturbedMultiplicative(one_hot_argmax; ε=1.0, nb_samples=5)), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - imit - FYL RegularizedFrankWolfe" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=FenchelYoungLoss( - RegularizedFrankWolfe( - one_hot_argmax; - Ω=half_square_norm, - Ω_grad=identity_kw, - frank_wolfe_kwargs=(; max_iteration=10, line_search=FrankWolfe.Agnostic()), - ), - ), - error_function=hamming_distance, - ) -end - -@testitem "Argmax - exp - Pushforward PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - test_pipeline!( - PipelineLossExperience(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=Pushforward(PerturbedAdditive(one_hot_argmax; ε=1.0, nb_samples=10), cost), - error_function=hamming_distance, - true_encoder=true_encoder, - cost=cost, - ) -end - -@testitem "Argmax - exp - Pushforward PerturbedMultiplicative" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - test_pipeline!( - PipelineLossExperience(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=Pushforward( - PerturbedMultiplicative(one_hot_argmax; ε=1.0, nb_samples=10), cost - ), - error_function=hamming_distance, - true_encoder=true_encoder, - cost=cost, - ) -end - -@testitem "Argmax - exp - Pushforward RegularizedFrankWolfe" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using DifferentiableFrankWolfe, - FrankWolfe, InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - test_pipeline!( - PipelineLossExperience(); - instance_dim=5, - true_maximizer=one_hot_argmax, - maximizer=identity_kw, - loss=Pushforward( - RegularizedFrankWolfe( - one_hot_argmax; - Ω=half_square_norm, - Ω_grad=identity_kw, - frank_wolfe_kwargs=(; max_iteration=10, line_search=FrankWolfe.Agnostic()), - ), - cost, - ), - error_function=hamming_distance, - true_encoder=true_encoder, - cost=cost, - ) -end diff --git a/test/jacobian_approx.jl b/test/jacobian_approx.jl deleted file mode 100644 index abd80ed..0000000 --- a/test/jacobian_approx.jl +++ /dev/null @@ -1,42 +0,0 @@ -@testitem "Jacobian approx" begin - using LinearAlgebra - using Random - using Test - using Zygote - - θ = [3, 5, 4, 2] - - perturbed1 = PerturbedAdditive(one_hot_argmax; ε=2, nb_samples=1_000, seed=0) - perturbed1_big = PerturbedAdditive(one_hot_argmax; ε=2, nb_samples=10_000, seed=0) - perturbed2 = PerturbedMultiplicative(one_hot_argmax; ε=0.5, nb_samples=1_000, seed=0) - perturbed2_big = PerturbedMultiplicative( - one_hot_argmax; ε=0.5, nb_samples=10_000, seed=0 - ) - - @testset "PerturbedAdditive" begin - # Compute jacobian with reverse mode - jac1 = Zygote.jacobian(θ -> perturbed1(θ; autodiff_variance_reduction=false), θ)[1] - jac1_big = Zygote.jacobian( - θ -> perturbed1_big(θ; autodiff_variance_reduction=false), θ - )[1] - # Only diagonal should be positive - @test all(diag(jac1) .>= 0) - @test all(jac1 - Diagonal(jac1) .<= 0) - # Order of diagonal coefficients should follow order of θ - @test sortperm(diag(jac1)) == sortperm(θ) - # No scaling with nb of samples - @test norm(jac1) ≈ norm(jac1_big) rtol = 1e-2 - end - - @testset "PerturbedMultiplicative" begin - jac2 = Zygote.jacobian(θ -> perturbed2(θ; autodiff_variance_reduction=false), θ)[1] - jac2_big = Zygote.jacobian( - θ -> perturbed2_big(θ; autodiff_variance_reduction=false), θ - )[1] - @test all(diag(jac2) .>= 0) - @test all(jac2 - Diagonal(jac2) .<= 0) - @test sortperm(diag(jac2)) != sortperm(θ) - # This is not equal because the diagonal coefficient for θ₃ = 4 is often larger than the one for θ₂ = 5. It happens because θ₃ has the opportunity to *become* the argmax (and hence switch from 0 to 1), whereas θ₂ already *is* the argmax. - @test norm(jac2) ≈ norm(jac2_big) rtol = 1e-2 - end -end diff --git a/test/generalized_maximizer.jl b/test/learning_generalized_maximizer.jl similarity index 100% rename from test/generalized_maximizer.jl rename to test/learning_generalized_maximizer.jl diff --git a/test/ranking.jl b/test/learning_ranking.jl similarity index 70% rename from test/ranking.jl rename to test/learning_ranking.jl index 122ed99..a6773f9 100644 --- a/test/ranking.jl +++ b/test/learning_ranking.jl @@ -1,4 +1,4 @@ -@testitem "Ranking - imit - SPO+ (θ)" default_imports = false begin +@testitem "imit - SPO+ (θ)" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -13,7 +13,7 @@ ) end -@testitem "Ranking - imit - SPO+ (θ & y)" default_imports = false begin +@testitem "imit - SPO+ (θ & y)" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -28,7 +28,7 @@ end ) end -@testitem "Ranking - imit - MSE IdentityRelaxation" default_imports = false begin +@testitem "imit - MSE IdentityRelaxation" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, LinearAlgebra, Random Random.seed!(63) @@ -43,7 +43,7 @@ end ) end -@testitem "Ranking - imit - MSE Interpolation" default_imports = false begin +@testitem "imit - MSE Interpolation" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -58,7 +58,7 @@ end ) end -@testitem "Ranking - imit - MSE PerturbedAdditive" default_imports = false begin +@testitem "imit - MSE PerturbedAdditive" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -73,7 +73,7 @@ end ) end -@testitem "Ranking - imit - MSE PerturbedMultiplicative" default_imports = false begin +@testitem "imit - MSE PerturbedMultiplicative" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -88,7 +88,7 @@ end ) end -@testitem "Ranking - imit - MSE RegularizedFrankWolfe" default_imports = false begin +@testitem "imit - MSE RegularizedFrankWolfe" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -108,7 +108,7 @@ end ) end -@testitem "Ranking - imit - FYL PerturbedAdditive" default_imports = false begin +@testitem "imit - FYL PerturbedAdditive" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -123,7 +123,7 @@ end ) end -@testitem "Ranking - imit - FYL PerturbedMultiplicative" default_imports = false begin +@testitem "imit - FYL PerturbedMultiplicative" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -138,7 +138,7 @@ end ) end -@testitem "Ranking - imit - FYL PerturbedAdditive{LogNormal}" default_imports = false begin +@testitem "imit - FYL PerturbedAdditive{LogNormal}" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, Random, Distributions, LinearAlgebra Random.seed!(63) @@ -155,7 +155,7 @@ end ) end -@testitem "Ranking - imit - FYL RegularizedFrankWolfe" default_imports = false begin +@testitem "imit - FYL RegularizedFrankWolfe" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, Random Random.seed!(63) @@ -177,7 +177,7 @@ end ) end -@testitem "Ranking - exp - Pushforward PerturbedAdditive" default_imports = false begin +@testitem "exp - Pushforward PerturbedAdditive" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, LinearAlgebra, Random Random.seed!(63) @@ -196,7 +196,7 @@ end ) end -@testitem "Ranking - exp - Pushforward PerturbedMultiplicative" default_imports = false begin +@testitem "exp - Pushforward PerturbedMultiplicative" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, LinearAlgebra, Random Random.seed!(63) @@ -215,7 +215,7 @@ end ) end -@testitem "Ranking - exp - Pushforward PerturbedAdditive{LogNormal}" default_imports = false begin +@testitem "exp - Pushforward PerturbedAdditive{LogNormal}" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Distributions Random.seed!(63) @@ -238,8 +238,7 @@ end ) end -@testitem "Ranking - exp - Pushforward PerturbedMultiplicative{LogNormal}" default_imports = - false begin +@testitem "exp - Pushforward PerturbedMultiplicative{LogNormal}" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Distributions Random.seed!(63) @@ -264,7 +263,7 @@ end ) end -@testitem "Ranking - exp - Pushforward PerturbedOracle{LogNormal}" default_imports = false begin +@testitem "exp - Pushforward PerturbedOracle{LogNormal}" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Distributions Random.seed!(63) @@ -285,7 +284,7 @@ end ) end -@testitem "Ranking - exp - Pushforward RegularizedFrankWolfe" default_imports = false begin +@testitem "exp - Pushforward RegularizedFrankWolfe" default_imports = false begin include("InferOptTestUtils/src/InferOptTestUtils.jl") using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, LinearAlgebra, Random @@ -312,3 +311,83 @@ end cost=cost, ) end + +@testitem "exp - soft rank" default_imports = false begin + include("InferOptTestUtils/src/InferOptTestUtils.jl") + using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Test + Random.seed!(63) + + true_encoder = encoder_factory() + cost(y; instance) = dot(y, -true_encoder(instance)) + + Random.seed!(67) + soft_rank_l2_results = test_pipeline!( + PipelineLossExperience(); + instance_dim=5, + true_maximizer=ranking, + maximizer=SoftRank(), + loss=cost, + error_function=hamming_distance, + true_encoder=true_encoder, + cost=cost, + epochs=50, + ) + + Random.seed!(67) + soft_rank_kl_results = test_pipeline!( + PipelineLossExperience(); + instance_dim=5, + true_maximizer=ranking, + maximizer=SoftRank(; regularization="kl"), + loss=cost, + error_function=hamming_distance, + true_encoder=true_encoder, + cost=cost, + epochs=50, + ) + + Random.seed!(67) + perturbed_results = test_pipeline!( + PipelineLossExperience(); + instance_dim=5, + true_maximizer=ranking, + maximizer=identity_kw, + loss=Pushforward(PerturbedAdditive(ranking; ε=1.0, nb_samples=10), cost), + error_function=hamming_distance, + true_encoder=true_encoder, + cost=cost, + epochs=50, + ) + + # Check that we achieve better performance than the reinforce trick + @test soft_rank_l2_results.test_cost_gaps[end] < perturbed_results.test_cost_gaps[end] + @test soft_rank_kl_results.test_cost_gaps[end] < perturbed_results.test_cost_gaps[end] +end + +@testitem "imit - FYL - soft rank" default_imports = false begin + include("InferOptTestUtils/src/InferOptTestUtils.jl") + using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Test + Random.seed!(63) + + true_encoder = encoder_factory() + + test_pipeline!( + PipelineLossImitation(); + instance_dim=5, + true_maximizer=ranking, + maximizer=identity_kw, + loss=FenchelYoungLoss(SoftRank()), + error_function=hamming_distance, + true_encoder=true_encoder, + ) + + test_pipeline!( + PipelineLossImitation(); + instance_dim=5, + true_maximizer=ranking, + maximizer=identity_kw, + loss=FenchelYoungLoss(SoftRank(; regularization="kl", ε=10.0)), + error_function=hamming_distance, + true_encoder=true_encoder, + ) +end diff --git a/test/paths.jl b/test/paths.jl deleted file mode 100644 index c38176b..0000000 --- a/test/paths.jl +++ /dev/null @@ -1,259 +0,0 @@ -@testitem "Paths - imit - SPO+ (θ)" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitationθ(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=SPOPlusLoss(shortest_path_maximizer), - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - SPO+ (θ & y)" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitationθy(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=SPOPlusLoss(shortest_path_maximizer), - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - MSE IdentityRelaxation" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=normalize ∘ IdentityRelaxation(shortest_path_maximizer), - loss=mse_kw, - error_function=mse_kw, - ) -end - -# @testitem "Paths - imit - MSE Interpolation" default_imports = false begin -# include("InferOptTestUtils/src/InferOptTestUtils.jl") -# using InferOpt, .InferOptTestUtils, Random -# Random.seed!(63) - -# test_pipeline!( -# PipelineLossImitation; -# instance_dim=(5, 5), -# true_maximizer=shortest_path_maximizer, -# maximizer=Interpolation(shortest_path_maximizer; λ=5.0), -# loss=mse_kw, -# error_function=mse_kw, -# ) -# end # TODO: make it work (doesn't seem to depend on λ) - -@testitem "Paths - imit - MSE PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=PerturbedAdditive(shortest_path_maximizer; ε=1.0, nb_samples=10), - loss=mse_kw, - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - MSE PerturbedMultiplicative" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=PerturbedMultiplicative(shortest_path_maximizer; ε=1.0, nb_samples=10), - loss=mse_kw, - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - MSE RegularizedFrankWolfe" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=RegularizedFrankWolfe( - shortest_path_maximizer; - Ω=half_square_norm, - Ω_grad=identity_kw, - frank_wolfe_kwargs=(; max_iteration=10, line_search=FrankWolfe.Agnostic()), - ), - loss=mse_kw, - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - FYL PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=FenchelYoungLoss( - PerturbedAdditive(shortest_path_maximizer; ε=1.0, nb_samples=5) - ), - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - FYL PerturbedMultiplicative" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=FenchelYoungLoss( - PerturbedMultiplicative(shortest_path_maximizer; ε=1.0, nb_samples=5) - ), - error_function=mse_kw, - epochs=100, - ) -end - -@testitem "Paths - imit - FYL PerturbedAdditive{LogNormal}" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random, Distributions - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=FenchelYoungLoss( - PerturbedAdditive( - shortest_path_maximizer; ε=1.0, nb_samples=5, perturbation=LogNormal(0, 1) - ), - ), - error_function=mse_kw, - ) -end - -@testitem "Paths - imit - FYL RegularizedFrankWolfe" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using DifferentiableFrankWolfe, FrankWolfe, InferOpt, .InferOptTestUtils, Random - Random.seed!(63) - - test_pipeline!( - PipelineLossImitation(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=FenchelYoungLoss( - RegularizedFrankWolfe( - shortest_path_maximizer; - Ω=half_square_norm, - Ω_grad=identity_kw, - frank_wolfe_kwargs=(; max_iteration=10, line_search=FrankWolfe.Agnostic()), - ), - ), - error_function=mse_kw, - epochs=100, - ) -end - -@testitem "Paths - exp - Pushforward PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - test_pipeline!( - PipelineLossExperience(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=Pushforward( - PerturbedAdditive(shortest_path_maximizer; ε=1.0, nb_samples=10), cost - ), - error_function=mse_kw, - true_encoder=true_encoder, - cost=cost, - epochs=500, - ) -end - -@testitem "Paths - exp - Pushforward PerturbedMultiplicative" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - test_pipeline!( - PipelineLossExperience(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=Pushforward( - PerturbedMultiplicative(shortest_path_maximizer; ε=1.0, nb_samples=10), cost - ), - error_function=mse_kw, - true_encoder=true_encoder, - cost=cost, - epochs=500, - ) -end - -@testitem "Paths - exp - Pushforward RegularizedFrankWolfe" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using DifferentiableFrankWolfe, - FrankWolfe, InferOpt, .InferOptTestUtils, LinearAlgebra, Random - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - test_pipeline!( - PipelineLossExperience(); - instance_dim=(5, 5), - true_maximizer=shortest_path_maximizer, - maximizer=identity_kw, - loss=Pushforward( - RegularizedFrankWolfe( - shortest_path_maximizer; - Ω=half_square_norm, - Ω_grad=identity_kw, - frank_wolfe_kwargs=(; max_iteration=10, line_search=FrankWolfe.Agnostic()), - ), - cost, - ), - error_function=mse_kw, - true_encoder=true_encoder, - cost=cost, - epochs=200, - ) -end diff --git a/test/perturbed.jl b/test/perturbed.jl new file mode 100644 index 0000000..7a09e16 --- /dev/null +++ b/test/perturbed.jl @@ -0,0 +1,111 @@ +@testitem "Jacobian approx" begin + using LinearAlgebra + using Random + using Test + using Zygote + + θ = [3, 5, 4, 2] + + perturbed1 = PerturbedAdditive(one_hot_argmax; ε=2, nb_samples=1_000, seed=0) + perturbed1_big = PerturbedAdditive(one_hot_argmax; ε=2, nb_samples=10_000, seed=0) + perturbed2 = PerturbedMultiplicative(one_hot_argmax; ε=0.5, nb_samples=1_000, seed=0) + perturbed2_big = PerturbedMultiplicative( + one_hot_argmax; ε=0.5, nb_samples=10_000, seed=0 + ) + + @testset "PerturbedAdditive" begin + # Compute jacobian with reverse mode + jac1 = Zygote.jacobian(θ -> perturbed1(θ; autodiff_variance_reduction=false), θ)[1] + jac1_big = Zygote.jacobian( + θ -> perturbed1_big(θ; autodiff_variance_reduction=false), θ + )[1] + # Only diagonal should be positive + @test all(diag(jac1) .>= 0) + @test all(jac1 - Diagonal(jac1) .<= 0) + # Order of diagonal coefficients should follow order of θ + @test sortperm(diag(jac1)) == sortperm(θ) + # No scaling with nb of samples + @test norm(jac1) ≈ norm(jac1_big) rtol = 1e-2 + end + + @testset "PerturbedMultiplicative" begin + jac2 = Zygote.jacobian(θ -> perturbed2(θ; autodiff_variance_reduction=false), θ)[1] + jac2_big = Zygote.jacobian( + θ -> perturbed2_big(θ; autodiff_variance_reduction=false), θ + )[1] + @test all(diag(jac2) .>= 0) + @test all(jac2 - Diagonal(jac2) .<= 0) + @test sortperm(diag(jac2)) != sortperm(θ) + # This is not equal because the diagonal coefficient for θ₃ = 4 is often larger than the one for θ₂ = 5. It happens because θ₃ has the opportunity to *become* the argmax (and hence switch from 0 to 1), whereas θ₂ already *is* the argmax. + @test norm(jac2) ≈ norm(jac2_big) rtol = 1e-2 + end +end + +@testitem "PerturbedOracle vs PerturbedAdditive" default_imports = false begin + include("InferOptTestUtils/src/InferOptTestUtils.jl") + using InferOpt, .InferOptTestUtils, Random, Test + using LinearAlgebra, Zygote, Distributions + Random.seed!(63) + + ε = 1.0 + p(θ) = MvNormal(θ, ε^2 * I) + oracle(η) = η + + po = PerturbedOracle(oracle, p; nb_samples=1_000, seed=0) + pa = PerturbedAdditive(oracle; ε, nb_samples=1_000, seed=0) + + θ = randn(10) + @test po(θ) ≈ pa(θ) rtol = 0.001 + @test all(isapprox.(jacobian(po, θ), jacobian(pa, θ), rtol=0.001)) +end + +@testitem "Variance reduction" default_imports = false begin + include("InferOptTestUtils/src/InferOptTestUtils.jl") + using InferOpt, .InferOptTestUtils, Random, Test + using LinearAlgebra, Zygote + Random.seed!(63) + + ε = 1.0 + oracle(η) = η + + pa = PerturbedAdditive(oracle; ε, nb_samples=100, seed=0) + pm = PerturbedMultiplicative(oracle; ε, nb_samples=100, seed=0) + + n = 10 + θ = randn(10) + + Ja = jacobian(θ -> pa(θ; autodiff_variance_reduction=false), θ)[1] + Ja_reduced_variance = jacobian(pa, θ)[1] + + Jm = jacobian(x -> pm(x; autodiff_variance_reduction=false), θ)[1] + Jm_reduced_variance = jacobian(pm, θ)[1] + + J_true = Matrix(I, n, n) # exact jacobian is the identity matrix + + @test normalized_mape(Ja, J_true) > normalized_mape(Ja_reduced_variance, J_true) + @test normalized_mape(Jm, J_true) > normalized_mape(Jm_reduced_variance, J_true) +end + +@testitem "Perturbed - small ε convergence" default_imports = false begin + include("InferOptTestUtils/src/InferOptTestUtils.jl") + using InferOpt, .InferOptTestUtils, Random, Test + using LinearAlgebra, Zygote + Random.seed!(63) + + ε = 1e-12 + + function already_differentiable(θ) + return 2 ./ exp.(θ) .* θ .^ 2 + end + + θ = randn(5) + Jz = jacobian(already_differentiable, θ)[1] + + pa = PerturbedAdditive(already_differentiable; ε, nb_samples=1e6, seed=0) + Ja = jacobian(pa, θ)[1] + @test_broken all(isapprox.(Ja, Jz, rtol=0.01)) + + pm = PerturbedMultiplicative(already_differentiable; ε, nb_samples=1e6, seed=0) + Jm = jacobian(pm, θ)[1] + @test_broken all(isapprox.(Jm, Jz, rtol=0.01)) +end diff --git a/test/perturbed_oracle.jl b/test/perturbed_oracle.jl deleted file mode 100644 index ef1bccb..0000000 --- a/test/perturbed_oracle.jl +++ /dev/null @@ -1,44 +0,0 @@ -@testitem "PerturbedOracle vs PerturbedAdditive" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random, Test - using LinearAlgebra, Zygote, Distributions - Random.seed!(63) - - ε = 1.0 - p(θ) = MvNormal(θ, ε^2 * I) - oracle(η) = η - - po = PerturbedOracle(oracle, p; nb_samples=1_000, seed=0) - pa = PerturbedAdditive(oracle; ε, nb_samples=1_000, seed=0) - - θ = randn(10) - @test po(θ) ≈ pa(θ) rtol = 0.001 - @test all(isapprox.(jacobian(po, θ), jacobian(pa, θ), rtol=0.001)) -end - -@testitem "Variance reduction" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, Random, Test - using LinearAlgebra, Zygote - Random.seed!(63) - - ε = 1.0 - oracle(η) = η - - pa = PerturbedAdditive(oracle; ε, nb_samples=100, seed=0) - pm = PerturbedAdditive(oracle; ε, nb_samples=100, seed=0) - - n = 10 - θ = randn(10) - - Ja = jacobian(θ -> pa(θ; autodiff_variance_reduction=false), θ)[1] - Ja_reduced_variance = jacobian(pa, θ)[1] - - Jm = jacobian(x -> pm(x; autodiff_variance_reduction=false), θ)[1] - Jm_reduced_variance = jacobian(pm, θ)[1] - - J_true = Matrix(I, n, n) # exact jacobian is the identity matrix - - @test normalized_mape(Ja, J_true) > normalized_mape(Ja_reduced_variance, J_true) - @test normalized_mape(Jm, J_true) > normalized_mape(Jm_reduced_variance, J_true) -end diff --git a/test/soft_rank.jl b/test/soft_rank.jl index dc36c81..7e51508 100644 --- a/test/soft_rank.jl +++ b/test/soft_rank.jl @@ -15,7 +15,7 @@ end using InferOpt, .InferOptTestUtils, Random, HiGHS, JuMP, Test Random.seed!(63) - function isotonic_custom(y) + function isotonic_jump(y) model = Model(HiGHS.Optimizer) set_silent(model) @@ -29,7 +29,7 @@ end for _ in 1:100 y = randn(1000) - x = isotonic_custom(y) + x = isotonic_jump(y) x2 = InferOpt.isotonic_l2(y) @test all(isapprox.(x, x2, atol=1e-2)) end @@ -68,90 +68,3 @@ end @test all(isapprox.(rank_jac, rank_jac_fd, atol=1e-4)) end end - -@testitem "Learn by experience soft rank" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Test - Random.seed!(63) - - true_encoder = encoder_factory() - cost(y; instance) = dot(y, -true_encoder(instance)) - - Random.seed!(67) - soft_rank_l2_results = test_pipeline!( - PipelineLossExperience(); - instance_dim=5, - true_maximizer=ranking, - maximizer=SoftRank(), - loss=cost, - error_function=hamming_distance, - true_encoder=true_encoder, - cost=cost, - epochs=50, - ) - - Random.seed!(67) - soft_rank_kl_results = test_pipeline!( - PipelineLossExperience(); - instance_dim=5, - true_maximizer=ranking, - maximizer=SoftRank(; regularization="kl"), - loss=cost, - error_function=hamming_distance, - true_encoder=true_encoder, - cost=cost, - epochs=50, - ) - - Random.seed!(67) - perturbed_results = test_pipeline!( - PipelineLossExperience(); - instance_dim=5, - true_maximizer=ranking, - maximizer=identity_kw, - loss=Pushforward(PerturbedAdditive(ranking; ε=1.0, nb_samples=10), cost), - error_function=hamming_distance, - true_encoder=true_encoder, - cost=cost, - epochs=50, - ) - - # Check that we achieve better performance than the reinforce trick - @test soft_rank_l2_results.test_cost_gaps[end] < perturbed_results.test_cost_gaps[end] - @test soft_rank_kl_results.test_cost_gaps[end] < perturbed_results.test_cost_gaps[end] -end - -@testitem "Fenchel-Young loss soft rank L2" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Test - Random.seed!(63) - - true_encoder = encoder_factory() - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=ranking, - maximizer=identity_kw, - loss=FenchelYoungLoss(SoftRank()), - error_function=hamming_distance, - true_encoder=true_encoder, - ) -end - -@testitem "Fenchel-Young loss soft rank kl" default_imports = false begin - include("InferOptTestUtils/src/InferOptTestUtils.jl") - using InferOpt, .InferOptTestUtils, LinearAlgebra, Random, Test - Random.seed!(63) - - true_encoder = encoder_factory() - - test_pipeline!( - PipelineLossImitation(); - instance_dim=5, - true_maximizer=ranking, - maximizer=identity_kw, - loss=FenchelYoungLoss(SoftRank(; regularization="kl", ε=10.0)), - error_function=hamming_distance, - true_encoder=true_encoder, - ) -end