-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathtest_frank_wolfe.py
136 lines (110 loc) · 4.06 KB
/
test_frank_wolfe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""Tests for the Frank-Wolfe algorithm."""
import numpy as np
import pytest
from scipy import optimize
import copt as cp
np.random.seed(0)
n_samples, n_features = 20, 16
A = np.random.randn(n_samples, n_features)
w = np.random.randn(n_features)
b = A.dot(w) + np.random.randn(n_samples)
# we will use a logistic loss, which can't have values
# greater than 1
b = np.abs(b / np.max(np.abs(b)))
LOSS_FUNCS = [cp.loss.LogLoss, cp.loss.SquareLoss]
def test_fw_api():
    """Check that FW takes the right arguments and raises the right exceptions."""
    # The solver must accept a plain Python list (not only an ndarray) as x0.
    objective = cp.loss.LogLoss(A, b, 1.0 / n_samples)
    trace = cp.utils.Trace(objective)
    ball = cp.constraint.L1Ball(1.0)
    zero_start = [0] * n_features
    cp.minimize_frank_wolfe(
        objective.f_grad,
        zero_start,
        ball.lmo,
        tol=0,
        lipschitz=objective.lipschitz,
        callback=trace,
    )

    # The Demyanov-Rubinov ("DR") step-size requires a Lipschitz estimate;
    # calling without one must raise a ValueError.
    with pytest.raises(ValueError):
        cp.minimize_frank_wolfe(objective.f_grad, zero_start, ball.lmo, step="DR")
@pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
@pytest.mark.parametrize("loss_grad", LOSS_FUNCS)
def test_fw_l1(loss_grad, alpha):
"""Test result of FW algorithm with L1 constraint."""
f = loss_grad(A, b, 1.0 / n_samples)
cb = cp.utils.Trace(f)
l1ball = cp.constraint.L1Ball(alpha)
opt = cp.minimize_frank_wolfe(
f.f_grad,
np.zeros(n_features),
l1ball.lmo,
tol=1e-3,
lipschitz=f.lipschitz,
callback=cb,
)
assert np.isfinite(opt.x).sum() == n_features
ss = 1 / f.lipschitz
grad = f.f_grad(opt.x)[1]
grad_map = (opt.x - l1ball.prox(opt.x - ss * grad, ss)) / ss
assert np.linalg.norm(grad_map) < 0.3
def test_callback():
    """Make sure that the algorithm exits when the callback returns False."""

    def stop_immediately(_):
        # A False return value signals the solver to terminate.
        return False

    ball = cp.constraint.L1Ball(1)
    loss = cp.loss.SquareLoss(A, b)
    sol = cp.minimize_frank_wolfe(
        loss.f_grad, np.zeros(n_features), ball.lmo, callback=stop_immediately
    )
    # The callback fires on the first iteration, so the solver stops early.
    assert sol.nit < 2
def exact_line_search(kw):
    """Exact line-search step-size oracle for the FW solver.

    ``kw`` is the solver's state dict (keys: ``func_and_grad``, ``x``,
    ``update_direction``, ``max_step_size``).  Returns the step size in
    ``[0, max_step_size]`` minimizing the objective along the direction.
    """
    start = kw["x"]
    direction = kw["update_direction"]
    func_and_grad = kw["func_and_grad"]

    def objective_on_segment(gamma):
        # Objective value at start + gamma * direction; drop the gradient.
        return func_and_grad(start + gamma * direction)[0]

    result = optimize.minimize_scalar(
        objective_on_segment, method='bounded', bounds=[0, kw["max_step_size"]]
    )
    return result.x
@pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
@pytest.mark.parametrize("obj", LOSS_FUNCS)
@pytest.mark.parametrize("step", ["DR", "backtracking", "sublinear", exact_line_search])
def test_fw_backtrack(obj, step, alpha):
"""Test FW with different options of the line-search strategy."""
f = obj(A, b, 1.0 / n_samples)
traceball = cp.constraint.TraceBall(alpha, (4, 4))
opt = cp.minimize_frank_wolfe(
f.f_grad,
np.zeros(n_features),
traceball.lmo,
tol=0,
lipschitz=f.lipschitz,
step=step,
max_iter=1000,
)
assert np.isfinite(opt.x).sum() == n_features
ss = 1 / f.lipschitz
grad = f.f_grad(opt.x)[1]
# this is the proximal mapping, zero at optimum
grad_map = (opt.x - traceball.prox(opt.x - ss * grad, ss)) / ss
assert np.linalg.norm(grad_map) < 0.4
@pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
@pytest.mark.parametrize("obj", LOSS_FUNCS)
@pytest.mark.parametrize("step", ["DR", "backtracking", exact_line_search])
def test_pairwise_fw(obj, step, alpha):
"""Test the Pairwise FW method."""
f = obj(A, b, 1.0 / n_samples)
l1ball = cp.constraint.L1Ball(alpha)
x0 = np.zeros(A.shape[1])
x0[0] = alpha
cb = cp.utils.Trace(f)
opt = cp.minimize_frank_wolfe(
f.f_grad, x0, l1ball.lmo_pairwise, x0_rep=(1., 0),
step=step, lipschitz=f.lipschitz, callback=cb,
variant='pairwise'
)
assert np.isfinite(opt.x).sum() == n_features
ss = 1 / f.lipschitz
grad = f.f_grad(opt.x)[1]
# this is the proximal mapping, zero at optimum
grad_map = (opt.x - l1ball.prox(opt.x - ss * grad, ss)) / ss
assert np.linalg.norm(grad_map) < 0.2