Skip to content

Commit 3a3b401

Browse files
committed
create _fit_simple and _fit_ci helpers
1 parent e2f32b2 commit 3a3b401

File tree

2 files changed

+142
-14
lines changed

2 files changed

+142
-14
lines changed

probscale/tests/test_viz.py

+36
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,42 @@ def plot_data():
393393
return data
394394

395395

396+
@pytest.mark.parametrize(('fitlogs', 'known_yhat'), [
397+
(None, numpy.array([0.7887, 3.8946, 7.0005, 10.1065, 13.2124, 16.3183])),
398+
('x', numpy.array([0.2711, 1.2784, 1.5988, 1.7953, 1.9373, 2.0487])),
399+
('y', numpy.array([2.2006e+00, 4.9139e+01, 1.0972e+03, 2.4501e+04, 5.4711e+05, 1.2217e+07])),
400+
('both', numpy.array([1.3114, 3.5908, 4.9472, 6.0211, 6.9402, 7.7577])),
401+
])
402+
def test__fit_simple(plot_data, fitlogs, known_yhat):
403+
x = numpy.arange(1, len(plot_data)+1)
404+
known_results = {'slope': 0.5177, 'intercept': 0.2711}
405+
xhat = x[::6]
406+
yhat, results = viz._fit_simple(x, plot_data, xhat, fitlogs=fitlogs)
407+
assert abs(results['intercept'] - known_results['intercept']) < 0.0001
408+
assert abs(results['slope'] - known_results['slope']) < 0.0001
409+
nptest.assert_allclose(yhat, known_yhat, rtol=0.0001)
410+
411+
412+
@seed
413+
@pytest.mark.parametrize(('fitlogs', 'known_lo', 'known_hi'), [
414+
(None, numpy.array([-0.7944, 2.7051, 6.1974, 9.2612, 11.9382, 14.4290]),
415+
numpy.array([ 2.1447, 4.8360, 7.7140, 10.8646, 14.1014, 17.4432])),
416+
('x', numpy.array([-1.4098, -0.2210, 0.1387, 0.3585, 0.5147, 0.6417]),
417+
numpy.array([ 1.7067, 2.5661, 2.8468, 3.0169, 3.1400, 3.2341])),
418+
('y', numpy.array([4.5187e-01, 1.4956e+01, 4.9145e+02, 1.0522e+04, 1.5299e+05, 1.8468e+06]),
419+
numpy.array([8.5396e+00, 1.2596e+02, 2.2396e+03, 5.2290e+04, 1.3310e+06, 3.7627e+07])),
420+
('both', numpy.array([0.2442, 0.8017, 1.1488, 1.4312, 1.6731, 1.8997]),
421+
numpy.array([5.5107, 13.0148 , 17.232, 20.4285, 23.1035, 25.3843])),
422+
])
423+
def test__fit_ci(plot_data, fitlogs, known_lo, known_hi):
424+
x = numpy.arange(1, len(plot_data)+1)
425+
xhat = x[::6]
426+
yhat_lo, yhat_hi = viz._fit_ci(x, plot_data, xhat, fitlogs=fitlogs, niter=1000)
427+
428+
nptest.assert_allclose(yhat_lo, known_lo, rtol=0.001)
429+
nptest.assert_allclose(yhat_hi, known_hi, rtol=0.001)
430+
431+
396432
@pytest.mark.mpl_image_compare(baseline_dir=BASELINE_DIR, tolerance=10)
397433
def test_probplot_prob(plot_data):
398434
fig, ax = plt.subplots()

probscale/viz.py

+106-14
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import numpy
1+
import copy
2+
3+
import numpy
24
from matplotlib import pyplot
35

46
from .probscale import _minimal_norm
@@ -387,17 +389,21 @@ def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None,
387389
-------
388390
xhat, yhat : numpy arrays
389391
Linear model estimates of ``x`` and ``y``.
390-
results : a statmodels result object
391-
The object returned by numpy.polyfit
392+
results : dict
393+
Dictionary of linear fit results. Keys include:
392394
393-
"""
395+
- slope
396+
- intersept
397+
- yhat_lo (lower confidence interval of the estimated y-vals)
398+
- yhat_hi (upper confidence interval of the estimated y-vals)
394399
400+
"""
395401
fitprobs = validate.fit_argument(fitprobs, "fitprobs")
396402
fitlogs = validate.fit_argument(fitlogs, "fitlogs")
397403

398404
# maybe set xhat to default values
399405
if xhat is None:
400-
xhat = numpy.array([numpy.min(x), numpy.max(x)])
406+
xhat = copy.copy(x)
401407

402408
# maybe set dist to default value
403409
if dist is None:
@@ -420,23 +426,109 @@ def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None,
420426
if fitlogs in ['y', 'both']:
421427
y = numpy.log(y)
422428

423-
# do the best-fit
424-
coeffs = numpy.polyfit(x, y, 1)
429+
yhat, results = _fit_simple(x, y, xhat, fitlogs=fitlogs)
425430

426-
# estimate y values
427-
yhat = _estimate_from_fit(xhat, coeffs[0], coeffs[1],
428-
xlog=fitlogs in ['x', 'both'],
429-
ylog=fitlogs in ['y', 'both'])
431+
if estimate_ci:
432+
yhat_lo, yhat_hi = _fit_ci(x, y, xhat, fitlogs=fitlogs,
433+
niter=niter, alpha=alpha)
434+
else:
435+
yhat_lo, yhat_hi = None, None
430436

431437
# maybe undo the ppf transform
432438
if fitprobs in ['y', 'both']:
433-
yhat = 100.* dist.cdf(yhat)
439+
yhat = 100. * dist.cdf(yhat)
440+
if yhat_lo is not None:
441+
yhat_lo = 100. * dist.cdf(yhat_lo)
442+
yhat_hi = 100. * dist.cdf(yhat_hi)
434443

435444
# maybe undo ppf transform
436445
if fitprobs in ['x', 'both']:
437-
xhat = 100.* dist.cdf(xhat)
446+
xhat = 100. * dist.cdf(xhat)
447+
448+
results['yhat_lo'] = yhat_lo
449+
results['yhat_hi'] = yhat_hi
450+
451+
return xhat, yhat, results
452+
453+
454+
def _fit_simple(x, y, xhat, fitlogs=None):
455+
"""
456+
Simple linear fit of x and y data using ``numpy.polyfit``.
457+
458+
Parameters
459+
----------
460+
x, y : array-like
461+
fitlogs : str, optional.
462+
Defines which data should be log-transformed. Valid values are
463+
'x', 'y', or 'both'.
464+
465+
Returns
466+
-------
467+
xhat, yhat : array-like
468+
Estimates of x and y based on the linear fit
469+
results : dict
470+
Dictionary of the fit coefficients
471+
472+
See also
473+
--------
474+
numpy.polyfit
475+
476+
"""
477+
478+
# do the best-fit
479+
coeffs = numpy.polyfit(x, y, 1)
480+
481+
results = {
482+
'slope': coeffs[0],
483+
'intercept': coeffs[1]
484+
}
485+
486+
# estimate y values
487+
yhat = _estimate_from_fit(xhat, coeffs[0], coeffs[1],
488+
xlog=fitlogs in ['x', 'both'],
489+
ylog=fitlogs in ['y', 'both'])
490+
491+
return yhat, results
492+
493+
494+
def _fit_ci(x, y, xhat, fitlogs=None, niter=10000, alpha=0.05):
495+
"""
496+
Percentile method bootstrapping of linear fit of x and y data using
497+
``numpy.polyfit``.
498+
499+
Parameters
500+
----------
501+
x, y : array-like
502+
fitlogs : str, optional.
503+
Defines which data should be log-transformed. Valid values are
504+
'x', 'y', or 'both'.
505+
niter : int, optional (default is 10000)
506+
Number of bootstrap iterations to use
507+
alpha : float, optional
508+
Confidence level of the estimate.
509+
510+
Returns
511+
-------
512+
xhat, yhat : array-like
513+
Estimates of x and y based on the linear fit
514+
results : dict
515+
Dictionary of the fit coefficients
516+
517+
See also
518+
--------
519+
numpy.polyfit
520+
521+
"""
522+
523+
index = _make_boot_index(len(x), niter)
524+
yhat_array = numpy.array([
525+
_fit_simple(x[ii], y[ii], xhat, fitlogs=fitlogs)[0]
526+
for ii in index
527+
])
438528

439-
return xhat, yhat, coeffs
529+
percentiles = 100 * numpy.array([alpha*0.5, 1 - alpha*0.5])
530+
yhat_lo, yhat_hi = numpy.percentile(yhat_array, percentiles, axis=0)
531+
return yhat_lo, yhat_hi
440532

441533

442534
def _estimate_from_fit(xdata, slope, intercept, xlog=False, ylog=False):

0 commit comments

Comments
 (0)