1
- import numpy
1
+ import copy
2
+
3
+ import numpy
2
4
from matplotlib import pyplot
3
5
4
6
from .probscale import _minimal_norm
@@ -387,17 +389,21 @@ def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None,
387
389
-------
388
390
xhat, yhat : numpy arrays
389
391
Linear model estimates of ``x`` and ``y``.
390
- results : a statmodels result object
391
- The object returned by numpy.polyfit
392
+ results : dict
393
+ Dictionary of linear fit results. Keys include:
392
394
393
- """
395
+ - slope
396
+ - intersept
397
+ - yhat_lo (lower confidence interval of the estimated y-vals)
398
+ - yhat_hi (upper confidence interval of the estimated y-vals)
394
399
400
+ """
395
401
fitprobs = validate .fit_argument (fitprobs , "fitprobs" )
396
402
fitlogs = validate .fit_argument (fitlogs , "fitlogs" )
397
403
398
404
# maybe set xhat to default values
399
405
if xhat is None :
400
- xhat = numpy . array ([ numpy . min ( x ), numpy . max ( x )] )
406
+ xhat = copy . copy ( x )
401
407
402
408
# maybe set dist to default value
403
409
if dist is None :
@@ -420,23 +426,109 @@ def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None,
420
426
if fitlogs in ['y' , 'both' ]:
421
427
y = numpy .log (y )
422
428
423
- # do the best-fit
424
- coeffs = numpy .polyfit (x , y , 1 )
429
+ yhat , results = _fit_simple (x , y , xhat , fitlogs = fitlogs )
425
430
426
- # estimate y values
427
- yhat = _estimate_from_fit (xhat , coeffs [0 ], coeffs [1 ],
428
- xlog = fitlogs in ['x' , 'both' ],
429
- ylog = fitlogs in ['y' , 'both' ])
431
+ if estimate_ci :
432
+ yhat_lo , yhat_hi = _fit_ci (x , y , xhat , fitlogs = fitlogs ,
433
+ niter = niter , alpha = alpha )
434
+ else :
435
+ yhat_lo , yhat_hi = None , None
430
436
431
437
# maybe undo the ppf transform
432
438
if fitprobs in ['y' , 'both' ]:
433
- yhat = 100. * dist .cdf (yhat )
439
+ yhat = 100. * dist .cdf (yhat )
440
+ if yhat_lo is not None :
441
+ yhat_lo = 100. * dist .cdf (yhat_lo )
442
+ yhat_hi = 100. * dist .cdf (yhat_hi )
434
443
435
444
# maybe undo ppf transform
436
445
if fitprobs in ['x' , 'both' ]:
437
- xhat = 100. * dist .cdf (xhat )
446
+ xhat = 100. * dist .cdf (xhat )
447
+
448
+ results ['yhat_lo' ] = yhat_lo
449
+ results ['yhat_hi' ] = yhat_hi
450
+
451
+ return xhat , yhat , results
452
+
453
+
454
+ def _fit_simple (x , y , xhat , fitlogs = None ):
455
+ """
456
+ Simple linear fit of x and y data using ``numpy.polyfit``.
457
+
458
+ Parameters
459
+ ----------
460
+ x, y : array-like
461
+ fitlogs : str, optional.
462
+ Defines which data should be log-transformed. Valid values are
463
+ 'x', 'y', or 'both'.
464
+
465
+ Returns
466
+ -------
467
+ xhat, yhat : array-like
468
+ Estimates of x and y based on the linear fit
469
+ results : dict
470
+ Dictionary of the fit coefficients
471
+
472
+ See also
473
+ --------
474
+ numpy.polyfit
475
+
476
+ """
477
+
478
+ # do the best-fit
479
+ coeffs = numpy .polyfit (x , y , 1 )
480
+
481
+ results = {
482
+ 'slope' : coeffs [0 ],
483
+ 'intercept' : coeffs [1 ]
484
+ }
485
+
486
+ # estimate y values
487
+ yhat = _estimate_from_fit (xhat , coeffs [0 ], coeffs [1 ],
488
+ xlog = fitlogs in ['x' , 'both' ],
489
+ ylog = fitlogs in ['y' , 'both' ])
490
+
491
+ return yhat , results
492
+
493
+
494
+ def _fit_ci (x , y , xhat , fitlogs = None , niter = 10000 , alpha = 0.05 ):
495
+ """
496
+ Percentile method bootstrapping of linear fit of x and y data using
497
+ ``numpy.polyfit``.
498
+
499
+ Parameters
500
+ ----------
501
+ x, y : array-like
502
+ fitlogs : str, optional.
503
+ Defines which data should be log-transformed. Valid values are
504
+ 'x', 'y', or 'both'.
505
+ niter : int, optional (default is 10000)
506
+ Number of bootstrap iterations to use
507
+ alpha : float, optional
508
+ Confidence level of the estimate.
509
+
510
+ Returns
511
+ -------
512
+ xhat, yhat : array-like
513
+ Estimates of x and y based on the linear fit
514
+ results : dict
515
+ Dictionary of the fit coefficients
516
+
517
+ See also
518
+ --------
519
+ numpy.polyfit
520
+
521
+ """
522
+
523
+ index = _make_boot_index (len (x ), niter )
524
+ yhat_array = numpy .array ([
525
+ _fit_simple (x [ii ], y [ii ], xhat , fitlogs = fitlogs )[0 ]
526
+ for ii in index
527
+ ])
438
528
439
- return xhat , yhat , coeffs
529
+ percentiles = 100 * numpy .array ([alpha * 0.5 , 1 - alpha * 0.5 ])
530
+ yhat_lo , yhat_hi = numpy .percentile (yhat_array , percentiles , axis = 0 )
531
+ return yhat_lo , yhat_hi
440
532
441
533
442
534
def _estimate_from_fit (xdata , slope , intercept , xlog = False , ylog = False ):
0 commit comments