|
| 1 | +#' @param ... ignored |
| 2 | +#' |
| 3 | +#' @rdname GOF |
| 4 | +#' @export |
| 5 | +NSE <- function(yobs, ysim, w, ...) { |
| 6 | + if (missing(w)) w <- rep(1, length(yobs)) |
| 7 | + |
| 8 | + ind <- valindex(yobs, ysim) |
| 9 | + w <- w[ind] |
| 10 | + |
| 11 | + y_mean <- sum(yobs[ind] * w) / sum(w) |
| 12 | + # R2: the portion of regression explained variance, also known as |
| 13 | + # coefficient of determination |
| 14 | + |
| 15 | + # SSR <- sum((ysim - y_mean)^2 * w) |
| 16 | + SST <- sum((yobs[ind] - y_mean)^2 * w) |
| 17 | + # R2 <- SSR / SST |
| 18 | + |
| 19 | + RE <- ysim[ind] - yobs[ind] |
| 20 | + # Bias <- sum(w * RE) / sum(w) # bias |
| 21 | + # Bias_perc <- Bias / y_mean # bias percentage |
| 22 | + # MAE <- sum(w * abs(RE)) / sum(w) # mean absolute error |
| 23 | + RMSE <- sqrt(sum(w * (RE)^2) / sum(w)) # root mean sqrt error |
| 24 | + |
| 25 | + NSE <- 1 - sum((RE)^2 * w) / SST # NSE coefficient |
| 26 | + NSE |
| 27 | +} |
| 28 | + |
| 29 | +#' @rdname GOF |
| 30 | +#' @export |
| 31 | +KGE <- function(yobs, ysim, ...) { |
| 32 | + ind <- valindex(yobs, ysim) |
| 33 | + yobs <- yobs[ind] |
| 34 | + ysim <- ysim[ind] |
| 35 | + |
| 36 | + c1 = cor(yobs, ysim) |
| 37 | + c2 = sd(ysim) / sd(yobs) |
| 38 | + c3 = mean(ysim) / mean(yobs) |
| 39 | + |
| 40 | + 1 - sqrt((c1 - 1)^2 + (c2 - 1)^2 + (c3 - 1)^2) |
| 41 | +} |
| 42 | + |
| 43 | +#' GOF |
| 44 | +#' |
| 45 | +#' Good of fitting |
| 46 | +#' |
| 47 | +#' @param yobs Numeric vector, observations |
| 48 | +#' @param ysim Numeric vector, corresponding simulated values |
| 49 | +#' @param w Numeric vector, weights of every points. If w included, when |
| 50 | +#' calculating mean, Bias, MAE, RMSE and NSE, w will be taken into considered. |
| 51 | +#' @param include.cv If true, cv will be included. |
| 52 | +#' @param include.r If true, r and R2 will be included. |
| 53 | +#' |
| 54 | +#' @return |
| 55 | +#' * `RMSE` root mean square error |
| 56 | +#' * `NSE` NASH coefficient |
| 57 | +#' * `MAE` mean absolute error |
| 58 | +#' * `AI` Agreement index (only good points (w == 1)) participate to |
| 59 | +#' calculate. See details in Zhang et al., (2015). |
| 60 | +#' * `Bias` bias |
| 61 | +#' * `Bias_perc` bias percentage |
| 62 | +#' * `n_sim` number of valid obs |
| 63 | +#' * `cv` Coefficient of variation |
| 64 | +#' * `R2` correlation of determination |
| 65 | +#' * `R` pearson correlation |
| 66 | +#' * `pvalue` pvalue of `R` |
| 67 | +#' |
| 68 | +#' @references |
| 69 | +#' 1. https://en.wikipedia.org/wiki/Coefficient_of_determination |
| 70 | +#' 2. https://en.wikipedia.org/wiki/Explained_sum_of_squares |
| 71 | +#' 3. https://en.wikipedia.org/wiki/Nash%E2%80%93Sutcliffe_model_efficiency_coefficient |
| 72 | +#' 4. Zhang Xiaoyang (2015), http://dx.doi.org/10.1016/j.rse.2014.10.012 |
| 73 | +#' |
| 74 | +#' @examples |
| 75 | +#' yobs = rnorm(100) |
| 76 | +#' ysim = yobs + rnorm(100)/4 |
| 77 | +#' GOF(yobs, ysim) |
| 78 | +#' |
| 79 | +#' @importFrom dplyr tibble |
| 80 | +#' @export |
| 81 | +GOF <- function(yobs, ysim, w, include.cv = FALSE, include.r = TRUE){ |
| 82 | + if (missing(w)) w <- rep(1, length(yobs)) |
| 83 | + |
| 84 | + # remove NA_real_ and Inf values in ysim, yobs and w |
| 85 | + valid <- function(x) !is.na(x) & is.finite(x) |
| 86 | + |
| 87 | + I <- which(valid(ysim) & valid(yobs) & valid(w)) |
| 88 | + # n_obs <- length(yobs) |
| 89 | + n_sim <- length(I) |
| 90 | + |
| 91 | + ysim <- ysim[I] |
| 92 | + yobs <- yobs[I] |
| 93 | + w <- w[I] |
| 94 | + |
| 95 | + if (include.cv) { |
| 96 | + CV_obs <- cv_coef(yobs, w) |
| 97 | + CV_sim <- cv_coef(ysim, w) |
| 98 | + } |
| 99 | + if (is_empty(yobs)){ |
| 100 | + out <- c(RMSE = NA_real_, |
| 101 | + KGE = NA_real_, |
| 102 | + NSE = NA_real_, MAE = NA_real_, AI = NA_real_, |
| 103 | + Bias = NA_real_, Bias_perc = NA_real_, n_sim = NA_real_) |
| 104 | + |
| 105 | + if (include.r) out <- c(out, R2 = NA_real_, R = NA_real_, pvalue = NA_real_) |
| 106 | + if (include.cv) out <- c(out, obs = CV_obs, sim = CV_sim) |
| 107 | + return(out) |
| 108 | + } |
| 109 | + |
| 110 | + # R2: the portion of regression explained variance, also known as |
| 111 | + # coefficient of determination |
| 112 | + KGE = KGE(ysim, yobs) |
| 113 | + # https://en.wikipedia.org/wiki/Coefficient_of_determination |
| 114 | + # https://en.wikipedia.org/wiki/Explained_sum_of_squares |
| 115 | + y_mean <- sum(yobs * w) / sum(w) |
| 116 | + |
| 117 | + SSR <- sum( (ysim - y_mean)^2 * w) |
| 118 | + SST <- sum( (yobs - y_mean)^2 * w) |
| 119 | + # R2 <- SSR / SST |
| 120 | + |
| 121 | + RE <- ysim - yobs |
| 122 | + Bias <- sum ( w*RE) /sum(w) # bias |
| 123 | + Bias_perc <- Bias/y_mean # bias percentage |
| 124 | + MAE <- sum ( w*abs(RE))/sum(w) # mean absolute error |
| 125 | + RMSE <- sqrt( sum(w*(RE)^2)/sum(w) ) # root mean sqrt error |
| 126 | + |
| 127 | + # https://en.wikipedia.org/wiki/Nash%E2%80%93Sutcliffe_model_efficiency_coefficient |
| 128 | + NSE <- 1 - sum( (RE)^2 * w) / SST # NSE coefficient |
| 129 | + |
| 130 | + # Observations number are not same, so comparing correlation coefficient |
| 131 | + # was meaningless. |
| 132 | + # In the current, I have no idea how to add weights `R`. |
| 133 | + if (include.r){ |
| 134 | + R <- NA_real_ |
| 135 | + pvalue <- NA_real_ |
| 136 | + |
| 137 | + tryCatch({ |
| 138 | + cor.obj <- cor.test(yobs, ysim, use = "complete.obs") |
| 139 | + R <- cor.obj$estimate[[1]] |
| 140 | + pvalue <- cor.obj$p.value |
| 141 | + }, error = function(e){ |
| 142 | + message(e$message) |
| 143 | + }) |
| 144 | + R2 = R^2 |
| 145 | + } |
| 146 | + # In Linear regression, R2 = R^2 (R is pearson cor) |
| 147 | + # R2 <- summary(lm(ysim ~ yobs))$r.squared # low efficient |
| 148 | + |
| 149 | + # AI: Agreement Index (only good values(w==1) calculate AI) |
| 150 | + AI <- NA_real_ |
| 151 | + I2 <- which(w == 1) |
| 152 | + if (length(I2) >= 2) { |
| 153 | + yobs = yobs[I2] |
| 154 | + ysim = ysim[I2] |
| 155 | + y_mean = mean(yobs) |
| 156 | + AI = 1 - sum( (ysim - yobs)^2 ) / sum( (abs(ysim - y_mean) + abs(yobs - y_mean))^2 ) |
| 157 | + } |
| 158 | + |
| 159 | + out <- tibble(R, pvalue, R2, NSE, KGE, RMSE, MAE, |
| 160 | + Bias, Bias_perc, AI = AI, n_sim = n_sim) |
| 161 | + if (include.cv) out <- cbind(out, CV_obs, CV_sim) |
| 162 | + return(out) |
| 163 | +} |
| 164 | + |
| 165 | +#' skewness and kurtosis |
| 166 | +#' |
| 167 | +#' Inherit from package `e1071` |
| 168 | +#' @param x a numeric vector containing the values whose skewness is to be |
| 169 | +#' computed. |
| 170 | +#' @param na.rm a logical value indicating whether NA values should be stripped |
| 171 | +#' before the computation proceeds. |
| 172 | +#' @param type an integer between 1 and 3 selecting one of the algorithms for |
| 173 | +#' computing skewness. |
| 174 | +#' |
| 175 | +#' @examples |
| 176 | +#' x <- rnorm(100) |
| 177 | +#' coef_kurtosis <- kurtosis(x) |
| 178 | +#' coef_skewness <- skewness(x) |
| 179 | +#' @keywords internal |
| 180 | +#' @export |
| 181 | +kurtosis <- function(x, na.rm = FALSE, type = 3) { |
| 182 | + if (any(ina <- is.na(x))) { |
| 183 | + if (na.rm) { |
| 184 | + x <- x[!ina] |
| 185 | + } else { |
| 186 | + return(NA_real_) |
| 187 | + } |
| 188 | + } |
| 189 | + if (!(type %in% (1:3))) stop("Invalid 'type' argument.") |
| 190 | + n <- length(x) |
| 191 | + x <- x - mean(x) |
| 192 | + r <- n * sum(x^4) / (sum(x^2)^2) |
| 193 | + |
| 194 | + y <- if (type == 1) { |
| 195 | + r - 3 |
| 196 | + } else if (type == 2) { |
| 197 | + if (n < 4) { |
| 198 | + warning("Need at least 4 complete observations.") |
| 199 | + return(NA_real_) |
| 200 | + } |
| 201 | + ((n + 1) * (r - 3) + 6) * (n - 1) / ((n - 2) * (n - 3)) |
| 202 | + } else { |
| 203 | + r * (1 - 1 / n)^2 - 3 |
| 204 | + } |
| 205 | + y |
| 206 | +} |
| 207 | + |
| 208 | +#' @rdname kurtosis |
| 209 | +#' @export |
| 210 | +skewness <- function(x, na.rm = FALSE, type = 3) { |
| 211 | + if (any(ina <- is.na(x))) { |
| 212 | + if (na.rm) { |
| 213 | + x <- x[!ina] |
| 214 | + } else { |
| 215 | + return(NA_real_) |
| 216 | + } |
| 217 | + } |
| 218 | + if (!(type %in% (1:3))) stop("Invalid 'type' argument.") |
| 219 | + n <- length(x) |
| 220 | + x <- x - mean(x) |
| 221 | + y <- sqrt(n) * sum(x^3) / (sum(x^2)^(3 / 2)) |
| 222 | + |
| 223 | + if (type == 2) { |
| 224 | + if (n < 3) { |
| 225 | + warning("Need at least 3 complete observations.") |
| 226 | + return(NA_real_) |
| 227 | + } |
| 228 | + y <- y * sqrt(n * (n - 1)) / (n - 2) |
| 229 | + } else if (type == 3) { |
| 230 | + y <- y * ((1 - 1 / n))^(3 / 2) |
| 231 | + } |
| 232 | + y |
| 233 | +} |
| 234 | + |
| 235 | +#' weighted CV |
| 236 | +#' @param x Numeric vector |
| 237 | +#' @param w weights of different point |
| 238 | +#' |
| 239 | +#' @return Named numeric vector, (mean, sd, cv). |
| 240 | +#' @examples |
| 241 | +#' x <- rnorm(100) |
| 242 | +#' coefs <- cv_coef(x) |
| 243 | +#' @keywords internal |
| 244 | +#' @export |
| 245 | +cv_coef <- function(x, w) { |
| 246 | + if (missing(w)) w <- rep(1, length(x)) |
| 247 | + if (length(x) == 0) { |
| 248 | + return(c(mean = NA_real_, sd = NA_real_, cv = NA_real_)) |
| 249 | + } |
| 250 | + # rm NA_real_ |
| 251 | + I <- is.finite(x) |
| 252 | + x <- x[I] |
| 253 | + w <- w[I] |
| 254 | + |
| 255 | + mean <- sum(x * w) / sum(w) |
| 256 | + sd <- sqrt(sum((x - mean)^2 * w) / sum(w)) |
| 257 | + cv <- sd / mean |
| 258 | + c(mean = mean, sd = sd, cv = cv) # quickly return |
| 259 | +} |
| 260 | + |
| 261 | +#' Critical value of determined correlation |
| 262 | +#' |
| 263 | +#' @param n length of observation. |
| 264 | +#' @param NumberOfPredictor Number of predictor, including constant. |
| 265 | +#' @param alpha significant level. |
| 266 | +#' |
| 267 | +#' @return `F` statistic and `R2` at significant level. |
| 268 | +#' |
| 269 | +#' @keywords internal |
| 270 | +#' @references |
| 271 | +#' Chen Yanguang (2012), Geographical Data analysis with MATLAB. |
| 272 | +#' @examples |
| 273 | +#' R2_critical <- R2_sign(30, NumberOfPredictor = 2, alpha = 0.05) |
| 274 | +#' @export |
| 275 | +R2_sign <- function(n, NumberOfPredictor = 2, alpha = 0.05) { |
| 276 | + freedom_r <- NumberOfPredictor - 1 # regression |
| 277 | + freedom_e <- n - NumberOfPredictor # error |
| 278 | + |
| 279 | + F <- qf(1 - alpha, freedom_r, freedom_e) |
| 280 | + R2 <- 1 - 1 / (1 + F * freedom_r / freedom_e) |
| 281 | + |
| 282 | + # F = 485.1 |
| 283 | + # F = R2/freedom_r/((1-R2)/freedom_e) |
| 284 | + # Rc = sqrt(/(qf(1 - alpha, 1, freedom) + freedom)) %TRUE>% print # 0.11215 |
| 285 | + return(list(F = F, R2 = R2)) |
| 286 | +} |
0 commit comments