mydata = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_precip.dat"),
ncol = 4, byrow = T)
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = as.data.frame(cbind(lat, long, elevation)) #predictor variables
library(MPV)
library(leaps)
library(MASS)
library(akima)
library(fields)
library(fitdistrplus)
library(sm)
library(locfit)
library(stats)
library(LatticeKrig)
# functions to produce boxplots with whiskers at 5th and 95th percentile
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot-stats.r")
thebest = function(X, Y, deg, family) {
nvar = length(X[1, ])
N = length(Y)
# smallest alpha that keeps enough points in each neighborhood
minalpha = 2 * (nvar * deg + 1)/N
alpha_grid = seq(minalpha, 1, by = 0.05)
zz = gcvplot(Y ~ X, alpha = alpha_grid, deg = deg, kern = "bisq", ev = dat(),
scale = TRUE, family = family)
mygcv = sort(zz$values)[1]
# pick the best alpha from the given family, i.e. the one with the least GCV
z6 = order(zz$values)[1]
bestalpha = alpha_grid[z6]
return(list(gcv = mygcv, bestalpha = bestalpha, family = family))
}
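For reference, a typical call looks like this (the "gaussian" family here is only an illustration; the actual calls appear in the sections below):
zb = thebest(as.matrix(X), precip, 1, "gaussian")
zb$bestalpha # neighborhood fraction that minimizes GCV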
(a) Fit a best linear regression model
# Find the best model using PRESS
# all combinations
combs = leaps(X, precip)
combos = combs$which
ncombos = length(combos[, 1])
xpress = 1:ncombos
for (i in 1:ncombos) {
zz = lm(precip ~ as.matrix(X[, combos[i, ]]))
xpress[i] = PRESS(zz)
}
# best model based on PRESS
zc = order(xpress)[1]
# indicates which variables are present in the best model
combos[zc, ]
## 1 2 3
## FALSE TRUE FALSE
# best model omits lat and elevation
# Compare with stepAIC
zz = lm(precip ~ elevation + lat + long)
bestmodel = stepAIC(zz, trace = FALSE)
(b) Perform ANOVA and model diagnostics
n = length(precip)
par(mfrow = c(2, 2))
# Model Diagnostics: Check normality, autocorrelation and homoskedasticity
# of residuals
error = resid(bestmodel)
Y_hat = precip - error
hist(error, probability = T)
# fit and plot a normal distribution to the residuals..
lines(error[order(error)], dnorm(error[order(error)], mean = mean(error), sd = sd(error)),
col = "red")
# nonparametric PDF
sm.density(error, add = T, col = "blue")
legend("topright", legend = c("mean error", "non par"), lty = 1, lwd = c(1,
1), col = c("red", "blue"))
# Homoskedastic: residuals versus y_hat
plot(Y_hat, error)
abline(h = 0, col = "red")
title("Homoskedasticity")
# Check normality of residuals
qqnorm(error, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Normal QQPlot of Residuals")
qqline(error, col = "red")
# Autocorrelation test
acf(error)
# ANOVA
bestmodel$anova
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## precip ~ elevation + lat + long
##
## Final Model:
## precip ~ long
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 487 26328826 5355
## 2 - elevation 1 16565 488 26345391 5353
## 3 - lat 1 24373 489 26369763 5352
# p-value = 2.8 x 10^-9; the regression is significant if the p-value is less
# than alpha, so the regression is significant
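The quoted p-value can be read directly off the model summary; a minimal sketch:
summary(bestmodel) # the overall F-test p-value appears on the last line
fstat = summary(bestmodel)$fstatistic
pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE) # the same p-value, extracted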
(c) Compute cross-validated and fitted estimates at each observation point and plot them against the observed values.
# get the cross validated estimates..
n = length(precip)
y_hat = 1:n
index = 1:n
for (i in 1:n) {
index1 = index[index != i]
Xval = long[index1]
Yval = precip[index1]
zz = lsfit(Xval, Yval)
# now estimate at the point that was dropped
xpred = c(1, long[i])
y_hat[i] = sum(zz$coef * xpred)
}
par(mfrow = c(1, 1))
plot(precip, y_hat, ylab = "estimated values")
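As a cross-check, the same leave-one-out estimates follow from the standard identity for linear models, yhat(-i) = y_i - e_i/(1 - h_ii), without refitting n times (a sketch using the best model above):
fit = lm(precip ~ long)
h = hatvalues(fit)
loo = precip - residuals(fit)/(1 - h) # leave-one-out predictions
max(abs(loo - y_hat)) # should be ~0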
(d) Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
n = length(precip)
nsim = 500
# initialize vectors
rmse = 1:nsim
rsq = 1:nsim
index = 1:n
N10 = round(0.1 * n)
for (i in 1:nsim) {
drop = sample(c(1:n), N10)
keep = setdiff(index, drop)
xx = long[keep]
y = precip[keep]
zz = lm(y ~ xx)
xx = long[drop]
yhat = predict(zz, newdata = as.data.frame(xx), type = "response")
resid = precip[drop] - yhat
rmse[i] = sqrt(mean((precip[drop] - yhat)^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum((precip[drop] - yhat)^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted\nvalues of the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted\nvalues of the 10% dropped")
(e) Estimate the precipitation and the standard errors on the DEM grid and show them as 3-D plots
## Now we wish to estimate/predict at the points on the DEM
mydata = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_precip.dat"),
ncol = 4, byrow = T)
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
zz = lm(precip ~ elevation + lat + long)
z1 = stepAIC(zz)
## Start: AIC=5355
## precip ~ elevation + lat + long
##
## Df Sum of Sq RSS AIC
## - elevation 1 16565 26345391 5353
## - lat 1 37323 26366148 5354
## <none> 26328826 5355
## - long 1 1843395 28172221 5386
##
## Step: AIC=5353
## precip ~ lat + long
##
## Df Sum of Sq RSS AIC
## - lat 1 24373 26369763 5352
## <none> 26345391 5353
## - long 1 1996901 28342292 5387
##
## Step: AIC=5352
## precip ~ long
##
## Df Sum of Sq RSS AIC
## <none> 26369763 5352
## - long 1 1977078 28346841 5385
predpoints = read.table("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat",
sep = "")
Xpred = cbind(predpoints[, 1], predpoints[, 2], predpoints[, 3])
X = as.data.frame(Xpred)
names(X) = c("lat", "long", "elevation")
ypred = predict.lm(z1, newdata = X, type = "response", se.fit = TRUE)
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
par(mfrow = c(1, 2))
ypred.mat = matrix(ypred$fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat), main = "Estimated Precipitation")
contour(long1, lat1, t(ypred.mat), add = T)
ypred.mat = matrix(ypred$se.fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat), main = "Estimated Error")
contour(long1, lat1, t(ypred.mat), add = T)
(a) Fit a best generalized linear model
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = as.matrix(cbind(lat, long, elevation)) #predictor variables
# To fit a glm, we need to know which family to choose
xeval = seq(max(0, min(precip) - sd(precip)), max(precip) + sd(precip), length = length(precip))
# fit gamma and weibull using fitdistrplus
options(warn = -1) #suppress fitdist warnings
zgamma = fitdist(precip, "gamma")
zweibull = fitdist(precip, "weibull")
# fit other distributions
fitnorm = dnorm(xeval, mean = mean(precip), sd = sd(precip))
fitlnorm = dlnorm(xeval, meanlog = mean(log(precip)), sdlog = sd(log(precip)))
fitexp = dexp(xeval, rate = (1/mean(precip)))
par(mfrow = c(1, 1))
# plot hist and overlay
hist(precip, probability = TRUE, ylim = c(0, 0.005))
lines(xeval, fitlnorm, col = "orange")
lines(xeval, fitnorm, col = "red")
lines(xeval, fitexp, col = "green")
lines(xeval, dgamma(xeval, shape = zgamma$estimate[1], scale = 1/zgamma$estimate[2]),
col = "blue")
lines(xeval, dweibull(xeval, shape = zweibull$estimate[1], scale = zweibull$estimate[2]),
col = "purple")
legend("topright", legend = c("lognormal", "normal", "exp", "gamma", "weibull"),
lty = 1, lwd = 1, col = c("orange", "red", "green", "blue", "purple"))
# Fit model with gamma family; find best link function and model using AIC
X = as.data.frame(X)
# try each candidate link for the Gamma family
zz = glm(precip ~ ., data = X, family = Gamma(link = "inverse"))
bestmodel = stepAIC(zz, trace = F)
zz = glm(precip ~ ., data = X, family = Gamma(link = "log"))
bestmodel = stepAIC(zz, trace = F)
zz = glm(precip ~ ., data = X, family = Gamma(link = "identity"))
bestmodel = stepAIC(zz, trace = F)
# identity is the best link function; it has the lowest AIC
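A more compact way to make the same comparison (a sketch: refit each candidate link and tabulate the AIC of the stepAIC-selected model):
links = c("inverse", "log", "identity")
sapply(links, function(lk) {
AIC(stepAIC(glm(precip ~ ., data = X, family = Gamma(link = lk)), trace = FALSE))
})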
(b) Perform ANOVA and model Diagnostics
n = length(precip)
par(mfrow = c(2, 2))
# Model Diagnostics: Check normality, autocorrelation and homoskedasticity
# of residuals
error = resid(bestmodel, type = "response") #response-scale residuals
Y_hat = precip - error
hist(error, probability = T)
# fit and plot a normal distribution to the residuals..
lines(error[order(error)], dnorm(error[order(error)], mean = mean(error), sd = sd(error)),
col = "red")
# nonparametric PDF
sm.density(error, add = T, col = "blue")
legend("topright", legend = c("mean error", "non par"), lty = 1, lwd = c(1,
1), col = c("red", "blue"))
# Homoskedastic: residuals versus y_hat
plot(Y_hat, error)
abline(h = 0, col = "red")
title("Homoskedasticity")
# Check normality of residuals
qqnorm(error, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Normal QQPlot of Residuals")
qqline(error, col = "red")
# Autocorrelation test
acf(error)
# ANOVA
summary(bestmodel)
##
## Call:
## glm(formula = precip ~ long, family = Gamma(link = "identity"),
## data = X)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9874 -0.3208 -0.0687 0.2125 1.3466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3888.40 554.64 -7.01 7.9e-12 ***
## long -41.85 5.27 -7.94 1.4e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1633)
##
## Null deviance: 87.027 on 490 degrees of freedom
## Residual deviance: 78.531 on 489 degrees of freedom
## AIC: 6596
##
## Number of Fisher Scoring iterations: 6
(c) Compute cross-validated and fitted estimates at each observation point and plot them against the observed values.
# get the cross validated estimates..
n = length(precip)
yhat = 1:n
index = 1:n
for (i in 1:n) {
index1 = index[index != i]
Xval = long[index1]
Yval = precip[index1]
zz = glm(Yval ~ Xval, family = Gamma(link = "identity"))
# now estimate at the point that was dropped
xpred = c(1, long[i])
yhat[i] = sum(zz$coef * xpred)
}
par(mfrow = c(2, 1))
plot(precip, yhat, ylab = "cross validated estimates")
plot(precip, Y_hat, ylab = "fitted estimates")
(d) Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
n = length(precip)
nsim = 500
# initialize vectors
rmse = 1:nsim
rsq = 1:nsim
index = 1:n
N10 = round(0.1 * n)
for (i in 1:nsim) {
drop = sample(c(1:n), N10)
keep = setdiff(index, drop)
xx = long[keep]
y = precip[keep]
xx = as.data.frame(xx)
zz = glm(y ~ ., data = xx, family = Gamma(link = "identity"))
xx = long[drop]
yhat = predict(zz, newdata = as.data.frame(xx), type = "response")
resid = precip[drop] - yhat
rmse[i] = sqrt(mean((precip[drop] - yhat)^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum((precip[drop] - yhat)^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted values\nof the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted values\nof the 10% dropped")
(e) Estimate the precipitation and the standard errors on the DEM grid and show them as 3-D plots as in (v) above
X = as.data.frame(X)
zz = glm(precip ~ ., data = X, family = Gamma(link = "log"))
bestmodel = stepAIC(zz)
## Start: AIC=6604
## precip ~ lat + long + elevation
##
## Df Deviance AIC
## - elevation 1 79.1 6602
## - lat 1 79.2 6603
## <none> 79.0 6604
## - long 1 86.4 6646
##
## Step: AIC=6602
## precip ~ lat + long
##
## Df Deviance AIC
## - lat 1 79.2 6601
## <none> 79.1 6602
## - long 1 87.0 6648
##
## Step: AIC=6601
## precip ~ long
##
## Df Deviance AIC
## <none> 79.2 6601
## - long 1 87.0 6646
predpoints = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat"),
ncol = 3, byrow = T)
X = as.data.frame(predpoints)
names(X) = c("lat", "long", "elevation")
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
ypred = predict.glm(bestmodel, newdata = X, type = "response", se.fit = TRUE)
par(mfrow = c(1, 2))
ypred.mat = matrix(ypred$fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat))
contour(long1, lat1, t(ypred.mat), add = T)
ypred.mat = matrix(ypred$se.fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat))
contour(long1, lat1, t(ypred.mat), add = T)
Data:
Xf = matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), nrow = 10, ncol = 1, byrow = T)
Yf = matrix(c(-1.4, 2.45, 1.45, 5.38, 5.6, 5.99, 8.1, 7.54, 8.24, 10.8), nrow = 10,
ncol = 1, byrow = T)
N = length(Yf)
alpha = 0.5
K = round(alpha * N)
L = matrix(0, ncol = N, nrow = N)
yest = as.numeric(1:N)
(i) Evaluate the L matrix;
# by formula
for (i in 1:N) {
# the point at which you want the estimation.
xp = Xf[i, ]
xx = rbind(xp, Xf)
xx = as.matrix(dist(xx))
nxx = length(xx[1, ])
xdist = xx[1, 2:nxx]
xorder = order(xdist)
kdist = xdist[xorder[K]] #distance to Kth nearest neighbor
xdist[xdist > kdist] = kdist #cap larger distances at kdist so they get zero weight
# get the bisquare weights: (15/16)(1 - u^2)^2 with u = distance/kdist
weights = 15 * ((1 - ((xdist/kdist) * (xdist/kdist)))^2)/16
weights = weights/sum(weights)
# create a diagonal matrix with these weights..
W = diag(weights) #points outside of the K neighbors get a weight of 0
# local design matrix: a column of 1s plus the predictors centered at xp
ax = t(t(Xf) - xp)
xk = cbind(rep(1, N), ax)
yk = Yf
evect = rep(0, ncol(Xf) + 1) #unit vector that picks off the local intercept
evect[1] = 1
L[i, ] = evect %*% (solve(t(xk) %*% W %*% xk)) %*% (t(xk) %*% W)
yest[i] = sum(L[i, ] * Yf) #fitted value at xp = ith row of L times Y
}
L
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 0.4114 0.3171 0.2187 0.1396 0.08000 0.03582 0.01196 -0.05495
## [2,] 0.2698 0.2900 0.2198 0.1478 0.09034 0.04729 0.10301 -0.02952
## [3,] 0.0000 0.1566 0.2385 0.1745 0.11197 0.06683 0.03579 1.07571
## [4,] 0.0000 0.0000 0.1919 0.2903 0.21044 0.13307 0.07747 0.05256
## [5,] 0.0000 0.0000 0.0000 0.2526 0.37345 0.26184 0.15691 0.02811
## [6,] 0.0000 0.0000 0.0000 0.0000 0.29046 0.39678 0.24363 0.11120
## [7,] 0.0000 0.0000 0.0000 0.0000 0.00000 0.31579 0.40352 0.23219
## [8,] 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 0.32206 0.40484
## [9,] 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 0.09296 0.20246
## [10,] 0.0000 0.0000 0.0000 0.0000 0.00000 0.00000 -0.17982 0.07674
## [,9] [,10]
## [1,] -0.076772 -0.08282
## [2,] -0.064856 -0.07372
## [3,] -0.054998 -0.80487
## [4,] 0.993022 -0.94874
## [5,] 0.043146 -0.11601
## [6,] 0.007276 -0.04935
## [7,] 0.061929 -0.01342
## [8,] 0.224129 0.04897
## [9,] 0.316201 0.38838
## [10,] 0.385964 0.71711
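A quick sanity check on L (a general property of local-polynomial smoothers, not specific to this code): each row reproduces constants, so the rows should sum to one.
range(rowSums(L)) # each row should sum to ~1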
# compare with locfit
N = length(Yf)
L1 = matrix(0, nrow = N, ncol = N)
X = as.matrix(Xf)
for (i in 1:N) {
L1[i, ] = locfit(Yf ~ Xf, alpha = 0.5, deg = 1, geth = 1, kern = "bisq",
scale = TRUE, ev = Xf[i, ])
}
L1
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 0.7919 0.3261 -0.02797 -0.09007 0.0000 0.0000 0.00000 0.00000
## [2,] 0.3499 0.3632 0.22402 0.06293 0.0000 0.0000 0.00000 0.00000
## [3,] 0.0000 0.2647 0.47059 0.26471 0.0000 0.0000 0.00000 0.00000
## [4,] 0.0000 0.0000 0.26471 0.47059 0.2647 0.0000 0.00000 0.00000
## [5,] 0.0000 0.0000 0.00000 0.26471 0.4706 0.2647 0.00000 0.00000
## [6,] 0.0000 0.0000 0.00000 0.00000 0.2647 0.4706 0.26471 0.00000
## [7,] 0.0000 0.0000 0.00000 0.00000 0.0000 0.2647 0.47059 0.26471
## [8,] 0.0000 0.0000 0.00000 0.00000 0.0000 0.0000 0.26471 0.47059
## [9,] 0.0000 0.0000 0.00000 0.00000 0.0000 0.0000 0.06293 0.22402
## [10,] 0.0000 0.0000 0.00000 0.00000 0.0000 0.0000 -0.09007 -0.02797
## [,9] [,10]
## [1,] 0.0000 0.0000
## [2,] 0.0000 0.0000
## [3,] 0.0000 0.0000
## [4,] 0.0000 0.0000
## [5,] 0.0000 0.0000
## [6,] 0.0000 0.0000
## [7,] 0.0000 0.0000
## [8,] 0.2647 0.0000
## [9,] 0.3632 0.3499
## [10,] 0.3261 0.7919
(ii) Evaluate the GCV
# using locfit
model = gcvplot(Yf ~ Xf, alpha = 0.5, deg = 1, kern = "bisq", ev = dat(), scale = TRUE)
sort(model$values)[1]
## [1] 2.75
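The same value should follow by hand from the smoother matrix, assuming locfit's definition GCV = N * RSS / (N - tr(L))^2 (L1 is the alpha = 0.5 matrix computed above):
yfit = as.vector(L1 %*% Yf)
N * sum((Yf - yfit)^2)/(N - sum(diag(L1)))^2 # should match the gcvplot value above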
(iii) Estimate the value of Y at each X and also the 95% confidence interval (plot the fit and the confidence interval). Check your results with the output from LOCFIT package - they both should match.
nu1 = sum(diag(L))
nu2 = sum(diag((t(L) %*% L)))
RSS = sum((Yf - yest)^2)
sigmae = RSS/(N - 2 * nu1 + nu2) #error variance
stderror = sqrt((diag(L %*% t(L)))) * sqrt(sigmae)
par(mfrow = c(1, 2))
cl = qnorm(0.975) #95% confidence level
## confidence intervals are
lowerc = yest - cl * stderror
upperc = yest + cl * stderror
plot(Xf, Yf, xlab = "X values", ylab = "Y values", main = "output from formula 95% CI interval")
# intervals
lines(Xf, lowerc, lty = "dashed", col = "red")
lines(Xf, upperc, lty = "dashed", col = "blue")
# Compare with locfit
model = locfit(Yf ~ Xf, alpha = 0.5, deg = 1, kern = "bisq", ev = dat(), scale = TRUE)
zp = predict(model, se.fit = T)
lowerc = zp$fit - cl * zp$se
upperc = zp$fit + cl * zp$se
plot(Xf, Yf, xlab = "X values", ylab = "Y values", main = "output from model 95% CI interval")
# intervals
lines(Xf, lowerc, lty = "dashed", col = "red")
lines(Xf, upperc, lty = "dashed", col = "blue")
(iv) As another check, if you fix alpha = 1 and make all the weights the same (e.g., all equal to 1.0), the results from your local polynomial code should be identical to those from linear regression.
# redefine L
N = length(Yf)
L1 = matrix(0, nrow = N, ncol = N)
Xf = as.matrix(Xf)
for (i in 1:N) {
L1[i, ] = locfit(Yf ~ Xf, alpha = 1, deg = 1, geth = 1, kern = "bisq", scale = TRUE,
ev = Xf[i, ])
}
L1
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.4857 0.3593134 0.226838 0.10563 0.010528 -0.048269 -0.06738
## [2,] 0.3511 0.2921097 0.214970 0.13323 0.060062 0.006224 -0.02198
## [3,] 0.2259 0.2232220 0.197389 0.15561 0.107051 0.061106 0.02561
## [4,] 0.1146 0.1521096 0.171296 0.16993 0.149953 0.116428 0.07654
## [5,] 0.0243 0.0768077 0.132313 0.17282 0.187519 0.172817 0.13231
## [6,] 0.0000 0.0243024 0.076808 0.13231 0.172817 0.187519 0.17282
## [7,] 0.0000 0.0105953 0.038510 0.07654 0.116428 0.149953 0.16993
## [8,] 0.0000 -0.0009851 0.005094 0.02561 0.061106 0.107051 0.15561
## [9,] 0.0000 -0.0109709 -0.024779 -0.02198 0.006224 0.060062 0.13323
## [10,] 0.0000 -0.0199534 -0.052387 -0.06738 -0.048269 0.010528 0.10563
## [,8] [,9] [,10]
## [1,] -0.052387 -0.0199534 0.0000
## [2,] -0.024779 -0.0109709 0.0000
## [3,] 0.005094 -0.0009851 0.0000
## [4,] 0.038510 0.0105953 0.0000
## [5,] 0.076808 0.0243024 0.0000
## [6,] 0.132313 0.0768077 0.0243
## [7,] 0.171296 0.1521096 0.1146
## [8,] 0.197389 0.2232220 0.2259
## [9,] 0.214970 0.2921097 0.3511
## [10,] 0.226838 0.3593134 0.4857
# compare to hat matrix from linear regression
XX = cbind(rep(1, N), X)
# hat matrix: H = X (X'X)^-1 X'
hatmat = XX %*% solve(t(XX) %*% XX) %*% t(XX)
hatmat
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.34545 0.290909 0.236364 0.18182 0.12727 0.07273 0.01818
## [2,] 0.29091 0.248485 0.206061 0.16364 0.12121 0.07879 0.03636
## [3,] 0.23636 0.206061 0.175758 0.14545 0.11515 0.08485 0.05455
## [4,] 0.18182 0.163636 0.145455 0.12727 0.10909 0.09091 0.07273
## [5,] 0.12727 0.121212 0.115152 0.10909 0.10303 0.09697 0.09091
## [6,] 0.07273 0.078788 0.084848 0.09091 0.09697 0.10303 0.10909
## [7,] 0.01818 0.036364 0.054545 0.07273 0.09091 0.10909 0.12727
## [8,] -0.03636 -0.006061 0.024242 0.05455 0.08485 0.11515 0.14545
## [9,] -0.09091 -0.048485 -0.006061 0.03636 0.07879 0.12121 0.16364
## [10,] -0.14545 -0.090909 -0.036364 0.01818 0.07273 0.12727 0.18182
## [,8] [,9] [,10]
## [1,] -0.036364 -0.090909 -0.14545
## [2,] -0.006061 -0.048485 -0.09091
## [3,] 0.024242 -0.006061 -0.03636
## [4,] 0.054545 0.036364 0.01818
## [5,] 0.084848 0.078788 0.07273
## [6,] 0.115152 0.121212 0.12727
## [7,] 0.145455 0.163636 0.18182
## [8,] 0.175758 0.206061 0.23636
## [9,] 0.206061 0.248485 0.29091
## [10,] 0.236364 0.290909 0.34545
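To see the claimed identity exactly, set every weight to 1 (W = I) in the local-linear formula; the resulting rows reduce algebraically to the rows of the OLS hat matrix (a sketch):
L2 = matrix(0, nrow = N, ncol = N)
for (i in 1:N) {
    xk = cbind(1, Xf - Xf[i]) # local design, centered at x_i
    L2[i, ] = c(1, 0) %*% solve(t(xk) %*% xk) %*% t(xk) # W = identity
}
max(abs(L2 - hatmat)) # ~0: identical to linear regression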
(v) Replace the last value 10.8 with a high value to create an outlier, compute the L matrix (with alpha = 0.9) and Hat matrix from linear regression - compare the weights and comment.
Yf[10] = 50
N = length(Yf)
L1 = matrix(0, nrow = N, ncol = N)
Xf = as.matrix(Xf)
for (i in 1:N) {
L1[i, ] = locfit(Yf ~ Xf, alpha = 0.9, deg = 1, geth = 1, kern = "bisq",
scale = TRUE, ev = Xf[i, ])
}
L1
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.52767 0.37256 0.212079 0.07246 -0.025353 -0.069908 -0.06344
## [2,] 0.36509 0.30124 0.213038 0.12042 0.042349 -0.007111 -0.02277
## [3,] 0.21549 0.22581 0.205077 0.16187 0.108584 0.058276 0.02154
## [4,] 0.08621 0.14435 0.183102 0.19278 0.172225 0.127696 0.07171
## [5,] 0.00000 0.04487 0.131868 0.20604 0.234432 0.206044 0.13187
## [6,] 0.00000 0.00000 0.044872 0.13187 0.206044 0.234432 0.20604
## [7,] 0.00000 0.00000 0.021925 0.07171 0.127696 0.172225 0.19278
## [8,] 0.00000 0.00000 0.003357 0.02154 0.058276 0.108584 0.16187
## [9,] 0.00000 0.00000 -0.012257 -0.02277 -0.007111 0.042349 0.12042
## [10,] 0.00000 0.00000 -0.026072 -0.06344 -0.069908 -0.025353 0.07246
## [,8] [,9] [,10]
## [1,] -0.026072 0.00000 0.00000
## [2,] -0.012257 0.00000 0.00000
## [3,] 0.003357 0.00000 0.00000
## [4,] 0.021925 0.00000 0.00000
## [5,] 0.044872 0.00000 0.00000
## [6,] 0.131868 0.04487 0.00000
## [7,] 0.183102 0.14435 0.08621
## [8,] 0.205077 0.22581 0.21549
## [9,] 0.213038 0.30124 0.36509
## [10,] 0.212079 0.37256 0.52767
# compare to hat matrix from linear regression
XX = cbind(rep(1, N), X)
# hat matrix: H = X (X'X)^-1 X'
hatmat = XX %*% solve(t(XX) %*% XX) %*% t(XX)
hatmat
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.34545 0.290909 0.236364 0.18182 0.12727 0.07273 0.01818
## [2,] 0.29091 0.248485 0.206061 0.16364 0.12121 0.07879 0.03636
## [3,] 0.23636 0.206061 0.175758 0.14545 0.11515 0.08485 0.05455
## [4,] 0.18182 0.163636 0.145455 0.12727 0.10909 0.09091 0.07273
## [5,] 0.12727 0.121212 0.115152 0.10909 0.10303 0.09697 0.09091
## [6,] 0.07273 0.078788 0.084848 0.09091 0.09697 0.10303 0.10909
## [7,] 0.01818 0.036364 0.054545 0.07273 0.09091 0.10909 0.12727
## [8,] -0.03636 -0.006061 0.024242 0.05455 0.08485 0.11515 0.14545
## [9,] -0.09091 -0.048485 -0.006061 0.03636 0.07879 0.12121 0.16364
## [10,] -0.14545 -0.090909 -0.036364 0.01818 0.07273 0.12727 0.18182
## [,8] [,9] [,10]
## [1,] -0.036364 -0.090909 -0.14545
## [2,] -0.006061 -0.048485 -0.09091
## [3,] 0.024242 -0.006061 -0.03636
## [4,] 0.054545 0.036364 0.01818
## [5,] 0.084848 0.078788 0.07273
## [6,] 0.115152 0.121212 0.12727
## [7,] 0.145455 0.163636 0.18182
## [8,] 0.175758 0.206061 0.23636
## [9,] 0.206061 0.248485 0.29091
## [10,]  0.236364  0.290909  0.34545
# Comparing the weights: the local L matrix is banded - weights vanish outside
# each point's neighborhood - so the outlier at x = 10 only distorts nearby
# fits. The regression hat matrix spreads every observation's influence
# globally, so the outlier shifts the fit everywhere.
(a) Fit a best local polynomial model
mydata = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_precip.dat"),
ncol = 4, byrow = T)
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = as.matrix(cbind(lat, long, elevation)) #predictor variables
# Note: qgaussian is the default family. thebest is the function defined
# above, which returns the GCV-best alpha for a given X, Y, degree and family
mystud1 = thebest(X, precip, 1, "qgaussian")
mystud2 = thebest(X, precip, 2, "qgaussian")
mystud1$gcv
## [1] 46354
mystud2$gcv
## [1] 46994
# degree 1 is best
bestalpha = mystud1$bestalpha
# Find best predictors
combs = leaps(X, precip)
combos = combs$which
ncombos = length(combos[, 1])
z1 = 1:ncombos
for (i in 1:ncombos) {
zz = gcvplot(precip ~ X[, combos[i, ]], alpha = bestalpha, deg = 1, kern = "bisq",
ev = dat(), scale = TRUE)
z1[i] = as.vector(zz$values)
}
z2 = order(z1)[1]
bestpredictors = combs$which[z2, ]
bestpredictors
## 1 2 3
## TRUE TRUE FALSE
# the lowest GCV is with lat + long
# Now fit the LOCFIT model
bestmodel = locfit(precip ~ lat + long, alpha = bestalpha, deg = 1, kern = "bisq",
scale = TRUE)
# Goodness of fit: using the LOCFIT model with the best alpha and degree from
# above, compute RSS1; then compute RSS0 from linear regression and compare
N = length(precip)
RSS1 = sum(residuals(bestmodel, type = "response")^2)
nu1 = sum(fitted(bestmodel, what = "infl")) # trace(L)
nu2 = sum(fitted(bestmodel, what = "vari")) # trace(L^T L)
nu11 = N - 2 * nu1 + nu2
## Linear regression..
zz = lm(precip ~ lat + long)
XX = cbind(rep(1, N), lat, long) # same predictors as the linear model
# Compute the Hat matrix
hatm = XX %*% solve(t(XX) %*% XX) %*% t(XX)
II = diag(N)
delta0 = t(II - hatm) %*% (II - hatm) #Equation 9.2
nu00 = sum(diag(delta0))
RSS0 = sum(residuals(zz, type = "response")^2)
Fdata = (RSS0 - RSS1)/(nu00 - nu11)
Fdata = (Fdata/(RSS1/nu11))
Ftheor = qf(0.95, (nu00 - nu11), nu11) #95% confidence level..
Fdata
Ftheor
## if Fdata > Ftheor, reject the null - i.e., the data fits a local poly
## model better than the linear model
(b) Perform ANOVA and model diagnostics
par(mfrow = c(2, 2))
error = residuals(bestmodel)
Y_hat = precip - error
hist(error, probability = T)
# fit and plot a normal distribution to the residuals..
lines(error[order(error)], dnorm(error[order(error)], mean = mean(error), sd = sd(error)),
col = "red")
# nonparametric PDF
sm.density(error, add = T, col = "blue")
# Homoskedastic: residuals versus y_hat
plot(Y_hat, error)
abline(h = 0, col = "red")
title("Homoskedasticity")
# Check normality of residuals
qqnorm(error, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Normal QQPlot of Residuals")
qqline(error, col = "red")
# Autocorrelation test
acf(error)
(c) Compute cross-validated and fitted estimates at each observation point and plot them against the observed values.
n = length(precip)
ycv = 1:n
index = 1:n
X = cbind(lat, long)
for (i in 1:n) {
# get all the data points by dropping the ith point
index1 = index[index != i]
xx = X[index1, ] # keep both columns (lat, long)
yy = precip[index1]
# the dropped point
xp = t(X[i, ])
zl = locfit(yy ~ xx, alpha = bestalpha, deg = 1, kern = "bisq", scale = TRUE)
ycv[i] = predict(zl, xp)
}
par(mfrow = c(2, 1))
plot(precip, ycv, ylab = "cross validated estimates")
plot(precip, Y_hat, ylab = "fitted estimates")
(d) Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
n = length(precip)
nsim = 500
# initialize vectors
rmse = 1:nsim
rsq = 1:nsim
index = 1:n
N10 = round(0.1 * n)
for (i in 1:nsim) {
drop = sample(c(1:n), N10)
keep = setdiff(index, drop)
xx = as.data.frame(X[keep, ]) # lat and long, as in the best model above
y = precip[keep]
zz = locfit(y ~ ., data = xx, alpha = bestalpha, deg = 1, kern = "bisq",
scale = TRUE)
xx = as.data.frame(X[drop, ])
yhat = predict(zz, newdata = xx)
resid = precip[drop] - yhat
rmse[i] = sqrt(mean((precip[drop] - yhat)^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum((precip[drop] - yhat)^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted values\nof the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted values\nof the 10% dropped")
(e) Estimate the precipitation and the standard errors on the DEM grid and show them as 3-D plots as in (v) above
bestmodel = locfit(precip ~ lat + long, alpha = bestalpha, deg = 1, kern = "bisq",
scale = TRUE)
predpoints = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat"),
ncol = 3, byrow = T)
X = as.data.frame(predpoints)
names(X) = c("lat", "long", "elevation")
ypred = predict(bestmodel, newdata = X, type = "response", se.fit = TRUE)
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
par(mfrow = c(1, 2))
ypred.mat = matrix(ypred$fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat), main = "Estimated Precipitation")
contour(long1, lat1, t(ypred.mat), add = T)
ypred.mat = matrix(ypred$se.fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat), main = "Estimated Errors")
contour(long1, lat1, t(ypred.mat), add = T)
Options for family: "gaussian", "binomial", "poisson", "gamma", "geom", "density", or "q" + one of the above (quasi-likelihood). Options for link: "ident", "log", "logit", "inverse", "sqrt", "arcsin".
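For example, a quasi-gamma local fit with a log link would be specified as (a sketch; the alpha value here is only illustrative):
fit = locfit(precip ~ lat + long, alpha = 0.5, deg = 1, kern = "bisq",
scale = TRUE, family = "qgamma", link = "log")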
(a) Fit a best local GLM model
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = as.matrix(cbind(lat, long, elevation)) #predictor variables
fam = c("gamma", "qgamma", "gaussian", "qgaussian")
Nc = length(fam)
mystud1 = vector("list", Nc)
mystud2 = vector("list", Nc)
for (i in 1:Nc) {
mystud1[[i]] = thebest(X, precip, 1, fam[i])
mystud2[[i]] = thebest(X, precip, 2, fam[i])
}
best.deg1 = order(c(mystud1[[1]]$gcv, mystud1[[2]]$gcv, mystud1[[3]]$gcv, mystud1[[4]]$gcv))
mystud1[[best.deg1[1]]]
## $gcv
## [1] 0.1309
##
## $bestalpha
## [1] 0.06629
##
## $family
## [1] "gamma"
best.deg2 = order(c(mystud2[[1]]$gcv, mystud2[[2]]$gcv, mystud2[[3]]$gcv, mystud2[[4]]$gcv))
mystud2[[best.deg2[1]]]
## $gcv
## [1] 0.1339
##
## $bestalpha
## [1] 0.2285
##
## $family
## [1] "gamma"
# lowest gcv occurs with deg=1 and family= 'gamma'
bestalpha = mystud1[[best.deg1[1]]]$bestalpha
bestfamily = mystud1[[best.deg1[1]]]$family
# Find best predictors
combs = leaps(X, precip)
combos = combs$which
ncombos = length(combos[, 1])
z1 = 1:ncombos
for (i in 1:ncombos) {
zz = gcvplot(precip ~ X[, combos[i, ]], alpha = bestalpha, deg = 1, kern = "bisq",
ev = dat(), scale = TRUE, family = bestfamily)
z1[i] = as.vector(zz$values)
}
z2 = order(z1)[1]
bestpredictors = combs$which[z2, ]
bestpredictors
## 1 2 3
## TRUE TRUE FALSE
# the lowest GCV is with lat + long
# find best link
zz = gcvplot(precip ~ lat + long, alpha = bestalpha, deg = 1, kern = "bisq",
ev = dat(), scale = TRUE, family = bestfamily, link = "log")
zz1 = gcvplot(precip ~ lat + long, alpha = bestalpha, deg = 1, kern = "bisq",
ev = dat(), scale = TRUE, family = bestfamily, link = "inverse")
zz2 = gcvplot(precip ~ lat + long, alpha = bestalpha, deg = 1, kern = "bisq",
ev = dat(), scale = TRUE, family = bestfamily, link = "ident")
sort(zz$values)[1]
## [1] 0.1185
sort(zz1$values)[1]
## [1] 0.1181
sort(zz2$values)[1]
## [1] 0.1194
# the GCV values are nearly identical; use link = 'log', which keeps the
# precipitation estimates positive
# Now fit the LOCFIT model
bestmodel = locfit(precip ~ lat + long, alpha = bestalpha, deg = 1, kern = "bisq",
scale = TRUE, family = bestfamily, link = "log")
# Goodness of Fit
Y = precip
X = as.matrix(cbind(X[, 1:2]))
mymod = locfit(Y ~ X, deg = 2, alpha = bestalpha, kern = "bisq", scale = TRUE,
ev = dat(), family = bestfamily)
RSS1 = sum(residuals(mymod)^2)
nu1 = sum(fitted(mymod, what = "infl"))
nu2 = sum(fitted(mymod, what = "vari"))
nu11 = N - 2 * nu1 + nu2
# Compare with gamma glm model using same vars (lon, lat)
zz = glm(Y ~ ., data = as.data.frame(X), family = Gamma(link = "identity"))
RSS0 = sum(residuals(zz, type = "response")^2)
nu0 = length(zz$coef) #number of model coefficients
nu00 = N - nu0 #residual degrees of freedom of the glm
Fdata = (RSS0 - RSS1)/(nu00 - nu11)
Fdata = (Fdata/(RSS1/nu11))
Ftheor = qf(0.95, (nu00 - nu11), nu11) #95% confidence level..
Ftheor
## [1] 1.248
Fdata
## [1] 1810605
## Fdata > Ftheor - reject null - i.e., data fits a local poly model
## better than best glm model
(b) Perform ANOVA and model diagnostics
par(mfrow = c(2, 2))
error = residuals(bestmodel, type = "response") #response-scale residuals
Y_hat = precip - error
hist(error, probability = T)
# fit and plot a normal distribution to the residuals..
lines(error[order(error)], dnorm(error[order(error)], mean = mean(error), sd = sd(error)),
col = "red")
# nonparametric PDF
sm.density(error, add = T, col = "blue")
# Homoskedastic: residuals versus y_hat
plot(Y_hat, error)
abline(h = 0, col = "red")
title("Homoskedasticity")
# Check normality of residuals
qqnorm(error, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Normal QQPlot of Residuals")
qqline(error, col = "red")
# Autocorrelation test
acf(error)
(c) Compute cross-validated and fitted estimates at each observation point and plot them against the observed values.
n = length(precip)
ycv = 1:n
index = 1:n
X = cbind(lat, long)
for (i in 1:n) {
# get all the data points by dropping the ith point
index1 = index[index != i]
xx = X[index1, ] # keep both columns (lat, long)
yy = precip[index1]
# the dropped point
xp = t(X[i, ])
zl = locfit(yy ~ xx, alpha = bestalpha, deg = 1, kern = "bisq", scale = TRUE,
family = bestfamily, link = "log")
ycv[i] = predict(zl, xp)
}
par(mfrow = c(2, 1))
plot(precip, ycv, ylab = "cross validated estimates")
plot(precip, Y_hat, ylab = "fitted estimates")
(d) Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
n = length(precip)
nsim = 500
# initialize vectors
rmse = 1:nsim
rsq = 1:nsim
index = 1:n
N10 = round(0.1 * n)
X = cbind(lat, long)
for (i in 1:nsim) {
drop = sample(c(1:n), N10)
keep = setdiff(index, drop)
xx = as.data.frame(X[keep, ]) # keep both columns (lat, long)
y = precip[keep]
zz = locfit(y ~ ., data = xx, alpha = bestalpha, deg = 1, kern = "bisq",
scale = TRUE, family = bestfamily, link = "log")
xx = as.data.frame(X[drop, ])
yhat = predict(zz, newdata = xx)
resid = precip[drop] - yhat
rmse[i] = sqrt(mean((precip[drop] - yhat)^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum((precip[drop] - yhat)^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted values\nof the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted values\nof the 10% dropped")
(e) Estimate the precipitation and the standard errors on the DEM grid and show them as 3-D plots as in (v) above
predpoints = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat"),
ncol = 3, byrow = T)
X = as.data.frame(predpoints)
names(X) = c("lat", "long", "elevation")
ypred = predict(bestmodel, newdata = X, type = "response", se.fit = TRUE)
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
par(mfrow = c(1, 2))
ypred.mat = matrix(ypred$fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat), main = "Estimated Precipitation")
contour(long1, lat1, t(ypred.mat), add = T)
ypred.mat = matrix(ypred$se.fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat), main = "Estimated Errors")
contour(long1, lat1, t(ypred.mat), add = T)
Estimate the spatial surface using Kriging
i. Fit a variogram using co-kriging (i.e., using latitude, longitude and elevation)
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = as.data.frame(cbind(lat, long, elevation)) #predictor variables
par(mar = c(4, 4, 1, 1))
look <- vgram(X, precip, N = 15, lon.lat = FALSE)
bplot.xy(look$d, look$vgram, breaks = look$breaks, outline = FALSE, xlab = "distance (degrees)",
ylab = "variogram")
lines(look$centers, look$stats["mean", ], col = 4)
points(look$centers, look$stats[2, ], col = "blue")
par(mfrow = c(1, 1))
xd = look$centers
ym = look$stats[2, ]
sigma = 1
nls.fit = nls(ym ~ sigma^2 + rho * (1 - exp(-xd/theta)), start = list(rho = max(ym),
theta = quantile(xd, 0.1)), control = list(maxiter = 200))
pars = as.data.frame(coefficients(nls.fit))
rho = pars[1, 1]
theta = pars[2, 1]
xr = round(max(xd) + sd(xd))
dgrid <- seq(0, xr, , 400)
lines(dgrid, sigma^2 + rho * (1 - exp(-1 * dgrid/theta)), col = "blue", lwd = 3)
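A compact summary of the fitted exponential variogram (using the common convention that the effective range, where the variogram reaches ~95% of the sill, is 3 * theta):
c(nugget = sigma^2, sill = sigma^2 + rho, eff.range = 3 * theta)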
ii. Compute cross-validated and fitted estimates at each observation points and plot them against the observed values.
# get the cross validated estimates..
n = length(precip)
ypred = 1:n
index = 1:n
for (i in 1:n) {
# get all the data points by dropping the ith point
index1 = index[index != i]
x1 = lat[index1]
x2 = long[index1]
xx = cbind(x1, x2)
yy = precip[index1]
zz = elevation[index1]
z1 = Krig(xx, yy, Z = zz, rho = rho, theta = theta, sigma2 = sigma, m = 2)
# the dropped point
zp = elevation[i]
xp = X[i, 1:2]
ypred[i] = predict.Krig(z1, x = as.matrix(xp), Z = as.matrix(zp), drop.Z = FALSE)
}
par(mfrow = c(1, 1))
plot(precip, ypred, ylab = "cross validated estimates")
iv. Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
n = length(precip)
nsim = 500
# initialize vectors
rmse = 1:nsim
rsq = 1:nsim
index = 1:n
N10 = round(0.1 * n)
for (i in 1:nsim) {
drop = sample(c(1:n), N10)
keep = setdiff(index, drop)
xx1 = lat[keep]
xx2 = long[keep]
xx = cbind(xx1, xx2)
zz = elevation[keep]
y = precip[keep]
z1 = Krig(xx, y, Z = zz, rho = rho, theta = theta, sigma2 = sigma, m = 2)
xp = X[drop, 1:2]
zp = elevation[drop]
yp = predict.Krig(z1, x = as.matrix(xp), Z = as.matrix(zp), drop.Z = FALSE)
resid = precip[drop] - yp
rmse[i] = sqrt(mean(resid^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum(resid^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted values\nof the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted values\nof the 10% dropped")
v. Predict on DEM. Plot model estimates and model errors.
zz = Krig(X[, 1:2], precip, Z = elevation, rho = rho, theta = theta, sigma2 = sigma,
m = 2)
predpoints = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat"),
ncol = 3, byrow = T)
X = as.data.frame(predpoints)
names(X) = c("lat", "long", "elevation")
### spatial map of estimates and errors - modify to add beautify..
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
nrs = length(long1)
ncs = length(lat1)
par(mfrow = c(1, 2))
yp = predict.Krig(zz, x = X[, 1:2], Z = X[, 3], drop.Z = FALSE)
zmat = matrix(yp, nrow = ncs, ncol = nrs)
image.plot(long1, lat1, t(zmat), zlim = range(precip), main = "Estimated Precipitation")
contour(long1, lat1, t(zmat), add = T)
yp.se = predict.se.Krig(zz, x = X[, 1:2], Z = X[, 3], drop.Z = FALSE)
zmat = matrix(yp.se, nrow = ncs, ncol = nrs)
image.plot(long1, lat1, t(zmat), main = "Estimated Error")
contour(long1, lat1, t(zmat), add = T)
Fit a hierarchical spatial model on the errors of the GLM fit.
i. Look at the variogram and fit a model
X = cbind(long, lat, elevation)
zglm = glm(precip ~ ., data = as.data.frame(X), family = Gamma(link = "identity"))
bestmodel = stepAIC(zglm, trace = F)
yres = residuals(bestmodel, type = "response") #response-scale residuals for kriging
xs = cbind(long, lat)
nobs = length(yres)
par(mfrow = c(1, 1))
sigma = 1
look <- vgram(xs, yres, N = 15, lon.lat = FALSE)
bplot.xy(look$d, look$vgram, breaks = look$breaks, outline = FALSE, xlab = "distance (degrees)",
ylab = "variogram")
points(look$centers, look$stats[2, ], col = "blue")
# fit of exponential by nonlinear least squares
xd <- look$centers
ym <- look$stats[2, ]
sigma <- 1 #
nls.fit <- nls(ym ~ sigma^2 + rho * (1 - exp(-xd/theta)), start = list(rho = max(look$stats[2,
]), theta = quantile(xd, 0.1)), control = list(maxiter = 200))
pars <- coefficients(nls.fit)
rho <- pars[1]
theta <- pars[2]
xr = round(max(xd) + sd(xd))
dgrid <- seq(0, xr, , 400)
lines(dgrid, sigma^2 + rho * (1 - exp(-1 * dgrid/theta)), col = "blue", lwd = 3)
zz = Krig(xs, yres, rho = rho, theta = theta, m = 1, sigma2 = sigma)
yp = predict(bestmodel, type = "response") + predict.Krig(zz)
ii. Look at residuals of model
par(mfrow = c(2, 2))
error = as.vector(precip - yp) #coerce to a vector so sd() behaves
hist(error, probability = T)
# fit and plot a normal distribution to the residuals..
lines(error[order(error)], dnorm(error[order(error)], mean = mean(error), sd = sd(error)),
col = "red")
# nonparametric PDF
sm.density(error, add = T, col = "blue")
# Homoskedastic: residuals versus y_hat
plot(yp, error)
abline(h = 0, col = "red")
title("Homoskedasticity")
# Check normality of residuals
qqnorm(error, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Normal QQPlot of Residuals")
qqline(error, col = "red")
# Autocorrelation test
acf(error)
iii. Cross Validation: drop 1 point, fit model, predict that point
# get the cross validated estimates..
X = as.matrix(long)
X2 = as.matrix(cbind(lat, long))
X3 = as.matrix(cbind(lat, long, elevation))
Y = precip
N = length(Y)
yest = rep(c(1:N))
index = 1:N
for (i in 1:N) {
index1 = index[-i]
Xval = X[index1, ]
Yval = Y[index1]
XVal2 = X2[index1, ]
Xval3 = X3[index1, ]
### fit GLM and get residuals..
zglm = glm(Yval ~ ., data = as.data.frame(Xval), family = Gamma(link = "identity"))
yres = residuals(zglm, type = "response")
zz = Krig(XVal2, yres, rho = rho, theta = theta, m = 1, sigma2 = sigma)
Xval = X[i]
yest[i] = predict(zglm, newdata = as.data.frame(Xval), type = "response") +
predict.Krig(zz, t(X2[i, ]))
}
par(mfrow = c(1, 1))
plot(Y, yest, xlim = range(Y, yest), ylim = range(Y, yest), xlab = "True value",
ylab = "x-validated estimate", main = "Drop 1 Predictions")
lines(Y, Y)
iv. Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
N = length(precip)
Np = round(0.1 * N)
nsim = 5
rmse = c()
rsq = c()
index = 1:N
X = as.matrix(long)
X2 = as.matrix(cbind(lat, long))
X3 = as.matrix(cbind(lat, long, elevation))
for (i in 1:nsim) {
drop = sample(1:N, Np) # select a random 10% of points (without duplicates)
keep = setdiff(index, drop)
Xdrop = X[drop, ]
Xdrop2 = X2[drop, ]
Xdrop3 = X3[drop, ]
Ykeep = precip[keep]
Xkeep = X[keep, ]
Xkeep2 = X2[keep, ]
Xkeep3 = X3[keep, ]
zglm = glm(Ykeep ~ ., data = as.data.frame(Xkeep), family = Gamma(link = "identity"))
yres = residuals(zglm, type = "response")
zz = Krig(Xkeep2, yres, rho = rho, theta = theta, m = 1, sigma2 = sigma)
Xkeep = Xdrop #reuse the name so the newdata columns match the fitted model
yp = predict(zglm, newdata = as.data.frame(Xkeep), type = "response") +
predict.Krig(zz, Xdrop2)
resid = precip[drop] - yp
rmse[i] = sqrt(mean((precip[drop] - yp)^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum((precip[drop] - yp)^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted values\nof the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted values\nof the 10% dropped")
v. Predict on DEM
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
xll = cbind(long, lat)
xe = cbind(long)
### fit GLM and get residuals..
zglm = glm(precip ~ ., data = as.data.frame(xe), family = Gamma(link = "identity"))
yres = residuals(zglm, type = "response")
zz = Krig(xll, yres, rho = rho, theta = theta, sigma2 = sigma, m = 1)
krigy1 = predict(zz, x = xll)
# now predict on dem values
predpts = read.table("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
lat = predpts$V1
lon = predpts$V2
elev = predpts$V3
xe = cbind(lon)
xll = cbind(lon, lat)
glmpred = predict(zglm, newdata = as.data.frame(xe), se.fit = TRUE, type = "response")
krigy = predict.Krig(zz, x = xll)
glmy = glmpred$fit
par(mfrow = c(2, 1))
yp = glmy + krigy
zmat = matrix(yp, nrow = ncs, ncol = nrs)
image.plot(long1, lat1, t(zmat), zlim = range(precip), main = "Estimated Precipitation")
contour(long1, lat1, t(zmat), add = T)
glmse = glmpred$se.fit
krigse = predict.se.Krig(zz, x = xll)
yp = glmse + krigse
zmat = matrix(yp, nrow = ncs, ncol = nrs)
image.plot(long1, lat1, t(zmat), zlim = range(precip), main = "Estimated Error")
contour(long1, lat1, t(zmat), add = T)
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
xll = cbind(long, lat)
xe = cbind(long)
### fit GLM and get residuals..
zglm = glm(precip ~ ., data = as.data.frame(xe), family = Gamma(link = "identity"))
yres = residuals(zglm, type = "response")
zz = Krig(xll, yres, rho = rho, theta = theta, sigma2 = sigma, m = 1)
# now predict on dem values
predpts = read.table("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
lat = predpts$V1
lon = predpts$V2
elev = predpts$V3
xe = cbind(lon)
xll = cbind(lon, lat)
glmy = predict(zglm, newdata = as.data.frame(xe), se.fit = TRUE, type = "response")
glmest = glmy$fit
krigy = predict(zz, x = xll)
yp = glmest + krigy
krigse = predict.se(zz, x = xll)
glmse = glmy$se.fit
all.se = glmse + krigse
### spatial map of estimates and errors - modify to add beautify..
par(mfrow = c(2, 1))
xlon = sort(unique(lon))
nrs = length(xlon)
ylat = sort(unique(lat))
ncs = length(ylat)
# estimates
zmat = matrix(yp, nrow = ncs, ncol = nrs)
image.plot(xlon, ylat, t(zmat), zlim = range(precip))
contour(xlon, ylat, t(zmat), add = T)
# errors
zmat = matrix(all.se, nrow = ncs, ncol = nrs)
image.plot(xlon, ylat, t(zmat))
contour(xlon, ylat, t(zmat), add = T)
Daily Precipitation at the 491 locations in Colorado is available for three days January 11, 12 and 13, 1997. Select one of the days (try Jan 11 or 12), convert the daily rainfall at each station into a binary variable of no precipitation (0) and a nonzero precipitation amount (1).
# January 11
mydata1 = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_pcp_daily_1997_01_11.dat"),
ncol = 4, byrow = T)
lat = mydata1[, 1]
long = mydata1[, 2]
elevation = mydata1[, 3]
precip = mydata1[, 4]
X = cbind(lat, long, elevation, precip)
# remove all rows where precip = -9999 (missing value flag)
threshold = -1
X = subset(X, X[, 4] > threshold)
# Convert matrix to binary based on values in column 4 (precip)
X[, 4][X[, 4] > 0] <- 1 #in column 4, replace values greater than 0 with 1
X[, 4][X[, 4] < 1] <- 0 #in column 4, replace values less than 1 with 0
# this is the new matrix
precip = X[, 4]
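A quick check of the resulting class balance before modeling:
table(precip) # counts of dry (0) vs wet (1) stations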
a) Fit a 'best' GLM with the appropriate link function using one of the objective functions.
# using AIC
X = as.data.frame(X[, 1:3])
zz = glm(precip ~ ., data = X, family = binomial(link = "probit"))
bestmodel = stepAIC(zz)
## Start: AIC=343.3
## precip ~ lat + long + elevation
##
## Df Deviance AIC
## <none> 335 343
## - long 1 340 346
## - elevation 1 347 353
## - lat 1 356 362
X = as.data.frame(X[, 1:3])
zz = glm(precip ~ ., data = X, family = binomial(link = "cloglog"))
bestmodel = stepAIC(zz)
## Start: AIC=344.5
## precip ~ lat + long + elevation
##
## Df Deviance AIC
## <none> 336 344
## - long 1 339 345
## - elevation 1 348 354
## - lat 1 357 363
X = as.data.frame(X[, 1:3])
zz = glm(precip ~ ., data = X, family = binomial(link = "logit"))
bestmodel = stepAIC(zz)
## Start: AIC=342.9
## precip ~ lat + long + elevation
##
## Df Deviance AIC
## <none> 335 343
## - long 1 340 346
## - elevation 1 347 353
## - lat 1 356 362
# 'logit' has the lowest AIC, and stepAIC retains all three variables
# as predictors
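A simple in-sample skill check (the 0.5 cutoff is an assumption, not part of the original analysis):
table(observed = precip, predicted = as.numeric(fitted(bestmodel) > 0.5))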
b) Estimate the function on the DEM grid and plot the surface. Compare them with the surface plot of the elevation and also with the results from Slater and Clark (2006), Figure 4. They use a quasi-local GLM approach but I would imagine the results should be similar.
predpoints = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat"),
ncol = 3, byrow = T)
X = as.data.frame(predpoints)
names(X) = c("lat", "long", "elevation")
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
ypred = predict.glm(bestmodel, newdata = X, type = "response", se.fit = TRUE)
par(mfrow = c(1, 1))
ypred.mat = matrix(ypred$fit, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat))
contour(long1, lat1, t(ypred.mat), add = T)
Repeat 8 using a local GLM.
# January 11
mydata1 = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_pcp_daily_1997_01_11.dat"),
ncol = 4, byrow = T)
lat = mydata1[, 1]
long = mydata1[, 2]
elevation = mydata1[, 3]
precip = mydata1[, 4]
X = cbind(lat, long, elevation, precip)
# remove all rows where precip = -9999 (missing value flag)
threshold = -1
X = subset(X, X[, 4] > threshold)
# Convert matrix to binary based on values in column 4 (precip)
X[, 4][X[, 4] > 0] <- 1 #in column 4, replace values greater than 0 with 1
X[, 4][X[, 4] < 1] <- 0 #in column 4, replace values less than 1 with 0
# this is the new matrix
precip = X[, 4]
lat = X[, 1]
long = X[, 2]
elevation = X[, 3]
X = cbind(lat, long, elevation)
a) Fit a 'best' local GLM with the appropriate link function using one of the objective functions.
mystud1 = thebest(X, precip, 1, "binomial")
mystud2 = thebest(X, precip, 2, "binomial")
mystud1$gcv
## [1] 0.1225
mystud2$gcv
## [1] 0.01567
# degree 2 is best
bestalpha = mystud2$bestalpha
# Find best predictors
combs = leaps(X, precip)
combos = combs$which
ncombos = length(combos[, 1])
z1 = 1:ncombos
for (i in 1:ncombos) {
zz = gcvplot(precip ~ X[, combos[i, ]], alpha = bestalpha, deg = 2, kern = "bisq",
ev = dat(), scale = TRUE, family = "binomial")
z1[i] = as.vector(zz$values)
}
z2 = order(z1)[1]
bestpredictors = combs$which[z2, ]
bestpredictors
## 1 2 3
## TRUE TRUE TRUE
# Now find best link.
z3 = gcvplot(precip ~ X, alpha = bestalpha, deg = 2, kern = "bisq", ev = dat(),
scale = TRUE, family = "binomial", link = "logit")
z5 = gcvplot(precip ~ X, alpha = bestalpha, deg = 2, kern = "bisq", ev = dat(),
scale = TRUE, family = "binomial", link = "ident")
# the lowest gcv for each link
sort(z3$values)[1]
## [1] 0.01567
sort(z5$values)[1]
## [1] -0.9508
# use link='logit'
bestmodel = locfit(precip ~ lat + long + elevation, alpha = bestalpha, deg = 2,
kern = "bisq", scale = TRUE, family = "binomial", link = "logit")
b) Estimate the function on the DEM grid and plot the surface. Compare them with the surface plot of the elevation and also with the results from Slater and Clark (2006), Figure 4. They use a quasi-local GLM approach but I would imagine the results should be similar.
predpoints = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat"),
ncol = 3, byrow = T)
X = as.data.frame(predpoints)
names(X) = c("lat", "long", "elevation")
ypred = predict(bestmodel, newdata = as.data.frame(X))
par(mfrow = c(1, 1))
lat1 = sort(unique(X[, 1]))
long1 = sort(unique(X[, 2]))
ypred.mat = matrix(ypred, nrow = length(lat1), ncol = length(long1))
image.plot(long1, lat1, t(ypred.mat))
contour(long1, lat1, t(ypred.mat), add = T)
Fit a hierarchical spatial model with Local Polynomial
i. Look at variogram and fit model
mydata = matrix(scan("http://cires.colorado.edu/~aslater/CVEN_6833/colo_precip.dat"),
ncol = 4, byrow = T)
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = cbind(lat, long, elevation)
# from question 5
bestalpha = 0.06629328
bestfamily = "gamma"
zloc = locfit(precip ~ lat + long + elevation, alpha = bestalpha, deg = 2, kern = "bisq",
scale = TRUE, family = bestfamily, link = "log")
yres = residuals(zloc, type = "response")
xs = cbind(long, lat)
par(mfrow = c(1, 1))
sigma = 1
look <- vgram(xs, yres, N = 15, lon.lat = FALSE)
bplot.xy(look$d, look$vgram, breaks = look$breaks, outline = FALSE, xlab = "distance (degrees)",
ylab = "variogram")
points(look$centers, look$stats[2, ], col = "blue")
xd <- look$centers
ym <- look$stats[2, ]
# variogram sill and range chosen by eye from the plot (no nls fit here)
rho = 20000
theta = 0.1
xr = round(max(xd) + sd(xd))
dgrid <- seq(0, xr, , 400)
lines(dgrid, sigma^2 + rho * (1 - exp(-1 * dgrid/theta)), col = "blue", lwd = 3)
zz = Krig(xs, yres, rho = rho, theta = theta, m = 1, sigma2 = sigma)
ypred = predict(zloc, X, se.fit = TRUE)
yp = ypred$fit + predict.Krig(zz)
ii. Look at residuals of model
par(mfrow = c(2, 2))
error = as.vector(precip - yp) #coerce to a vector so sd() behaves
hist(error, probability = T)
# fit and plot a normal distribution to the residuals..
lines(error[order(error)], dnorm(error[order(error)], mean = mean(error), sd = sd(error)),
col = "red")
# nonparametric PDF
sm.density(error, add = T, col = "blue")
# Homoskedastic: residuals versus y_hat
plot(yp, error)
abline(h = 0, col = "red")
title("Homoskedasticity")
# Check normality of residuals
qqnorm(error, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Normal QQPlot of Residuals")
qqline(error, col = "red")
# Autocorrelation test
acf(error)
iii. Cross Validation: drop 1 point, fit model, predict that point
# get the cross validated estimates..
X2 = as.matrix(cbind(lat, long))
X3 = as.matrix(cbind(lat, long, elevation))
N = length(precip)
yest = rep(c(1:N))
index = 1:N
for (i in 1:N) {
index1 = index[-i]
YVal = precip[index1]
XVal2 = X2[index1, ]
XVal3 = X3[index1, ]
### fit locfit and get residuals..
zloc = locfit(YVal ~ XVal3, alpha = bestalpha, deg = 2, kern = "bisq", scale = TRUE,
family = bestfamily, link = "log")
yres = residuals(zloc, type = "response")
zz = Krig(XVal2, yres, rho = rho, theta = theta, m = 1, sigma2 = sigma)
yest[i] = predict(zloc, t(X3[i, ])) + predict.Krig(zz, t(X2[i, ]))
}
par(mfrow = c(1, 1))
plot(precip, yest, xlim = range(precip, yest), ylim = range(precip, yest), xlab = "True value",
ylab = "x-validated estimate", main = "Drop 1 Predictions")
lines(precip, precip)
iv. Drop 10% of observations, fit the model (i.e., the 'best' model from i. above) to the rest of the data and predict the dropped points. Compute RMSE and R2 and show them as boxplots.
### Drop 10% of points, fit the model and predict the dropped points..
N = length(precip)
Np = round(0.1 * N)
nsim = 5
rmse = c()
rsq = c()
index = 1:N
X = as.matrix(long)
X2 = as.matrix(cbind(lat, long))
X3 = as.matrix(cbind(lat, long, elevation))
for (i in 1:nsim) {
drop = sample(1:N, Np) # select a random 10% of points (without duplicates)
keep = setdiff(index, drop)
Xdrop = X[drop, ]
Xdrop2 = X2[drop, ]
Xdrop3 = X3[drop, ]
Ykeep = precip[keep]
Xkeep = X[keep, ]
Xkeep2 = X2[keep, ]
Xkeep3 = X3[keep, ]
zloc = locfit(Ykeep ~ Xkeep3, alpha = bestalpha, deg = 2, kern = "bisq",
scale = TRUE, family = bestfamily, link = "log")
yres = residuals(zloc, type = "response")
zz = Krig(Xkeep2, yres, rho = rho, theta = theta, m = 1, sigma2 = sigma)
Xkeep3 = Xdrop3 #reuse the name so the prediction is at the dropped points
yp = predict(zloc, newdata = as.data.frame(Xkeep3)) + predict.Krig(zz, Xdrop2)
resid = precip[drop] - yp
rmse[i] = sqrt(mean((precip[drop] - yp)^2))
SST = sum((precip[drop] - mean(precip[drop]))^2)
SSR = sum((precip[drop] - yp)^2)
rsq[i] = 1 - (SSR/SST)
}
par(mfrow = c(1, 2))
# boxplot rmse
zz = myboxplot(rmse, xlab = "", ylab = "Rmse", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rmse), col = "red", cex = 1.25, pch = 16)
title(main = "RMSE: predicted values\nof the 10% dropped")
# boxplot rsq
zz = myboxplot(rsq, xlab = "", ylab = "R-Squared", plot = F)
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, mean(rsq), col = "red", cex = 1.25, pch = 16)
title(main = "R-Squared: predicted values\nof the 10% dropped")
v. Predict on DEM
lat = mydata[, 1]
long = mydata[, 2]
elevation = mydata[, 3]
precip = mydata[, 4]
X = cbind(lat, long, elevation)
X2 = cbind(long, lat)
### fit locfit and get residuals..
zloc = locfit(precip ~ X, alpha = bestalpha, deg = 2, kern = "bisq", scale = TRUE,
family = bestfamily, link = "log")
yres = residuals(zloc, type = "response")
zz = Krig(X2, yres, rho = rho, theta = theta, sigma2 = sigma, m = 1)
# now predict on dem values
predpts = read.table("http://cires.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
lat = predpts$V1
lon = predpts$V2
elev = predpts$V3
X = cbind(lat, lon, elev)
X2 = cbind(lon, lat)
loc = predict(zloc, newdata = as.data.frame(X), se.fit = TRUE)
locy = loc$fit #avoid naming a variable 'locfit', which masks the function
krigy = predict(zz, x = X2)
yp = locy + krigy
locse = loc$se.fit
krigse = predict.se(zz, x = X2) #X2 holds the DEM (lon, lat) defined above
all.se = locse + krigse
### spatial map of estimates and errors - modify to add beautify..
par(mfrow = c(2, 1))
xlon = sort(unique(lon))
nrs = length(xlon)
ylat = sort(unique(lat))
ncs = length(ylat)
# estimates
zmat = matrix(yp, nrow = ncs, ncol = nrs)
image.plot(xlon, ylat, t(zmat), zlim = range(precip))
contour(xlon, ylat, t(zmat), add = T)
# errors
zmat = matrix(all.se, nrow = ncs, ncol = nrs)
image.plot(xlon, ylat, t(zmat))
contour(xlon, ylat, t(zmat), add = T)
Models Explored:
Linear Model (Normal)
Generalized Linear Model
Local Polynomial
Local Polynomial with distribution defined
Co-Kriging: Spatial Model on dependent variable
Hierarchical Kriging