library(akima)
library(fields)
library(locfit)
library(MASS)
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/SSTread1.txt")
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/SSTwinter12Read1.txt")
thebest = function(X, Y, deg, family) {
nvar = length(X[1, ])
N = length(Y)
minalpha = 2 * (nvar * deg + 1)/N
alpha_grid = seq(minalpha, 1, by = 0.05)
zz = gcvplot(Y ~ X, alpha = alpha_grid, deg = deg, kern = "bisq", ev = dat(),
scale = TRUE, family = family)
mygcv = sort(zz$values)[1]
# return(mygcv)
alpha = alpha_grid
# pick the best alpha from the best family that gives the least GCV
z6 = order(zz$values)[1]
bestalpha = alpha[z6]
myresult = list(gcv = mygcv, bestalpha = bestalpha, family = family)
}
############ SST, all months ##################
nrows = 72
ncols = 36
ntimemonths = 1332
data = readBin("http://iridl.ldeo.columbia.edu/expert/SOURCES/.KAPLAN/.EXTENDED/.v2/.ssta/T/%28Jan%201900%29%28Dec%202011%29RANGE/T/12/boxAverage/T/12/STEP/data.r4",
what = "numeric", n = (nrows * ncols * ntimemonths), size = 4, endian = "swap")
mine = SSTread(nrows, ncols, ntimemonths, data)
SSTallavg = mine$mydata
SSTallavg = SSTallavg[1:111, ] # 1900- 2011
xygrid = mine$xygrid
nglobe = mine$nglobe
indexgrid = mine$indexgrid
############# SST, winter months ###############
ntimemonths = 1344
data12 = readBin("http://iridl.ldeo.columbia.edu/expert/SOURCES/.KAPLAN/.EXTENDED/.v2/.ssta/T/%28Dec%201900%29%28Dec%202012%29RANGE/T/4/boxAverage/T/12/STEP/data.r4",
what = "numeric", n = (nrows * ncols * ntimemonths), size = 4, endian = "swap")
mine1 = SSTread(nrows, ncols, ntimemonths, data12)
SSTwinter = mine1$mydata
SSTwinter = SSTwinter[1:111, ] # 1900 - 2011
xygridW = mine1$xygrid
indexgridW = mine1$indexgrid
SSTwinter12 = mine1$mydata # 1900- 2012
############### Precip data ######################
Precipdata = t(matrix(scan("http://iridl.ldeo.columbia.edu/expert/expert/SOURCES/.NOAA/.NCDC/.CIRS/.ClimateDivision/.pcp/T/%28Dec%201900%29%28Dec%202012%29RANGE/T/4/boxAverage/T/12/STEP/IDIV/201/202/203/204/205/206/207/401/402/403/404/405/406/407/502/504/505/1001/1002/1003/1004/1005/1006/1007/1008/1009/1010/2401/2402/2403/2404/2405/2406/2407/2601/2602/2603/2604/2901/2902/2904/2905/2906/2907/2908/3501/3502/3503/3504/3505/3506/3507/3508/3509/4201/4202/4203/4204/4205/4206/4207/4501/4502/4503/4504/4505/4506/4507/4508/4509/4510/4801/4802/4803/4804/4805/4806/4807/4808/4809/4810/VALUES/data.ch"),
ncol = 112, byrow = T))
regions = c(201, 202, 203, 204, 205, 206, 207, 401, 402, 403, 404, 405, 406,
407, 502, 504, 505, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
1010, 2401, 2402, 2403, 2404, 2405, 2406, 2407, 2601, 2602, 2603, 2604,
2901, 2902, 2904, 2905, 2906, 2907, 2908, 3501, 3502, 3503, 3504, 3505,
3506, 3507, 3508, 3509, 4201, 4202, 4203, 4204, 4205, 4206, 4207, 4501,
4502, 4503, 4504, 4505, 4506, 4507, 4508, 4509, 4510, 4801, 4802, 4803,
4804, 4805, 4806, 4807, 4808, 4809, 4810) # These are regions associated with Western US
# rename column headers
colnames(Precipdata) = regions
# rename row names
years = 1901:2012
rownames(Precipdata) = years
WestPrecip = Precipdata[1:111, ] # 1900- 2011
WestPrecip12 = Precipdata[1:112, ] # 1900- 2012
# AZ precip (associated to regions 200-207). Also, scale data.
AZprecip = WestPrecip[, 1:7] # 1900- 2011
AZprecip12 = WestPrecip12[, 1:7] # 1900- 2012
AZprecip12 = scale(AZprecip12)
AZprecip12SD = attr(AZprecip12, "scaled:scale")
AZprecip12Mean = attr(AZprecip12, "scaled:center")
# Lat Long of grid points of Western US .
wcdivlocs = matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/WesternUS-coords.txt"),
ncol = 5, byrow = T)
idx = 1:length(regions)
for (i in length(regions)) {
idx[i] = which(wcdivlocs[, 1] == regions[i])
}
# put lat long in same order as WestPrecip data
long = -wcdivlocs[idx, 4]
lat = wcdivlocs[idx, 3]
zs = var(SSTallavg) #variance matrix
zsvd = svd(zs) #Eigen decomposition
pcs = t(t(zsvd$u) %*% t(SSTallavg)) #PCs
lambdas = (zsvd$d/sum(zsvd$d)) #Eigen Values/ Fraction variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "EigenValues")
points(1:40, lambdas[1:40], col = "red")
# Use the first 3 modes as it explains over 70% of the variance
# Plot Spatial and Temporal Trends
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
nrows = 72
ncols = 36
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
xlong = sort(unique(xygrid[, 2]))
ylat = sort(unique(xygrid[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgrid] = zsvd$u[, i]
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 70))
title(main = titles1[i])
contour(xlong, ylat, (zmat), ylim = range(-40, 70), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = T)
plot(1901:2011, pcs[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
nmodes = length(zsvd$u[1, ]) # number of modes
ndrop = c(1) # modes to drop
E = matrix(0, nrow = nmodes, ncol = nmodes) #Eigen vector
E[, ndrop] = zsvd$u[, ndrop]
sstanavgdrop = pcs %*% t(E)
sstanrem = SSTallavg - sstanavgdrop
# Now perform PCA on sstanrem
zs1 = var(sstanrem) # var matrix
zsvd1 = svd(zs1) #Eigen decomposition
pcs1 = t(t(zsvd1$u) %*% t(sstanrem)) # PCs
lambdas = (zsvd1$d/sum(zsvd1$d)) #Eigen Values/Fraction variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "EigenValues")
points(1:40, lambdas[1:40], col = "red")
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
xlong = sort(unique(xygrid[, 2]))
ylat = sort(unique(xygrid[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgrid] = zsvd1$u[, i]
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 70))
title(main = titles1[i])
contour(xlong, ylat, (zmat), ylim = range(-40, 70), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = T)
plot(1901:2011, pcs[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
sst_1900_1935 = SSTallavg[1:35, ]
zs2 = var(sst_1900_1935) # var matrix
zsvd2 = svd(zs2) #Eigen decomposition
pcs2 = t(t(zsvd2$u) %*% t(sst_1900_1935)) #PCs
lambdas = (zsvd2$d/sum(zsvd2$d)) #Eigen Values/ Fraction variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "EigenValues")
points(1:40, lambdas[1:40], col = "red")
# Plot Spatial and Temporal trends for first 3 modes/ associated
# eigenvectors
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
xlong = sort(unique(xygrid[, 2]))
ylat = sort(unique(xygrid[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgrid] = zsvd2$u[, i]
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 70))
title(main = titles1[i])
contour(xlong, ylat, (zmat), ylim = range(-40, 70), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = T)
plot(1901:1935, pcs2[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
sst_1936_1975 = SSTallavg[36:75, ]
zs2 = var(sst_1936_1975)
zsvd2 = svd(zs2) #Eigen decomposition
pcs2 = t(t(zsvd2$u) %*% t(sst_1936_1975)) # PCs
lambdas = (zsvd2$d/sum(zsvd2$d)) #Eigen Values/ Fraction variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "EigenValues")
points(1:40, lambdas[1:40], col = "red")
# Plot Spatial and Temporal trends for first 4 modes/ associated
# eigenvectors
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
xlong = sort(unique(xygrid[, 2]))
ylat = sort(unique(xygrid[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgrid] = zsvd2$u[, i]
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 70))
title(main = titles1[i])
contour(xlong, ylat, (zmat), ylim = range(-40, 70), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = T)
plot(1936:1975, pcs2[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
sst_1976_2011 = SSTallavg[76:111, ]
zs2 = var(sst_1976_2011) # variance matrix
zsvd2 = svd(zs2) # egien decomposition
pcs2 = t(t(zsvd2$u) %*% t(sst_1976_2011)) #PCs
lambdas = (zsvd2$d/sum(zsvd2$d)) #Eigen Values/ Fraction variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "EigenValues")
points(1:40, lambdas[1:40], col = "red")
# Plot Spatial and Temporal trends for first 4 modes/ associated
# eigenvectors
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
xlong = sort(unique(xygrid[, 2]))
ylat = sort(unique(xygrid[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgrid] = zsvd2$u[, i]
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 70))
title(main = titles1[i])
contour(xlong, ylat, (zmat), ylim = range(-40, 70), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = T)
plot(1976:2011, pcs2[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
The patterns between the different years seems not to have significantly changed.
# PCA on the seasonal SST
zs = var(SSTwinter) #variance matrix
zsvd = svd(zs) #Eigen decomposition..
pcsSST = t(t(zsvd$u) %*% t(SSTwinter)) #Principal Components
lambdas = (zsvd$d/sum(zsvd$d)) #Eigen Values/fraction variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained")
points(1:40, lambdas[1:40], col = "red")
# the data is on a grid so fill the entire globaal grid with NaN and then
# populate the ocean grids with the Eigen vector
xlong = sort(unique(xygridW[, 2]))
ylat = sort(unique(xygridW[, 1]))
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
xlong = sort(unique(xygridW[, 2]))
ylat = sort(unique(xygridW[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgridW] = zsvd$u[, i]
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 70))
title(main = titles1[i])
contour(xlong, ylat, (zmat), ylim = range(-40, 70), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = T)
plot(1901:2011, pcsSST[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
# PCA on Western Precip
zs = var(WestPrecip) #var matrix
zsvd = svd(zs) #Eigen decomposition
pcsPrecip = t(t(zsvd$u) %*% t(WestPrecip)) #PCs
lambdas = (zsvd$d/sum(zsvd$d)) #Eigen Values/ Fraction Variance
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Western US Precip")
points(1:40, lambdas[1:40], col = "red")
# Spatial and Temporal maps
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
z = interp(long, lat, zsvd$u[, i], duplicate = "strip")
surface(z, xlim = range(long), ylim = range(lat))
title(main = titles1[i])
map("usa", add = T)
map("state", add = T)
title(main = titles1[i])
plot(1901:2011, pcsPrecip[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
par(mfrow = c(3, 3))
for (i in 1:3) {
for (j in 1:3) {
plot(pcsSST[, i], pcsPrecip[, j])
# fit local polynomial
mystud1 = thebest(as.matrix(pcsSST[, i]), pcsPrecip[, j], 1, "qguassian")
bestalpha = mystud1$bestalpha
fit = locfit(pcsPrecip[, j] ~ pcsSST[, i], alpha = bestalpha, deg = 1,
kern = "bisq", scale = TRUE)
plot(fit, add = T, col = "red")
title(main = paste("SST PC", i, ", WestPrecip PC", j))
}
}
pc2tpcpcorr = scan("http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCDC/.CIRS/.ClimateDivision/.pcp/T/%28Dec%201990%29%28Dec%202012%29RANGE/T/4/boxAverage/T/12/STEP/IDIV/201/202/203/204/205/206/207/401/402/403/404/405/406/407/502/504/505/1001/1002/1003/1004/1005/1006/1007/1008/1009/1010/2401/2402/2403/2404/2405/2406/2407/2601/2602/2603/2604/2901/2902/2904/2905/2906/2907/2908/3501/3502/3503/3504/3505/3506/3507/3508/3509/4201/4202/4203/4204/4205/4206/4207/4501/4502/4503/4504/4505/4506/4507/4508/4509/4510/4801/4802/4803/4804/4805/4806/4807/4808/4809/4810/VALUES/SOURCES/.KAPLAN/.EXTENDED/.v2/.ssta/T/%28Dec%201900%29%28Mar%202012%29RANGE/T/4/boxAverage/T/12/STEP%5BT%5Dstandardize%5BX/Y%5D%5BT%5Dsvd/ev/2/RANGE/.Ts%5BT%5Dcorrelate/data.ch")
par(mfrow = c(1, 1))
quilt.plot(long, lat, pc2tpcpcorr, xlim = range(-125, -100))
US(add = TRUE, col = "grey", lwd = 2, xlim = range(-125, -100))
# Precip data 1975 to 2012
West_1975_2012 = WestPrecip12[76:112, ]
zs = var(West_1975_2012) #var matrix
zsvd = svd(zs) #Eigen Decomposition
pcsPrecip = t(t(zsvd$u) %*% t(West_1975_2012)) #PCs
lambdas = (zsvd$d/sum(zsvd$d)) #Eigen Values
par(mfrow = c(1, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Western US Precip")
points(1:40, lambdas[1:40], col = "red")
# Spatial and Temporal maps
titles1 = c("Spatial: First Eignevector", "Spatial: Second Eignevector", "Spatial: Third Eignevector")
titles2 = c("Temporal: First Mode", "Temporal: Second Mode", "Temporal: Third Mode")
par(mfrow = c(3, 2))
for (i in 1:length(titles1)) {
z = interp(long, lat, zsvd$u[, i], duplicate = "strip")
surface(z, xlim = range(long), ylim = range(lat))
title(main = titles1[i])
map("usa", add = T)
map("state", add = T)
title(main = titles1[i])
plot(1975:2011, pcsPrecip[, i], type = "l", xlab = "year")
title(main = titles2[i])
}
# Perform SVD
C = var(WestPrecip, SSTwinter)
zz = svd(C)
# f1 and g1 are the matrix of time coefficients
f1 = as.matrix(WestPrecip) %*% zz$u
g1 = SSTwinter %*% zz$v
# hetrogenous correlation pattern.
J = min(length(SSTwinter[1, ]), length(WestPrecip[1, ]))
par(mfrow = c(1, 1))
plot(1:J, ((zz$d)^2)/sum((zz$d)^2), xlab = "Modes", ylab = "Sq. Covariance",
col = "red")
### Spatial map the first hetro corrln map. TC of rain correlated on SST
par(mfrow = c(3, 2))
for (i in 1:3) {
corrpats = cor(f1[, i], SSTwinter)
# the data is on a grid so fill the entire globaal grid with NaN and then
# populate the ocean grids with the Eigen vector
xlong = sort(unique(xygridW[, 2]))
ylat = sort(unique(xygridW[, 1]))
zfull = rep(NaN, nglobe)
zfull[indexgridW] = corrpats
zmat = matrix(zfull, nrow = nrows, ncol = ncols)
image.plot(xlong, ylat, zmat, ylim = range(-40, 40))
contour(xlong, ylat, (zmat), ylim = range(-40, 40), add = TRUE, nlev = 6,
lwd = 2)
map("world2", add = TRUE)
title(main = "Hetrogeneous Corr. Map - TC1", i, "of Rain with SST")
### Spatial map the first hetro corrln map. TC of SST correlated on Rain
corrpatr = cor(g1[, i], WestPrecip)
z = interp(long, lat, corrpatr, duplicate = "strip")
surface(z, xlim = range(long), ylim = range(lat))
map("usa", add = T)
map("state", add = T)
title(main = paste("Hetrogeneous Corr. Map - TC", i, "of SST with Rain"))
}
# Do only up to 2010- as to do latter problems
############# AZ precip 1900-2010 ###################
zs = var(AZprecip12[1:110, ])
zsvd = svd(zs) #Eigen decomposition
evect12 = zsvd$u #Eigen vector
pcsAZprecip12 = t(t(zsvd$u) %*% t(AZprecip12[1:110, ])) #PCs
lambdas = (zsvd$d/sum(zsvd$d))
par(mfrow = c(2, 1))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Western US Precip",
main = "Eigen Spectrum AZ Precip")
points(1:40, lambdas[1:40], col = "red")
############### SSTwinter data 1900-2010 ######################
zs = var(SSTwinter12[1:110, ])
zsvd = svd(zs) #Eigen decomposition
pcsSSTwinter12 = t(t(zsvd$u) %*% t(SSTwinter12[1:110, ])) #PCs
lambdas = (zsvd$d/sum(zsvd$d))
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Western US Precip",
main = "Eigen Spectrum SST")
points(1:40, lambdas[1:40], col = "red")
# Fit a model. Use first 10 modes of SSTwinter and 1st mode of AZprecip.
X = as.matrix(pcsSSTwinter12[, 1:10])
lmfit = lm(pcsAZprecip12[, 1] ~ X)
bestmodelPC1 = stepAIC(lmfit, trace = F)
# ANOVA
summary(bestmodelPC1)
##
## Call:
## lm(formula = pcsAZprecip12[, 1] ~ 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.543 -1.178 0.643 1.731 3.720
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0267 0.2413 -0.11 0.91
##
## Residual standard error: 2.53 on 109 degrees of freedom
N = length(pcsAZprecip12[, 1])
# for(i in 1:N) {
pc1pred = predict(bestmodelPC1)
# the remaining PCs of AZPrecip
PC2 = pcsAZprecip12[, 3]
pc2pred = rnorm(N, mean = mean(PC2), sd = sd(PC2))
PC3 = pcsAZprecip12[, 3]
pc3pred = rnorm(N, mean = mean(PC3), sd = sd(PC3))
PC4 = pcsAZprecip12[, 4]
pc4pred = rnorm(N, mean = mean(PC4), sd = sd(PC4))
PC5 = pcsAZprecip12[, 5]
pc5pred = rnorm(N, mean = mean(PC5), sd = sd(PC5))
PC6 = pcsAZprecip12[, 6]
pc6pred = rnorm(N, mean = mean(PC6), sd = sd(PC6))
PC7 = pcsAZprecip12[, 7]
pc7pred = rnorm(N, mean = mean(PC7), sd = sd(PC7))
pcppred = cbind(pc1pred, pc2pred, pc3pred, pc4pred, pc5pred, pc6pred, pc7pred)
AZpred12 = pcppred %*% t(evect12)
# Scale it back
AZpred12 = AZpred12 * AZprecip12SD + AZprecip12Mean
# Look at first 3 locations
par(mfrow = c(3, 2))
for (i in 1:3) {
rmse = sqrt(mean((AZprecip12[1:110, i] - AZpred12[, i])^2))
SST = sum((AZprecip12[1:110, i] - mean(AZprecip12[1:110, i]))^2)
SSR = sum((AZprecip12[1:110, i] - AZpred12[, i])^2)
rsq = 1 - (SSR/SST)
assign(paste("location_", i, "_rmse", sep = ""), rmse)
assign(paste("location_", i, "_rsq", sep = ""), rsq)
}
location_1_rmse
## [1] 1.637
location_1_rsq
## [1] -1.67
location_2_rmse
## [1] 1.608
location_2_rsq
## [1] -1.574
location_3_rmse
## [1] 1.632
location_3_rsq
## [1] -1.652
# plot predicted Precip at each location
par(mfrow = c(3, 3))
for (i in 1:length(AZprecip12[1, ])) {
plot(AZprecip12[1:110, i], AZpred12[, i], xlab = "Orignial AZ Precip", ylab = "Predicted AZ Precip",
main = paste("Actucal vs Predicted", i, "th Column"))
lines(AZprecip12[1:110, i], AZprecip12[1:110, i], col = "red")
}
SSTwinter2011 = SSTwinter12[111, ]
# PC 2011 SSTs. Use first 10 modes
zs = var(SSTwinter2011)
zsvd = svd(zs)
evect2011 = zsvd$u #EigenVector of 2011
pcsSSTwinter2011 = t(t(zsvd$u) %*% t(SSTwinter2011)) #PCs
# predict PC1 from bestmodel with leading 10 modes
pc1pred2011 = predict(bestmodelPC1, newdata = as.data.frame(t(pcsSSTwinter2011[1:10])))
# the remaining PCs can be obtained as
PC2 = pcsAZprecip12[, 2]
pc2pred = mean(PC2)
PC3 = pcsAZprecip12[, 3]
pc3pred = mean(PC3)
PC4 = pcsAZprecip12[, 4]
pc4pred = mean(PC4)
PC5 = pcsAZprecip12[, 5]
pc5pred = mean(PC5)
PC6 = pcsAZprecip12[, 6]
pc6pred = mean(PC6)
PC7 = pcsAZprecip12[, 7]
pc7pred = mean(PC7)
pcppred = cbind(pc1pred2011, pc2pred, pc3pred, pc4pred, pc5pred, pc6pred, pc7pred)
AZpred2011 = pcppred %*% t(evect12)
# Scale it back
AZprecip12Mean = apply(AZprecip12, 2, mean)
AZprecip12SD = apply(AZprecip12, 2, sd)
AZpred2011 = AZpred2011 * AZprecip12SD + AZprecip12Mean
par(mfrow = c(2, 1))
AZprecip12 = WestPrecip12[, 1:7] # 1900- 2012
AZprecip2011 = AZprecip12[111, ]
# Compare Predicted AZprecip 2011 to orginal data
plot(t(AZprecip2011), AZpred2011, xlab = "Orignial AZ Precip", ylab = "Predicted AZ Precip",
main = "Actucal vs Predicted Precip 2011")
lines(t(AZprecip2011), t(AZprecip2011), col = "red")
################## PC 2012 SSTs. Use first 10 modes ################
SSTwinter2012 = SSTwinter12[112, ]
zs = var(SSTwinter2012)
zsvd = svd(zs)
evect2011 = zsvd$u #EigenVector
pcsSSTwinter2012 = t(t(zsvd$u) %*% t(SSTwinter2012)) #PCs
# predict PC1 from bestmodel with leading 10 modes
pc1pred2012 = predict(bestmodelPC1, newdata = as.data.frame(t(pcsSSTwinter2012[1:10])))
# the remaining PCs can be obtained as
PC2 = pcsAZprecip12[, 2]
pc2pred = mean(PC2)
PC3 = pcsAZprecip12[, 3]
pc3pred = mean(PC3)
PC4 = pcsAZprecip12[, 4]
pc4pred = mean(PC4)
PC5 = pcsAZprecip12[, 5]
pc5pred = mean(PC5)
PC6 = pcsAZprecip12[, 6]
pc6pred = mean(PC6)
PC7 = pcsAZprecip12[, 7]
pc7pred = mean(PC7)
pcppred = cbind(pc1pred2012, pc2pred, pc3pred, pc4pred, pc5pred, pc6pred, pc7pred)
AZpred2012 = pcppred %*% t(evect12)
# Scale it back
AZprecip12Mean = apply(AZprecip12, 2, mean)
AZprecip12SD = apply(AZprecip12, 2, sd)
AZpred2012 = AZpred2012 * AZprecip12SD + AZprecip12Mean
AZprecip12 = WestPrecip12[, 1:7] # 1900- 2012
AZprecip2012 = AZprecip12[112, ]
plot(t(AZprecip2012), AZpred2012, ylim = range(AZpred2012), xlab = "Orignial AZ Precip",
ylab = "Predicted AZ Precip", main = "Actucal vs Predicted Precip 2012")
lines(t(AZprecip2012), t(AZprecip2012), col = "red")
zs = var(SSTwinter12[1:110, ])
zsvd = svd(zs) #Eigen decomposition
pcsSSTwinter12 = t(t(zsvd$u) %*% t(SSTwinter12[1:110, ])) #PCs
######## Perform CCA on the first 10 PCs of SST ##############
sstPc = pcsSSTwinter12[, 1:10]
X1 = scale(sstPc)
M = dim(sstPc)[2]
J = dim(AZprecip12[1:110, ])[2]
J = min(M, J)
N = length(AZprecip12[1:110, 1])
Qx1 = qr.Q(qr(X1))
Qy1 = qr.Q(qr(AZprecip12[1:110, ]))
T11 = qr.R(qr(X1))
T22 = qr.R(qr(AZprecip12[1:110, ]))
VV = t(Qx1) %*% Qy1
BB = svd(VV)$v
AA = svd(VV)$u
BB = solve(T22) %*% svd(VV)$v * sqrt(N - 1)
wm1 = AZprecip12 %*% BB
AA = solve(T11) %*% svd(VV)$u * sqrt(N - 1)
vm1 = X1 %*% AA
cancorln = svd(VV)$d[1:J] #canonical correlation
Fyy = var(AZprecip12[1:110, ]) %*% BB
# ........................................ Prediction.
betahat = solve(t(AA) %*% t(X1) %*% X1 %*% AA) %*% t(AA) %*% t(X1) %*% AZprecip12[1:110,
]
ypred = X1 %*% AA %*% betahat
# scale back the data mean forecast
ypred = t(t(ypred) * AZprecip12SD + AZprecip12Mean)
# plot predicted Precip at each location
par(mfrow = c(3, 3))
for (i in 1:7) {
plot(AZprecip12[1:110, i], ypred[, i], xlab = "Orignial AZ Precip", ylab = "Predicted AZ Precip",
main = paste("Actucal vs Predicted", i, "location"))
lines(AZprecip12[1:110, i], AZprecip12[1:110, i], col = "red")
}
# R2 and rmse. Look at first 3 locations
par(mfrow = c(3, 2))
for (i in 1:3) {
rmse = sqrt(mean((AZprecip12[1:110, i] - ypred[, i])^2))
SST = sum((AZprecip12[1:110, i] - mean(AZprecip12[, i]))^2)
SSR = sum((AZprecip12[1:110, i] - ypred[, i])^2)
rsq = 1 - (SSR/SST)
assign(paste("location_", i, "_rmse", sep = ""), rmse)
assign(paste("location_", i, "_rsq", sep = ""), rsq)
}
location_1_rmse
## [1] 0.5479
location_1_rsq
## [1] 0.07174
location_2_rmse
## [1] 0.5662
location_2_rsq
## [1] 0.06369
location_3_rmse
## [1] 0.8292
location_3_rsq
## [1] 0.08126
zs = var(SSTwinter12[111, ])
zsvd = svd(zs) #Eigen decomposition
pcsSSTwinter2011 = t(t(zsvd$u) %*% t(SSTwinter12[111, ])) #PCs
zs = var(SSTwinter12[112, ])
zsvd = svd(zs) #Eigen decomposition
pcsSSTwinter2012 = t(t(zsvd$u) %*% t(SSTwinter12[112, ])) #PCs
# Predict 2011
sstPc = as.matrix(t(pcsSSTwinter2011[1:10]))
ypred = sstPc %*% AA %*% betahat
ypred2011 = t(t(ypred) * AZprecip12SD + AZprecip12Mean) # scale back the data
# Predict 2012
sstPc = as.matrix(t(pcsSSTwinter2012[1:10]))
ypred = sstPc %*% AA %*% betahat
ypred2012 = t(t(ypred) * AZprecip12SD + AZprecip12Mean) # scale back the data
par(mfrow = c(2, 1))
# Compare Predicted AZprecip 2011 to orginal data
plot(t(AZprecip2011), ypred2011, xlab = "Orignial AZ Precip", ylab = "Predicted AZ Precip",
main = "Actucal vs Predicted Precip 2011")
lines(t(AZprecip2011), t(AZprecip2011), col = "red")
plot(t(AZprecip2012), ypred2012, xlab = "Orignial AZ Precip", ylab = "Predicted AZ Precip",
main = "Actucal vs Predicted Precip 2012")
lines(t(AZprecip2012), t(AZprecip2012), col = "red")
library(mclust)
detach("package:mclust", unload = TRUE)
library(fields)
library(cluster)
library(ggplot2)
WestPrecip = Precipdata[1:111, ] # 1900- 2011
WestPrecip = matrix(apply(WestPrecip, 2, mean), 81, 1)
x = WestPrecip
nk = 20 #number of clusters to consider
wss <- (nrow(x) - 1) * sum(apply(x, 2, var))
for (i in 2:nk) {
wss[i] <- sum(kmeans(x, centers = i)$withinss)
}
par(mfrow = c(1, 1))
qplot(1:nk, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares") +
theme_bw()
WSS begins to staurate at about 5 clusters
kbest = 5
zclust = kmeans(x, centers = kbest)
plot(long, lat, col = c(zclust$cluster), xlab = "", ylab = "")
US(add = TRUE, col = "grey", lwd = 2, xlim = range(-125, -100))
x = cbind(long, lat, WestPrecip)
nk = 20 #number of clusters to consider
wss <- (nrow(x) - 1) * sum(apply(x, 2, var))
for (i in 2:nk) {
wss[i] <- sum(kmeans(x, centers = i)$withinss)
}
par(mfrow = c(1, 1))
qplot(1:nk, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares") +
theme_bw()
The optimal clusters in this case is 10
xbest = 10
zclust = kmeans(x, centers = kbest)
plot(long, lat, col = c(zclust$cluster), xlab = "", ylab = "")
US(add = TRUE, col = "grey", lwd = 2, xlim = range(-125, -100))
Surprisingly, the years, i.e. 1900-1935, 1936-1975, 1976-2012, don't seem to be singificantly different. The first three modes in each grouping look similar.
The first mode of the winter SST data is much different than the first mode of the full year SST. Minor relationships can be seen with the related graphs, however the correlation plot tells a different story
SVD on covariance of Precip and SSTwinter show a relationship.
CCA forcasting is significantly better than PCA forcasting as the scatter plots for 2011 and 2012 predictions from CCA fall closer to the best fit line. This is expected becase CCA seeks to maximize correlation as opposed to covariance.
The clustering with lat, long included looks much better than clustering with just the Precipitation Data.The clusters in the lat,long,Precip map are more even and grouped by geogrpahic location, which logically makes sense.