# HW 3 Q 4 JLCY, 19 Dec 2013
############################################################
# This code is to model a lag-1 Modified Nonparametric K-nearest neighbor
# model for a monthly streamflow time series.
# clear variables
rm(list = ls())
library(locfit)
## locfit 1.5-9.1 2013-03-22
Qnumber = 12
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\loaddata.r")
## Package `sm', version 2.2-5: type help(sm) for summary information
############################################################
nmons = nyrs * 12 # number of values to be generated
jansim = matrix(0, ncol = nsim, nrow = nyrs)
febsim = matrix(0, ncol = nsim, nrow = nyrs)
marsim = matrix(0, ncol = nsim, nrow = nyrs)
aprsim = matrix(0, ncol = nsim, nrow = nyrs)
maysim = matrix(0, ncol = nsim, nrow = nyrs)
junsim = matrix(0, ncol = nsim, nrow = nyrs)
julsim = matrix(0, ncol = nsim, nrow = nyrs)
augsim = matrix(0, ncol = nsim, nrow = nyrs)
sepsim = matrix(0, ncol = nsim, nrow = nyrs)
octsim = matrix(0, ncol = nsim, nrow = nyrs)
novsim = matrix(0, ncol = nsim, nrow = nyrs)
decsim = matrix(0, ncol = nsim, nrow = nyrs)
mxsp = 1:nsim1
mxdef = 1:nsim1
maxs = 1:nsim1
maxd = 1:nsim1
KNNflows = matrix(0, nmons, nsim) # 101*12 x 250
annflow = 1:nyrs
for (i in 1:nyrs) annflow[i] = sum(LFmonflow[i, 1:12])
x = annflow
############################################################
# fit lowest and get the residuals series
# set variables
resids = matrix(0, nrow = nyrs1, ncol = 12)
alpha = seq(0.2, 0.8, by = 0.05)
alpha1 = seq(0.2, 0.8, by = 0.05)
alpha2 = c(alpha, alpha1)
n = length(alpha1)
simpdf = matrix(0, nrow = nsim, ncol = nevals)
# find best locfit
for (i in 1:12) {
i1 = i - 1
if (i == 1) {
# Jan
z1 = gcvplot(LFmonflow[2:nyrs, 1] ~ LFmonflow[1:nyrs1, 12], alpha = alpha,
deg = 1)
z2 = gcvplot(LFmonflow[2:nyrs, 1] ~ LFmonflow[1:nyrs1, 12], alpha = alpha,
deg = 2)
z3 = order(c(z1$values, z2$values))
deg1 = 1
if (z3[1] > n)
deg1 = 2
# bestalpha & bestdeg
bestalpha = alpha2[z3[1]]
bestdeg = deg1
# use Dec data to model Jan
zz = locfit(LFmonflow[2:nyrs, 1] ~ LFmonflow[1:nyrs1, 12], alpha = bestalpha,
deg = bestdeg)
x1l = zz
resids[, i] = residuals(zz)
} else {
# Feb to Dec
z1 = gcvplot(LFmonflow[1:nyrs1, i] ~ LFmonflow[1:nyrs1, i1], alpha = alpha,
deg = 1)
z2 = gcvplot(LFmonflow[1:nyrs1, i] ~ LFmonflow[1:nyrs1, i1], alpha = alpha,
deg = 2)
z3 = order(c(z1$values, z2$values))
deg1 = 1
if (z3[1] > n)
deg1 = 2
# bestalpha & bestdeg
bestalpha = alpha2[z3[1]]
bestdeg = deg1
zz = locfit(LFmonflow[1:nyrs1, i] ~ LFmonflow[1:nyrs1, i1], alpha = bestalpha,
deg = bestdeg)
if (i == 2)
x2l = zz
if (i == 3)
x3l = zz
if (i == 4)
x4l = zz
if (i == 5)
x5l = zz
if (i == 6)
x6l = zz
if (i == 7)
x7l = zz
if (i == 8)
x8l = zz
if (i == 9)
x9l = zz
if (i == 10)
x10l = zz
if (i == 11)
x11l = zz
if (i == 12)
x12l = zz
resids[, i] = residuals(zz)
}
}
############################################################
# get the weight metric. Here we are using 1/k weight function
kk = sqrt(nyrs) # number nearest neighbours 'K'
kk = round(kk)
W = 1:kk
W = 1/W
W = W/sum(W)
W = cumsum(W)
for (k in 1:nsim) {
xsim = 1:nmons # 101*12
# for the first month of the first year - pick a value at random from one of
# the nyrs years.
i = round(runif(1, 1, nyrs)) # random year
xsim[1] = LFmonflow[i, 1]
xprev = xsim[1]
for (i in 2:nmons) {
j = i%%12
if (j == 0)
j = 12
j1 = j - 1
# get the distance
if (j == 1) {
xx = abs(xprev - LFmonflow[1:nyrs1, 12])
} else {
xx = abs(xprev - LFmonflow[1:nyrs1, j1])
}
# order the distance
xz = order(xx)
# select the first K neighbors
xz = xz[1:kk]
# pick a neighbor out of the K neighbros based on the weight metric W
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
i1 = xz[xx[1]]
if (j == 1)
xm = predict(x1l, xprev)
if (j == 2)
xm = predict(x2l, xprev)
if (j == 3)
xm = predict(x3l, xprev)
if (j == 4)
xm = predict(x4l, xprev)
if (j == 5)
xm = predict(x5l, xprev)
if (j == 6)
xm = predict(x6l, xprev)
if (j == 7)
xm = predict(x7l, xprev)
if (j == 8)
xm = predict(x8l, xprev)
if (j == 9)
xm = predict(x9l, xprev)
if (j == 10)
xm = predict(x10l, xprev)
if (j == 11)
xm = predict(x11l, xprev)
if (j == 12)
xm = predict(x12l, xprev)
xsim[i] = xm + resids[i1, j]
# print(c(i,j,xprev,xm,resids[i1,j]))
xprev = xsim[i]
}
############################################################
simdismon = matrix(xsim, nrow = 12)
simdismon = t(simdismon)
KNNflows[, k] = xsim
jansim[1:nyrs, k] = simdismon[, 1]
febsim[1:nyrs, k] = simdismon[, 2]
marsim[1:nyrs, k] = simdismon[, 3]
aprsim[1:nyrs, k] = simdismon[, 4]
maysim[1:nyrs, k] = simdismon[, 5]
junsim[1:nyrs, k] = simdismon[, 6]
julsim[1:nyrs, k] = simdismon[, 7]
augsim[1:nyrs, k] = simdismon[, 8]
sepsim[1:nyrs, k] = simdismon[, 9]
octsim[1:nyrs, k] = simdismon[, 10]
novsim[1:nyrs, k] = simdismon[, 11]
decsim[1:nyrs, k] = simdismon[, 12]
annsim = apply(simdismon, 1, sum)
simpdf[k, ] = sm.density(maysim[, k], eval.points = xeval, display = "none")$estimate
annsimpdf[k, ] = sm.density(annsim, eval.points = annxeval, display = "none")$estimate
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\sim_basicstat.r")
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\sim_droughtstat.r")
}
############################################################
# find out statistics
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\droughtstat.r")
## Max of obs drought = 9.144
## Max of sim drought = 19.76 , Standard deivation = 2.284
##
## Max of obs surplus = 10.83
## Max of sim surplus = 24.53 , Standard deivation = 1.163
##
## Sum of obs drought = 162.6
## Sum of obs surplus = 202.8
##
## Average length of obs drought in years = 1.96
##
## Min length of obs drought in years = 1
##
## Max length of obs drought in years = 5
## Max length of sim drought in years = 34 , Standard deivation = 5.213
##
## Average length of obs surplus in years = 2.042
##
## Min length of obs surplus in years = 1
##
## Max length of obs surplus in years = 6
## Max length of sim surplus in years = 7 , Standard deivation = 0.3523
##
############################################################
par(ask = TRUE)
# plot may flow
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\mayflow.r")
# plot annual flow
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\annflow.r")
par(ask = TRUE)
# plot boxplots
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\boxplots6.r")
############################################################