# HW 3 Q 4 JLCY, 19 Dec 2013

############################################################ 

# This code is to model a lag-1 Modified Nonparametric K-nearest neighbor
# model for a monthly streamflow time series.

# clear variables
rm(list = ls())

library(locfit)
## locfit 1.5-9.1    2013-03-22

Qnumber = 12

source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\loaddata.r")
## Package `sm', version 2.2-5: type help(sm) for summary information

############################################################ 

nmons = nyrs * 12  # number of values to be generated

jansim = matrix(0, ncol = nsim, nrow = nyrs)
febsim = matrix(0, ncol = nsim, nrow = nyrs)
marsim = matrix(0, ncol = nsim, nrow = nyrs)
aprsim = matrix(0, ncol = nsim, nrow = nyrs)
maysim = matrix(0, ncol = nsim, nrow = nyrs)
junsim = matrix(0, ncol = nsim, nrow = nyrs)
julsim = matrix(0, ncol = nsim, nrow = nyrs)
augsim = matrix(0, ncol = nsim, nrow = nyrs)
sepsim = matrix(0, ncol = nsim, nrow = nyrs)
octsim = matrix(0, ncol = nsim, nrow = nyrs)
novsim = matrix(0, ncol = nsim, nrow = nyrs)
decsim = matrix(0, ncol = nsim, nrow = nyrs)

mxsp = 1:nsim1
mxdef = 1:nsim1
maxs = 1:nsim1
maxd = 1:nsim1

KNNflows = matrix(0, nmons, nsim)  # 101*12 x 250

annflow = 1:nyrs

for (i in 1:nyrs) annflow[i] = sum(LFmonflow[i, 1:12])

x = annflow

############################################################ 

# fit lowest and get the residuals series

# set variables
resids = matrix(0, nrow = nyrs1, ncol = 12)
alpha = seq(0.2, 0.8, by = 0.05)
alpha1 = seq(0.2, 0.8, by = 0.05)
alpha2 = c(alpha, alpha1)
n = length(alpha1)

simpdf = matrix(0, nrow = nsim, ncol = nevals)

# find best locfit
for (i in 1:12) {
    i1 = i - 1
    if (i == 1) {
        # Jan

        z1 = gcvplot(LFmonflow[2:nyrs, 1] ~ LFmonflow[1:nyrs1, 12], alpha = alpha, 
            deg = 1)
        z2 = gcvplot(LFmonflow[2:nyrs, 1] ~ LFmonflow[1:nyrs1, 12], alpha = alpha, 
            deg = 2)

        z3 = order(c(z1$values, z2$values))
        deg1 = 1
        if (z3[1] > n) 
            deg1 = 2

        # bestalpha & bestdeg
        bestalpha = alpha2[z3[1]]
        bestdeg = deg1

        # use Dec data to model Jan
        zz = locfit(LFmonflow[2:nyrs, 1] ~ LFmonflow[1:nyrs1, 12], alpha = bestalpha, 
            deg = bestdeg)
        x1l = zz

        resids[, i] = residuals(zz)
    } else {
        # Feb to Dec

        z1 = gcvplot(LFmonflow[1:nyrs1, i] ~ LFmonflow[1:nyrs1, i1], alpha = alpha, 
            deg = 1)
        z2 = gcvplot(LFmonflow[1:nyrs1, i] ~ LFmonflow[1:nyrs1, i1], alpha = alpha, 
            deg = 2)

        z3 = order(c(z1$values, z2$values))
        deg1 = 1
        if (z3[1] > n) 
            deg1 = 2

        # bestalpha & bestdeg
        bestalpha = alpha2[z3[1]]
        bestdeg = deg1

        zz = locfit(LFmonflow[1:nyrs1, i] ~ LFmonflow[1:nyrs1, i1], alpha = bestalpha, 
            deg = bestdeg)

        if (i == 2) 
            x2l = zz
        if (i == 3) 
            x3l = zz
        if (i == 4) 
            x4l = zz
        if (i == 5) 
            x5l = zz
        if (i == 6) 
            x6l = zz
        if (i == 7) 
            x7l = zz
        if (i == 8) 
            x8l = zz
        if (i == 9) 
            x9l = zz
        if (i == 10) 
            x10l = zz
        if (i == 11) 
            x11l = zz
        if (i == 12) 
            x12l = zz

        resids[, i] = residuals(zz)
    }
}

############################################################ 

# get the weight metric. Here we are using 1/k weight function

kk = sqrt(nyrs)  # number nearest neighbours 'K'
kk = round(kk)

W = 1:kk
W = 1/W
W = W/sum(W)
W = cumsum(W)

for (k in 1:nsim) {
    xsim = 1:nmons  # 101*12

    # for the first month of the first year - pick a value at random from one of
    # the nyrs years.

    i = round(runif(1, 1, nyrs))  # random year
    xsim[1] = LFmonflow[i, 1]
    xprev = xsim[1]

    for (i in 2:nmons) {
        j = i%%12
        if (j == 0) 
            j = 12
        j1 = j - 1

        # get the distance
        if (j == 1) {
            xx = abs(xprev - LFmonflow[1:nyrs1, 12])
        } else {
            xx = abs(xprev - LFmonflow[1:nyrs1, j1])
        }

        # order the distance
        xz = order(xx)

        # select the first K neighbors
        xz = xz[1:kk]

        # pick a neighbor out of the K neighbros based on the weight metric W
        xx = runif(1, 0, 1)
        xy = c(xx, W)
        xx = rank(xy)
        i1 = xz[xx[1]]

        if (j == 1) 
            xm = predict(x1l, xprev)
        if (j == 2) 
            xm = predict(x2l, xprev)
        if (j == 3) 
            xm = predict(x3l, xprev)
        if (j == 4) 
            xm = predict(x4l, xprev)
        if (j == 5) 
            xm = predict(x5l, xprev)
        if (j == 6) 
            xm = predict(x6l, xprev)
        if (j == 7) 
            xm = predict(x7l, xprev)
        if (j == 8) 
            xm = predict(x8l, xprev)
        if (j == 9) 
            xm = predict(x9l, xprev)
        if (j == 10) 
            xm = predict(x10l, xprev)
        if (j == 11) 
            xm = predict(x11l, xprev)
        if (j == 12) 
            xm = predict(x12l, xprev)

        xsim[i] = xm + resids[i1, j]
        # print(c(i,j,xprev,xm,resids[i1,j]))

        xprev = xsim[i]
    }

    ############################################################ 

    simdismon = matrix(xsim, nrow = 12)
    simdismon = t(simdismon)

    KNNflows[, k] = xsim

    jansim[1:nyrs, k] = simdismon[, 1]
    febsim[1:nyrs, k] = simdismon[, 2]
    marsim[1:nyrs, k] = simdismon[, 3]
    aprsim[1:nyrs, k] = simdismon[, 4]
    maysim[1:nyrs, k] = simdismon[, 5]
    junsim[1:nyrs, k] = simdismon[, 6]
    julsim[1:nyrs, k] = simdismon[, 7]
    augsim[1:nyrs, k] = simdismon[, 8]
    sepsim[1:nyrs, k] = simdismon[, 9]
    octsim[1:nyrs, k] = simdismon[, 10]
    novsim[1:nyrs, k] = simdismon[, 11]
    decsim[1:nyrs, k] = simdismon[, 12]

    annsim = apply(simdismon, 1, sum)

    simpdf[k, ] = sm.density(maysim[, k], eval.points = xeval, display = "none")$estimate
    annsimpdf[k, ] = sm.density(annsim, eval.points = annxeval, display = "none")$estimate

    source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\sim_basicstat.r")

    source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\sim_droughtstat.r")

}

############################################################ 

# find out statistics
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\droughtstat.r")
## Max of obs drought =  9.144 
## Max of sim drought =  19.76 , Standard deivation =  2.284 
##  
## Max of obs surplus =  10.83 
## Max of sim surplus =  24.53 , Standard deivation =  1.163 
##  
## Sum of obs drought =  162.6 
## Sum of obs surplus =  202.8 
##  
## Average length of obs drought in years =  1.96 
##  
## Min length of obs drought in years =  1 
##  
## Max length of obs drought in years =  5 
## Max length of sim drought in years =  34 , Standard deivation =  5.213 
##  
## Average length of obs surplus in years =  2.042 
##  
## Min length of obs surplus in years =  1 
##  
## Max length of obs surplus in years =  6 
## Max length of sim surplus in years =  7 , Standard deivation =  0.3523 
## 

############################################################ 

par(ask = TRUE)

# plot may flow
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\mayflow.r")

plot of chunk unnamed-chunk-1


# plot annual flow
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\annflow.r")

plot of chunk unnamed-chunk-1


par(ask = TRUE)

# plot boxplots
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\boxplots6.r")

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1


############################################################