Rainwater Analysis and Simulation

Read the data and load packages.

mydata=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rain.csv") 
for (i in 1:length(mydata$Amt)){
if(mydata$Amt[i]>0) {mydata$rained[i]=1
}else {mydata$rained[i]=0}
}

##########Add the Date and the Week###########
mydata$date=as.Date(paste(mydata$Year, mydata$Month, mydata$Day, sep='-'))
mydata$Week=as.integer(strftime(mydata$date, format="%V"))
##############################################

#######Generate a subset for Days with Rain###
mydata2=subset(mydata, rained==1,select=c("Amt","Month", "Year", "Day"))
##############################################

#######Load Libraries#########################
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.5.1
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(qcc)
## Warning: package 'qcc' was built under R version 3.5.1
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
library(psych)
## Warning: package 'psych' was built under R version 3.5.1
library(MASS)
## Warning: package 'MASS' was built under R version 3.5.1
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 3.5.1
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.5.1
## Loading required package: npsurv
## Loading required package: lsei
library(pscl)
## Warning: package 'pscl' was built under R version 3.5.1
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
## 
##     aml
## The following object is masked from 'package:psych':
## 
##     logit
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(fGarch)
## Warning: package 'fGarch' was built under R version 3.5.1
## Loading required package: timeDate
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.5.1
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:zoo':
## 
##     time<-
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.5.1
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:psych':
## 
##     tr
library(pROC)
## Warning: package 'pROC' was built under R version 3.5.1
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(censReg)
## Warning: package 'censReg' was built under R version 3.5.1
## Loading required package: maxLik
## Warning: package 'maxLik' was built under R version 3.5.1
## Loading required package: miscTools
## Warning: package 'miscTools' was built under R version 3.5.1
## 
## Attaching package: 'miscTools'
## The following object is masked from 'package:fBasics':
## 
##     triang
## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
## 
## Please cite the 'censReg' package as:
## Henningsen, Arne (2017). censReg: Censored Regression (Tobit) Models. R package version 0.5. http://CRAN.R-Project.org/package=censReg.
## 
## If you have questions, suggestions, or comments regarding the 'censReg' package, please use a forum or 'tracker' at the R-Forge site of the 'sampleSelection' project:
## https://r-forge.r-project.org/projects/sampleselection/
library(tscount)
## Warning: package 'tscount' was built under R version 3.5.1
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.5.1
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:timeSeries':
## 
##     outlier
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:psych':
## 
##     outlier
library(mice)
## Warning: package 'mice' was built under R version 3.5.1
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(caret)
## Warning: package 'caret' was built under R version 3.5.1
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
library(tensorflow)
## Warning: package 'tensorflow' was built under R version 3.5.1
## 
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
## 
##     train
library(keras)
## Warning: package 'keras' was built under R version 3.5.1
library(Amelia)
## Warning: package 'Amelia' was built under R version 3.5.1
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.5.1
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.1
## 
## Attaching package: 'e1071'
## The following objects are masked from 'package:timeDate':
## 
##     kurtosis, skewness
library(xgboost)
## Warning: package 'xgboost' was built under R version 3.5.1
library(AnalyzeFMRI)
## Warning: package 'AnalyzeFMRI' was built under R version 3.5.1
## Loading required package: R.matlab
## Warning: package 'R.matlab' was built under R version 3.5.1
## R.matlab v3.6.2 (2018-09-26) successfully loaded. See ?R.matlab for help.
## 
## Attaching package: 'R.matlab'
## The following object is masked from 'package:keras':
## 
##     evaluate
## The following object is masked from 'package:tensorflow':
## 
##     evaluate
## The following objects are masked from 'package:base':
## 
##     getOption, isOpen
## Loading required package: fastICA
## Warning: package 'fastICA' was built under R version 3.5.1
## Loading required package: tcltk
## Loading required package: tkrplot
## Warning: package 'tkrplot' was built under R version 3.5.1
library(EBImage)
## Warning: package 'EBImage' was built under R version 3.5.1
library(psych)
library(ResourceSelection)
## ResourceSelection 0.3-2   2017-02-28
library(MASS)
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:fBasics':
## 
##     densityPlot
## The following object is masked from 'package:boot':
## 
##     logit
## The following object is masked from 'package:psych':
## 
##     logit
library(ggplot2)
library(adabag)
## Warning: package 'adabag' was built under R version 3.5.1
## Loading required package: rpart
## 
## Attaching package: 'rpart'
## The following object is masked from 'package:survival':
## 
##     solder
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.5.1
## Loading required package: doParallel
## Warning: package 'doParallel' was built under R version 3.5.1
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 3.5.1
## Loading required package: parallel
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 3.5.1
library(class)
library(simmer)
## Warning: package 'simmer' was built under R version 3.5.1
## 
## Attaching package: 'simmer'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:lmtest':
## 
##     reset
library(simmer.plot)
## Warning: package 'simmer.plot' was built under R version 3.5.1
## 
## Attaching package: 'simmer.plot'
## The following objects are masked from 'package:simmer':
## 
##     get_mon_arrivals, get_mon_attributes, get_mon_resources
library(parallel)
library(fpp)
library(reticulate)
## Warning: package 'reticulate' was built under R version 3.5.1
set.seed(1234)
memory.size(max=TRUE)
## [1] 296.56
##############################################

Descriptive Statistics

We evaluated rainfall by daily, weekly, monthly, and yearly periods.

###########Cross Tabs########################
myagg0=aggregate(Amt~Week, data=mydata, mean)       #Weekly Average,  53 Obs
myagg0a=aggregate(Amt~Year+Week, data=mydata, mean) #Weekly Rollup, 3737 Obs
myagg1=aggregate(Amt~Year+Month, data=mydata, mean) #Monthly Rollup, 856 months
myagg2=aggregate(Amt~Year,data=mydata, mean)        #Yearly Average, 72 Obs
myagg3=aggregate(Amt~Day, data=mydata, mean)        #By Day of Month Average
myagg4=aggregate(Amt~Month+Day, data=mydata, mean)  #By Day, 366 observations
myagg5=aggregate(Amt~Month, data=mydata, mean)      #Monthly Averages, 12 Obs
myagg6=aggregate(rained~Day+Month, data=mydata, mean) #% Rained by day, 366 observations 

#############################################

##########Baseline Descriptives##############
a1=unlist(describe(mydata$Amt))
a2=unlist(describe(mydata2$Amt))
a2a=unlist(describe(myagg0$Amt))
a2b=unlist(describe(myagg0a$Amt))
a3=unlist(describe(myagg1$Amt))
a4=unlist(describe(myagg2$Amt))
a5=unlist(describe(myagg3$Amt))
a6=unlist(describe(myagg4$Amt))
a7=unlist(describe(myagg5$Amt))
a8=unlist(describe(mydata$rained))
a9=unlist(describe(myagg6$rained)) 
a=cbind(a1,a2,a2a, a2b, a3,a4,a5,a6,a7,a8,a9)
colnames(a)=c("Daily Rainfall, All Days", "Daily Rainfall, Rainy Days", 
              "Avg. Rain by Week", "Avg Rain Sequenced Week",
              "Avg. Rain by Sequenced Month", "Avg. Rain by Year", 
              "Avg. Ran by Day of Month", "Avg. Rain by Day",
              "Avg. Rain by Month", "Proportion Rained Overall", "% Rain by Day of Year")
write.csv(t(a),"C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/desc2.csv" )

#############################################

Descriptive Charts

We looked at histograms and boxplots by various time elements.

############Zero Inflated Analysis###########
p=ggplot(mydata, aes(Amt, fill = rained)) +
  geom_histogram(aes(y=..count../sum(..count..))) +
  facet_grid(rained ~ ., margins=TRUE, scales="free_y")

p + labs(title = "Distribution of Rain")+labs(y="Proportion")+labs(x="Rain in Inches") +xlim(-.1,2)+
guides(fill=guide_legend(title="Did it rain? 0=No"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 362 rows containing non-finite values (stat_bin).
## Warning: Removed 3 rows containing missing values (geom_bar).

########Baseline Histograms##################
h1=hist(mydata$Amt, plot=FALSE)$counts
h1a=hist(myagg0$Amt, plot=FALSE)$counts
h1b=hist(myagg0a$Amt, plot=FALSE)$counts
h2=hist(myagg1$Amt, plot=FALSE)$counts
h3=hist(myagg2$Amt, plot=FALSE)$counts
h4=hist(myagg3$Amt, plot=FALSE)$counts
h5=hist(myagg4$Amt, plot=FALSE)$counts
h6=hist(myagg5$Amt, plot=FALSE)$counts

n1=hist(mydata$Amt, plot=FALSE)$breaks
n1a=hist(myagg0$Amt, plot=FALSE)$breaks
n1b=hist(myagg0a$Amt, plot=FALSE)$breaks
n2=hist(myagg1$Amt, plot=FALSE)$breaks
n3=hist(myagg2$Amt, plot=FALSE)$breaks
n4=hist(myagg3$Amt, plot=FALSE)$breaks
n5=hist(myagg4$Amt, plot=FALSE)$breaks
n6=hist(myagg5$Amt, plot=FALSE)$breaks

x1=h1/sum(h1)
names(x1)=n1[1:length(n1)-1]
x1a=h1a/sum(h1a)
names(x1a)=n1a[1:length(n1a)-1]
x1b=h1b/sum(h1b)
names(x1b)=n1b[1:length(n1b)-1]
x2=h2/sum(h2)
names(x2)=n2[1:length(n2)-1]
x3=h3/sum(h3)
names(x3)=n3[1:length(n3)-1]
x4=h4/sum(h4)
names(x4)=n4[1:length(n4)-1]
x5=h5/sum(h5)
names(x5)=n5[1:length(n5)-1]
x6=h6/sum(h6)
names(x6)=n6[1:length(n6)-1]

par(mfrow=c(3,2))
barplot(x1b, ylim=c(0,1), space=0, col="light yellow", main="Avg. Rain by Week, n=53", 
        xlab="Rain in Inches")
barplot(x1a, ylim=c(0,1), space=0, col="yellow", main="Avg. Rain by Sequenced Week, n=3,737", 
        xlab="Rain in Inches")

barplot(x6,ylim=c(0,1), space=0, col="light green", main="Avg. Rain by Month, n=31", 
        xlab="Rain in Inches")
barplot(x3,ylim=c(0,1), space=0, col="green", main="Avg. Rain by Sequenced Month, n=856", 
        xlab="Rain in Inches")

barplot(x4,ylim=c(0,1), space=0, col="orange", main="Avg. Rain by Year, n=72", 
        xlab="Rain in Inches")
barplot(x5,ylim=c(0,1), space=0, col="red", main="Avg. Rain by Day of Month, n=31", 
        xlab="Rain in Inches")

#############################################

#######Notched Box Plot & Trend Check########
par(mfrow=c(1,1))

boxplot(myagg1$Amt~myagg1$Month, col=c(0,0,0,0,2,2,0,0,2,2,0,0),
        notch=TRUE, main="Notched Boxplot of Mean Daily Rainfall by Month (856 observations)", 
        xlab="Month", ylab="Rain in Inches", horizontal=FALSE, ylim=c(0, .4))
lines(myagg5$Month, myagg5$Amt, lty=2, col="blue")
legend("topleft", legend=c("Average"), col=c("blue"), lty=2)

boxplot(myagg0a$Amt~myagg0a$Week, col=seq(1:12), 
        notch=TRUE, main="Notched Boxplot of Weekly Rainfall", 
        xlab="Month", ylab="Rain in Inches", horizontal=TRUE)
## Warning in bxp(list(stats = structure(c(0, 0, 0.015, 0.0642857142857143, :
## some notches went outside hinges ('box'): maybe set notch=FALSE

myagg2a=aggregate(Amt~Year,data=mydata, sum)

plot(myagg2a$Amt[2:72]~myagg2a$Year[2:72], type="l",
        main="Time Plot of Total Yearly Rainfall", 
        xlab="Year", ylab="Rain in Inches",ylim=c(0,55))
abline(lm(myagg2a$Amt[2:72]~myagg2a$Year[2:72]), col="red", lty=1)
lines(lowess(myagg2a$Year[2:72], myagg2a$Amt[2:72]), col="blue", lty=2)
legend("topleft", legend=c("Linear", "Loess"), col=c("red", "blue"), lty=1:2)

myagg0b=aggregate(Amt~Week, data=mydata, mean)
mylm=lm(myagg0b$Amt~myagg0b$Week+I(myagg0b$Week^2)+I(myagg0b$Week^3)+I(myagg0b$Week^4)+
            I(myagg0b$Week^5)+I(myagg0b$Week^6)+
             I(myagg0b$Week^7)+I(myagg0b$Week^8)+
             I(myagg0b$Week^9)+I(myagg0b$Week^10))
plot(myagg0b$Amt~myagg0b$Week, type="l",
        main="Time Plot of Mean Weekly Rainfall", 
        xlab="Week", ylab="Rain in Inches", ylim=c(0,.15))

abline(lm(myagg0b$Amt~myagg0b$Week), col="red", lty=1)
lines(myagg0b$Week, mylm$fitted.values, col="green",lty=2)
lines(lowess(myagg0b$Week, myagg0b$Amt), col="blue", lty=3)
legend("topleft", legend=c("Linear", "Polynomial","Loess"), col=c("red", "green", "blue"), lty=1:3)

#############################################

Time Series Decomposition

We broke down the time series by various frequencies and decomposed them.

#########Baseline Time Series################
myts1=ts(mydata$Amt, start=c(1946,9),frequency=365)  #daily
myts2=ts(myagg1$Amt, start=c(1946,9), frequency=12)  #monthly
myts3=ts(myagg2$Amt, start=c(1946,9), frequency=1)   #yearly
#############################################

#######Baseline Decomposition################
mydec1=decompose(myts1)
mydec2=decompose(myts2)
plot(mydec1)

plot(mydec2)

#############################################
rm(myagg1)
rm(myagg2)
rm(mydec1)
rm(mydec2)
plot(myts1, main="Daily Rainfall", ylab="Inches of Rain")

Auto Correlation Functions

We evaluated time dependency based on graphics.

par(mfrow=c(2,1))
myacf=acf(myts1)
mypacf=pacf(myts1)

newacf=as.vector(myacf$acf[2:45])
names(newacf)=seq(1:44)
Box.test(myts1, type="Ljung-Box", lag=10)$p.value
## [1] 0
barplot(newacf, ylim=c(-.2,.2), xlim=c(1,44),main="Autocorrelation Function", width=1,
        xlab="Lag", ylab="Correlation")
lines(seq(1:44), rep(0,44),col="red")
lines(seq(1:44), rep(1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)
lines(seq(1:44), rep(-1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)

newpacf=as.vector(mypacf$acf[1:44])
names(newpacf)=seq(1:44)
barplot(newpacf, ylim=c(-.2,.2), xlim=c(1,44),main="Partial Autocorrelation Function", width=1,
        xlab="Lag", ylab="Correlation")
lines(seq(1:44), rep(0,44),col="red")
lines(seq(1:44), rep(1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)
lines(seq(1:44), rep(-1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)

Naive Models

We start by evaluating naive models.

#####Naive##########################
naive=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rain2.csv")
myts1=ts(naive$Amt, frequency=365)
train1=ts(myts1[1:18141],start=c(1946,1,1), frequency=365)
test1=ts(myts1[18142:25915],start=c(1996,9,14), frequency=365)

mynaive=naive(train1)
mynaive2=forecast(mynaive,365)

Seasonal Naive

Next, we look at seasonal naive models.

#####Seasonal Naive##########################

myn1=snaive(train1)
myn2=snaive(test1)
myn3=snaive(myts1)
accuracy(myn1)
##                         ME      RMSE       MAE  MPE MAPE MASE      ACF1
## Training set -0.0001085734 0.4727667 0.1530564 -Inf  Inf    1 0.1372529
accuracy(myn2)
##                        ME      RMSE       MAE  MPE MAPE MASE     ACF1
## Training set -0.000520988 0.5534272 0.1670833 -Inf  Inf    1 0.207811
accuracy(myn3)
##                        ME      RMSE       MAE  MPE MAPE MASE     ACF1
## Training set 0.0003917808 0.4960856 0.1566133 -Inf  Inf    1 0.163213

ETS Models

Then we look at Error Trend Seasonality models.

######ETS Forecast###########################
par(mfrow=c(1,1))
myets=stlf(train1, method="ets")
myetsf=forecast(myets,365)
myetsa=accuracy(myetsf, test1)
plot(myetsf)

myets$model #ETS(A,N,N)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = x, model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend) 
## 
##   Smoothing parameters:
##     alpha = 0.0052 
## 
##   Initial states:
##     l = 0.052 
## 
##   sigma:  0.304
## 
##      AIC     AICc      BIC 
## 134692.5 134692.5 134715.9
myetsa
##                         ME      RMSE       MAE MPE MAPE      MASE
## Training set -3.030006e-05 0.3039925 0.1251364 NaN  Inf 0.8175835
## Test set      3.123006e-02 0.3114883 0.1422783 NaN  Inf 0.9295810
##                   ACF1 Theil's U
## Training set 0.1354259        NA
## Test set     0.1444785       NaN
#############################################

ARIMA Models

ARIMA models follow.

######ETS Forecast###########################
par(mfrow=c(1,1))
myarima=stlf(train1, method="arima")
myarimaf=forecast(myarima,365)
myarimaa=accuracy(myarimaf, test1)
plot(myarima)

myarima$model  #AR5 after differencing
## Series: x 
## ARIMA(1,1,0) with drift 
## 
## Coefficients:
##           ar1   drift
##       -0.4416  0.0000
## s.e.   0.0067  0.0018
## 
## sigma^2 estimated as 0.128:  log likelihood=-7091.28
## AIC=14188.57   AICc=14188.57   BIC=14211.99
myarimaa
##                         ME     RMSE       MAE MPE MAPE      MASE
## Training set -0.0000019965 0.357703 0.1682972 NaN  Inf 1.0995766
## Test set      0.0712763588 0.317976 0.1521066 NaN  Inf 0.9937944
##                    ACF1 Theil's U
## Training set -0.1297951        NA
## Test set      0.1443016       NaN
#############################################

Volatility Models

Look at volatility.

mygarch=garchFit(data=myts1, formula=~arma(3,1,0)+garch(1,1),cond.dist="QMLE", trace=FALSE)
#QMLE assumes the normal distribution and runs quasi-maximum likelhood estimation.  
coef(mygarch)
##         mu        ar1        ar2        ar3      omega     alpha1 
## 0.05295248 0.24156939 0.03142640 0.01213119 0.01448042 0.14476714 
##      beta1 
## 0.79024242
summary(mygarch)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~arma(3, 1, 0) + garch(1, 1), data = myts1, 
##     cond.dist = "QMLE", trace = FALSE) 
## 
## Mean and Variance Equation:
##  data ~ arma(3, 1, 0) + garch(1, 1)
## <environment: 0x000000003c2e56f8>
##  [data = myts1]
## 
## Conditional Distribution:
##  QMLE 
## 
## Coefficient(s):
##       mu       ar1       ar2       ar3     omega    alpha1     beta1  
## 0.052952  0.241569  0.031426  0.012131  0.014480  0.144767  0.790242  
## 
## Std. Errors:
##  robust 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu      0.052952    0.003421   15.477  < 2e-16 ***
## ar1     0.241569    0.020633   11.708  < 2e-16 ***
## ar2     0.031426    0.012993    2.419  0.01558 *  
## ar3     0.012131    0.012619    0.961  0.33637    
## omega   0.014480    0.005182    2.794  0.00520 ** 
## alpha1  0.144767    0.046492    3.114  0.00185 ** 
## beta1   0.790242    0.060644   13.031  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -7533.194    normalized:  -0.2906886 
## 
## Description:
##  Thu Nov 08 17:50:59 2018 by user: lfult 
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value   
##  Jarque-Bera Test   R    Chi^2  15139184  0         
##  Shapiro-Wilk Test  R    W      NA        NA        
##  Ljung-Box Test     R    Q(10)  16.95673  0.0753269 
##  Ljung-Box Test     R    Q(15)  25.58261  0.04264562
##  Ljung-Box Test     R    Q(20)  29.29234  0.08216074
##  Ljung-Box Test     R^2  Q(10)  5.049554  0.8878433 
##  Ljung-Box Test     R^2  Q(15)  12.49004  0.6416223 
##  Ljung-Box Test     R^2  Q(20)  13.338    0.8624153 
##  LM Arch Test       R    TR^2   11.93035  0.4512896 
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## 0.5819174 0.5841222 0.5819172 0.5826298
plot(mygarch@residuals)

rm(mygarch)

Zero-inflated Models

Look at zero inflated models.

newestdata=naive[,-c(1)]
newestdata$Month=as.factor(newestdata$Month)
newestdata$Day=as.factor(newestdata$Day)
newestdata$rained=newestdata$Amt
newestdata$rained[newestdata$rained>0]=1

train2=newestdata[1:18141,]
test2=newestdata[18142:length(newestdata$Amt),]
m1 <- glm(rained~Month+Year+Day, data = train2, family = binomial(link = logit))
m2 <- glm(Amt ~ Month+Year+Day, data = subset(train2, rained == 1), family = Gamma(link = log))




pred1 <- exp(predict(m1, test2))
pred2 <- exp(predict(m2, test2))
pred=pred1*pred2
residuals=test2$Amt-pred
me2=mean(residuals)
mae2=mean(abs(residuals))
myres=residuals^2
mymean=mean(myres)
rmse2=sqrt(mymean)
myacf=acf(residuals)

me2
## [1] -0.05575168
mae2
## [1] 0.1923451
rmse2
## [1] 0.3925828

Consider Time Series with Neural Net

Using External regressors of

mynnetdata=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rain2.csv")
mymonth=mynnetdata$Month
myday=mynnetdata$Day
mymonthtrain=mymonth[1:18141]
mydaytrain=myday[1:18141]
newmat=as.matrix(cbind(mymonthtrain,mydaytrain))
mymonthtest=mymonth[18142:25915]
mydaytest=myday[18142:25915]
testmat=as.matrix(cbind(mymonthtest,mydaytest))
par(mfrow=c(2,2))
nnetdata=ts(mynnetdata$Amt, frequency=365)
mytrain=ts(nnetdata[1:18141], frequency=365)
mytest=ts(nnetdata[18142:25915], frequency=365)


for (i in 1:5){
fit = nnetar(mytrain,1,0,i)
print(accuracy(fit))
print(accuracy(forecast(mytest, model=fit)))
}
##                         ME      RMSE       MAE  MPE MAPE      MASE
## Training set -3.523269e-05 0.3268366 0.1296631 -Inf  Inf 0.8471593
##                    ACF1
## Training set 0.02315117
##                       ME      RMSE       MAE  MPE MAPE      MASE      ACF1
## Training set 0.007418162 0.3796378 0.1356071 -Inf  Inf 0.8116139 0.1085279
##                         ME      RMSE       MAE  MPE MAPE      MASE
## Training set -7.419475e-05 0.3264067 0.1291711 -Inf  Inf 0.8439449
##                     ACF1
## Training set 0.001134151
##                       ME    RMSE      MAE  MPE MAPE      MASE       ACF1
## Training set 0.007275209 0.37849 0.134927 -Inf  Inf 0.8075435 0.08926648
##                         ME      RMSE     MAE  MPE MAPE      MASE
## Training set -0.0001129182 0.3263355 0.12916 -Inf  Inf 0.8438719
##                       ACF1
## Training set -0.0002277007
##                       ME      RMSE       MAE  MPE MAPE      MASE
## Training set 0.007142711 0.3784578 0.1349968 -Inf  Inf 0.8079613
##                    ACF1
## Training set 0.08745968
##                         ME      RMSE       MAE  MPE MAPE      MASE
## Training set -8.742632e-05 0.3263047 0.1291122 -Inf  Inf 0.8435597
##                       ACF1
## Training set -0.0002408176
##                       ME      RMSE       MAE  MPE MAPE      MASE
## Training set 0.007094559 0.3784996 0.1350138 -Inf  Inf 0.8080631
##                    ACF1
## Training set 0.08747423
##                         ME      RMSE       MAE  MPE MAPE      MASE
## Training set -0.0001524993 0.3262893 0.1291539 -Inf  Inf 0.8438319
##                     ACF1
## Training set 1.84578e-05
##                       ME      RMSE       MAE  MPE MAPE      MASE
## Training set 0.006935154 0.3785834 0.1351687 -Inf  Inf 0.8089901
##                    ACF1
## Training set 0.08670584
totmat=as.matrix(mymonth,myday)
totalfit=nnetar(nnetdata,5,0,12, xreg=totmat)

Try Some Stochastic Models Based on Probability of Rain

##############One Period Stochastic############
myprop=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/PRain2.csv")
raindist=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rainfalldist.csv")

z=rep(0,7774)
a=rep(0,7774)
myamt=rep(0,7774)
for (i in 1:7774){
  z[i]=i%%365+1
}

for (i in 1:7774){
  pr=myprop$Prain[z[i]]
  a[i]=rbinom(1,1,pr)
  myvec=na.omit(raindist[,z[i]])
  myamt[i]=sample(myvec,1)
  
}
tottest=a*myamt


res_test=test1-tottest
mean(res_test)
## [1] 0.008937484
mean(abs(res_test))
## [1] 0.1581271
sqrt(mean(res_test^2))
## [1] 0.508748
##############Two Period Stochastic############
myprop2=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/PRain.csv")
raindist2=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rainfalldist2.csv")

z1=rep(0,7774)
a1=rep(0,7774)
myamt1=rep(0,7774)

for (i in 1:7774){
  z1[i]=i%%365+1
}

for (i in 1:7774){
  pr=myprop2$Prain[z[i]]
  a1[i]=rbinom(1,1,pr)
  myvec1=raindist2[,z[i]:z[i]+14]
  myvec1=as.matrix(myvec1)
  dim(myvec1)=c(ncol(myvec1)*nrow(myvec1))
  myvec1=as.vector(myvec1)
  myvec1=na.omit(myvec1)
  myamt1[i]=sample(myvec1,1)
  
}
totrain1=a1*myamt1


res_train=test1-totrain1

mean(res_train)
## [1] 0.002886545
mean(abs(res_train))
## [1] 0.162637
sqrt(mean(res_train^2))
## [1] 0.54034