Read the data and load packages.
mydata=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rain.csv")
for (i in 1:length(mydata$Amt)){
if(mydata$Amt[i]>0) {mydata$rained[i]=1
}else {mydata$rained[i]=0}
}
##########Add the Date and the Week###########
mydata$date=as.Date(paste(mydata$Year, mydata$Month, mydata$Day, sep='-'))
mydata$Week=as.integer(strftime(mydata$date, format="%V"))
##############################################
#######Generate a subset for Days with Rain###
mydata2=subset(mydata, rained==1,select=c("Amt","Month", "Year", "Day"))
##############################################
#######Load Libraries#########################
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.5.1
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(qcc)
## Warning: package 'qcc' was built under R version 3.5.1
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
library(psych)
## Warning: package 'psych' was built under R version 3.5.1
library(MASS)
## Warning: package 'MASS' was built under R version 3.5.1
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
##
## cement, housing, petrol
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 3.5.1
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.5.1
## Loading required package: npsurv
## Loading required package: lsei
library(pscl)
## Warning: package 'pscl' was built under R version 3.5.1
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## The following object is masked from 'package:psych':
##
## logit
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(fGarch)
## Warning: package 'fGarch' was built under R version 3.5.1
## Loading required package: timeDate
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.5.1
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:psych':
##
## outlier
## The following object is masked from 'package:zoo':
##
## time<-
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.5.1
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:psych':
##
## tr
library(pROC)
## Warning: package 'pROC' was built under R version 3.5.1
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(censReg)
## Warning: package 'censReg' was built under R version 3.5.1
## Loading required package: maxLik
## Warning: package 'maxLik' was built under R version 3.5.1
## Loading required package: miscTools
## Warning: package 'miscTools' was built under R version 3.5.1
##
## Attaching package: 'miscTools'
## The following object is masked from 'package:fBasics':
##
## triang
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
##
## Please cite the 'censReg' package as:
## Henningsen, Arne (2017). censReg: Censored Regression (Tobit) Models. R package version 0.5. http://CRAN.R-Project.org/package=censReg.
##
## If you have questions, suggestions, or comments regarding the 'censReg' package, please use a forum or 'tracker' at the R-Forge site of the 'sampleSelection' project:
## https://r-forge.r-project.org/projects/sampleselection/
library(tscount)
## Warning: package 'tscount' was built under R version 3.5.1
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.5.1
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:timeSeries':
##
## outlier
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:psych':
##
## outlier
library(mice)
## Warning: package 'mice' was built under R version 3.5.1
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
##
## melanoma
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(caret)
## Warning: package 'caret' was built under R version 3.5.1
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
library(tensorflow)
## Warning: package 'tensorflow' was built under R version 3.5.1
##
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
##
## train
library(keras)
## Warning: package 'keras' was built under R version 3.5.1
library(Amelia)
## Warning: package 'Amelia' was built under R version 3.5.1
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.5.1
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.1
##
## Attaching package: 'e1071'
## The following objects are masked from 'package:timeDate':
##
## kurtosis, skewness
library(xgboost)
## Warning: package 'xgboost' was built under R version 3.5.1
library(AnalyzeFMRI)
## Warning: package 'AnalyzeFMRI' was built under R version 3.5.1
## Loading required package: R.matlab
## Warning: package 'R.matlab' was built under R version 3.5.1
## R.matlab v3.6.2 (2018-09-26) successfully loaded. See ?R.matlab for help.
##
## Attaching package: 'R.matlab'
## The following object is masked from 'package:keras':
##
## evaluate
## The following object is masked from 'package:tensorflow':
##
## evaluate
## The following objects are masked from 'package:base':
##
## getOption, isOpen
## Loading required package: fastICA
## Warning: package 'fastICA' was built under R version 3.5.1
## Loading required package: tcltk
## Loading required package: tkrplot
## Warning: package 'tkrplot' was built under R version 3.5.1
library(EBImage)
## Warning: package 'EBImage' was built under R version 3.5.1
library(psych)
library(ResourceSelection)
## ResourceSelection 0.3-2 2017-02-28
library(MASS)
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:fBasics':
##
## densityPlot
## The following object is masked from 'package:boot':
##
## logit
## The following object is masked from 'package:psych':
##
## logit
library(ggplot2)
library(adabag)
## Warning: package 'adabag' was built under R version 3.5.1
## Loading required package: rpart
##
## Attaching package: 'rpart'
## The following object is masked from 'package:survival':
##
## solder
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.5.1
## Loading required package: doParallel
## Warning: package 'doParallel' was built under R version 3.5.1
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 3.5.1
## Loading required package: parallel
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 3.5.1
library(class)
library(simmer)
## Warning: package 'simmer' was built under R version 3.5.1
##
## Attaching package: 'simmer'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:lmtest':
##
## reset
library(simmer.plot)
## Warning: package 'simmer.plot' was built under R version 3.5.1
##
## Attaching package: 'simmer.plot'
## The following objects are masked from 'package:simmer':
##
## get_mon_arrivals, get_mon_attributes, get_mon_resources
library(parallel)
library(fpp)
library(reticulate)
## Warning: package 'reticulate' was built under R version 3.5.1
set.seed(1234)
memory.size(max=TRUE)
## [1] 296.56
##############################################
We evaluated rainfall by daily, weekly, monthly, and yearly periods.
###########Cross Tabs########################
myagg0=aggregate(Amt~Week, data=mydata, mean) #Weekly Average, 53 Obs
myagg0a=aggregate(Amt~Year+Week, data=mydata, mean) #Weekly Rollup, 3737 Obs
myagg1=aggregate(Amt~Year+Month, data=mydata, mean) #Monthly Rollup, 856 months
myagg2=aggregate(Amt~Year,data=mydata, mean) #Yearly Average, 72 Obs
myagg3=aggregate(Amt~Day, data=mydata, mean) #By Day of Month Average
myagg4=aggregate(Amt~Month+Day, data=mydata, mean) #By Day, 366 observations
myagg5=aggregate(Amt~Month, data=mydata, mean) #Monthly Averages, 12 Obs
myagg6=aggregate(rained~Day+Month, data=mydata, mean) #% Rained by day, 366 observations
#############################################
##########Baseline Descriptives##############
a1=unlist(describe(mydata$Amt))
a2=unlist(describe(mydata2$Amt))
a2a=unlist(describe(myagg0$Amt))
a2b=unlist(describe(myagg0a$Amt))
a3=unlist(describe(myagg1$Amt))
a4=unlist(describe(myagg2$Amt))
a5=unlist(describe(myagg3$Amt))
a6=unlist(describe(myagg4$Amt))
a7=unlist(describe(myagg5$Amt))
a8=unlist(describe(mydata$rained))
a9=unlist(describe(myagg6$rained))
a=cbind(a1,a2,a2a, a2b, a3,a4,a5,a6,a7,a8,a9)
colnames(a)=c("Daily Rainfall, All Days", "Daily Rainfall, Rainy Days",
"Avg. Rain by Week", "Avg Rain Sequenced Week",
"Avg. Rain by Sequenced Month", "Avg. Rain by Year",
"Avg. Ran by Day of Month", "Avg. Rain by Day",
"Avg. Rain by Month", "Proportion Rained Overall", "% Rain by Day of Year")
write.csv(t(a),"C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/desc2.csv" )
#############################################
We looked at histograms and boxplots by various time elements.
############Zero Inflated Analysis###########
p=ggplot(mydata, aes(Amt, fill = rained)) +
geom_histogram(aes(y=..count../sum(..count..))) +
facet_grid(rained ~ ., margins=TRUE, scales="free_y")
p + labs(title = "Distribution of Rain")+labs(y="Proportion")+labs(x="Rain in Inches") +xlim(-.1,2)+
guides(fill=guide_legend(title="Did it rain? 0=No"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 362 rows containing non-finite values (stat_bin).
## Warning: Removed 3 rows containing missing values (geom_bar).
########Baseline Histograms##################
h1=hist(mydata$Amt, plot=FALSE)$counts
h1a=hist(myagg0$Amt, plot=FALSE)$counts
h1b=hist(myagg0a$Amt, plot=FALSE)$counts
h2=hist(myagg1$Amt, plot=FALSE)$counts
h3=hist(myagg2$Amt, plot=FALSE)$counts
h4=hist(myagg3$Amt, plot=FALSE)$counts
h5=hist(myagg4$Amt, plot=FALSE)$counts
h6=hist(myagg5$Amt, plot=FALSE)$counts
n1=hist(mydata$Amt, plot=FALSE)$breaks
n1a=hist(myagg0$Amt, plot=FALSE)$breaks
n1b=hist(myagg0a$Amt, plot=FALSE)$breaks
n2=hist(myagg1$Amt, plot=FALSE)$breaks
n3=hist(myagg2$Amt, plot=FALSE)$breaks
n4=hist(myagg3$Amt, plot=FALSE)$breaks
n5=hist(myagg4$Amt, plot=FALSE)$breaks
n6=hist(myagg5$Amt, plot=FALSE)$breaks
x1=h1/sum(h1)
names(x1)=n1[1:length(n1)-1]
x1a=h1a/sum(h1a)
names(x1a)=n1a[1:length(n1a)-1]
x1b=h1b/sum(h1b)
names(x1b)=n1b[1:length(n1b)-1]
x2=h2/sum(h2)
names(x2)=n2[1:length(n2)-1]
x3=h3/sum(h3)
names(x3)=n3[1:length(n3)-1]
x4=h4/sum(h4)
names(x4)=n4[1:length(n4)-1]
x5=h5/sum(h5)
names(x5)=n5[1:length(n5)-1]
x6=h6/sum(h6)
names(x6)=n6[1:length(n6)-1]
par(mfrow=c(3,2))
barplot(x1b, ylim=c(0,1), space=0, col="light yellow", main="Avg. Rain by Week, n=53",
xlab="Rain in Inches")
barplot(x1a, ylim=c(0,1), space=0, col="yellow", main="Avg. Rain by Sequenced Week, n=3,737",
xlab="Rain in Inches")
barplot(x6,ylim=c(0,1), space=0, col="light green", main="Avg. Rain by Month, n=31",
xlab="Rain in Inches")
barplot(x3,ylim=c(0,1), space=0, col="green", main="Avg. Rain by Sequenced Month, n=856",
xlab="Rain in Inches")
barplot(x4,ylim=c(0,1), space=0, col="orange", main="Avg. Rain by Year, n=72",
xlab="Rain in Inches")
barplot(x5,ylim=c(0,1), space=0, col="red", main="Avg. Rain by Day of Month, n=31",
xlab="Rain in Inches")
#############################################
#######Notched Box Plot & Trend Check########
par(mfrow=c(1,1))
boxplot(myagg1$Amt~myagg1$Month, col=c(0,0,0,0,2,2,0,0,2,2,0,0),
notch=TRUE, main="Notched Boxplot of Mean Daily Rainfall by Month (856 observations)",
xlab="Month", ylab="Rain in Inches", horizontal=FALSE, ylim=c(0, .4))
lines(myagg5$Month, myagg5$Amt, lty=2, col="blue")
legend("topleft", legend=c("Average"), col=c("blue"), lty=2)
boxplot(myagg0a$Amt~myagg0a$Week, col=seq(1:12),
notch=TRUE, main="Notched Boxplot of Weekly Rainfall",
xlab="Month", ylab="Rain in Inches", horizontal=TRUE)
## Warning in bxp(list(stats = structure(c(0, 0, 0.015, 0.0642857142857143, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
myagg2a=aggregate(Amt~Year,data=mydata, sum)
plot(myagg2a$Amt[2:72]~myagg2a$Year[2:72], type="l",
main="Time Plot of Total Yearly Rainfall",
xlab="Year", ylab="Rain in Inches",ylim=c(0,55))
abline(lm(myagg2a$Amt[2:72]~myagg2a$Year[2:72]), col="red", lty=1)
lines(lowess(myagg2a$Year[2:72], myagg2a$Amt[2:72]), col="blue", lty=2)
legend("topleft", legend=c("Linear", "Loess"), col=c("red", "blue"), lty=1:2)
myagg0b=aggregate(Amt~Week, data=mydata, mean)
mylm=lm(myagg0b$Amt~myagg0b$Week+I(myagg0b$Week^2)+I(myagg0b$Week^3)+I(myagg0b$Week^4)+
I(myagg0b$Week^5)+I(myagg0b$Week^6)+
I(myagg0b$Week^7)+I(myagg0b$Week^8)+
I(myagg0b$Week^9)+I(myagg0b$Week^10))
plot(myagg0b$Amt~myagg0b$Week, type="l",
main="Time Plot of Mean Weekly Rainfall",
xlab="Week", ylab="Rain in Inches", ylim=c(0,.15))
abline(lm(myagg0b$Amt~myagg0b$Week), col="red", lty=1)
lines(myagg0b$Week, mylm$fitted.values, col="green",lty=2)
lines(lowess(myagg0b$Week, myagg0b$Amt), col="blue", lty=3)
legend("topleft", legend=c("Linear", "Polynomial","Loess"), col=c("red", "green", "blue"), lty=1:3)
#############################################
We broke down the time series by various frequencies and decomposed them.
#########Baseline Time Series################
myts1=ts(mydata$Amt, start=c(1946,9),frequency=365) #daily
myts2=ts(myagg1$Amt, start=c(1946,9), frequency=12) #monthly
myts3=ts(myagg2$Amt, start=c(1946,9), frequency=1) #yearly
#############################################
#######Baseline Decomposition################
mydec1=decompose(myts1)
mydec2=decompose(myts2)
plot(mydec1)
plot(mydec2)
#############################################
rm(myagg1)
rm(myagg2)
rm(mydec1)
rm(mydec2)
plot(myts1, main="Daily Rainfall", ylab="Inches of Rain")
We evaluated time dependency based on graphics.
par(mfrow=c(2,1))
myacf=acf(myts1)
mypacf=pacf(myts1)
newacf=as.vector(myacf$acf[2:45])
names(newacf)=seq(1:44)
Box.test(myts1, type="Ljung-Box", lag=10)$p.value
## [1] 0
barplot(newacf, ylim=c(-.2,.2), xlim=c(1,44),main="Autocorrelation Function", width=1,
xlab="Lag", ylab="Correlation")
lines(seq(1:44), rep(0,44),col="red")
lines(seq(1:44), rep(1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)
lines(seq(1:44), rep(-1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)
newpacf=as.vector(mypacf$acf[1:44])
names(newpacf)=seq(1:44)
barplot(newpacf, ylim=c(-.2,.2), xlim=c(1,44),main="Partial Autocorrelation Function", width=1,
xlab="Lag", ylab="Correlation")
lines(seq(1:44), rep(0,44),col="red")
lines(seq(1:44), rep(1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)
lines(seq(1:44), rep(-1.96*sd(myts1)/sqrt(length(myts1)),44), col="blue", lty=2)
We start by evaluating naive models.
#####Naive##########################
naive=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rain2.csv")
myts1=ts(naive$Amt, frequency=365)
train1=ts(myts1[1:18141],start=c(1946,1,1), frequency=365)
test1=ts(myts1[18142:25915],start=c(1996,9,14), frequency=365)
mynaive=naive(train1)
mynaive2=forecast(mynaive,365)
Next, we look at seasonal naive models.
#####Seasonal Naive##########################
myn1=snaive(train1)
myn2=snaive(test1)
myn3=snaive(myts1)
accuracy(myn1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.0001085734 0.4727667 0.1530564 -Inf Inf 1 0.1372529
accuracy(myn2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.000520988 0.5534272 0.1670833 -Inf Inf 1 0.207811
accuracy(myn3)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0003917808 0.4960856 0.1566133 -Inf Inf 1 0.163213
Then we look at Error Trend Seasonality models.
######ETS Forecast###########################
par(mfrow=c(1,1))
myets=stlf(train1, method="ets")
myetsf=forecast(myets,365)
myetsa=accuracy(myetsf, test1)
plot(myetsf)
myets$model #ETS(A,N,N)
## ETS(A,N,N)
##
## Call:
## ets(y = x, model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend)
##
## Smoothing parameters:
## alpha = 0.0052
##
## Initial states:
## l = 0.052
##
## sigma: 0.304
##
## AIC AICc BIC
## 134692.5 134692.5 134715.9
myetsa
## ME RMSE MAE MPE MAPE MASE
## Training set -3.030006e-05 0.3039925 0.1251364 NaN Inf 0.8175835
## Test set 3.123006e-02 0.3114883 0.1422783 NaN Inf 0.9295810
## ACF1 Theil's U
## Training set 0.1354259 NA
## Test set 0.1444785 NaN
#############################################
ARIMA models follow.
######ETS Forecast###########################
par(mfrow=c(1,1))
myarima=stlf(train1, method="arima")
myarimaf=forecast(myarima,365)
myarimaa=accuracy(myarimaf, test1)
plot(myarima)
myarima$model #AR5 after differencing
## Series: x
## ARIMA(1,1,0) with drift
##
## Coefficients:
## ar1 drift
## -0.4416 0.0000
## s.e. 0.0067 0.0018
##
## sigma^2 estimated as 0.128: log likelihood=-7091.28
## AIC=14188.57 AICc=14188.57 BIC=14211.99
myarimaa
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0000019965 0.357703 0.1682972 NaN Inf 1.0995766
## Test set 0.0712763588 0.317976 0.1521066 NaN Inf 0.9937944
## ACF1 Theil's U
## Training set -0.1297951 NA
## Test set 0.1443016 NaN
#############################################
Look at volatility.
mygarch=garchFit(data=myts1, formula=~arma(3,1,0)+garch(1,1),cond.dist="QMLE", trace=FALSE)
#QMLE assumes the normal distribution and runs quasi-maximum likelhood estimation.
coef(mygarch)
## mu ar1 ar2 ar3 omega alpha1
## 0.05295248 0.24156939 0.03142640 0.01213119 0.01448042 0.14476714
## beta1
## 0.79024242
summary(mygarch)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~arma(3, 1, 0) + garch(1, 1), data = myts1,
## cond.dist = "QMLE", trace = FALSE)
##
## Mean and Variance Equation:
## data ~ arma(3, 1, 0) + garch(1, 1)
## <environment: 0x000000003c2e56f8>
## [data = myts1]
##
## Conditional Distribution:
## QMLE
##
## Coefficient(s):
## mu ar1 ar2 ar3 omega alpha1 beta1
## 0.052952 0.241569 0.031426 0.012131 0.014480 0.144767 0.790242
##
## Std. Errors:
## robust
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.052952 0.003421 15.477 < 2e-16 ***
## ar1 0.241569 0.020633 11.708 < 2e-16 ***
## ar2 0.031426 0.012993 2.419 0.01558 *
## ar3 0.012131 0.012619 0.961 0.33637
## omega 0.014480 0.005182 2.794 0.00520 **
## alpha1 0.144767 0.046492 3.114 0.00185 **
## beta1 0.790242 0.060644 13.031 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -7533.194 normalized: -0.2906886
##
## Description:
## Thu Nov 08 17:50:59 2018 by user: lfult
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 15139184 0
## Shapiro-Wilk Test R W NA NA
## Ljung-Box Test R Q(10) 16.95673 0.0753269
## Ljung-Box Test R Q(15) 25.58261 0.04264562
## Ljung-Box Test R Q(20) 29.29234 0.08216074
## Ljung-Box Test R^2 Q(10) 5.049554 0.8878433
## Ljung-Box Test R^2 Q(15) 12.49004 0.6416223
## Ljung-Box Test R^2 Q(20) 13.338 0.8624153
## LM Arch Test R TR^2 11.93035 0.4512896
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 0.5819174 0.5841222 0.5819172 0.5826298
plot(mygarch@residuals)
rm(mygarch)
Look at zero inflated models.
newestdata=naive[,-c(1)]
newestdata$Month=as.factor(newestdata$Month)
newestdata$Day=as.factor(newestdata$Day)
newestdata$rained=newestdata$Amt
newestdata$rained[newestdata$rained>0]=1
train2=newestdata[1:18141,]
test2=newestdata[18142:length(newestdata$Amt),]
m1 <- glm(rained~Month+Year+Day, data = train2, family = binomial(link = logit))
m2 <- glm(Amt ~ Month+Year+Day, data = subset(train2, rained == 1), family = Gamma(link = log))
pred1 <- exp(predict(m1, test2))
pred2 <- exp(predict(m2, test2))
pred=pred1*pred2
residuals=test2$Amt-pred
me2=mean(residuals)
mae2=mean(abs(residuals))
myres=residuals^2
mymean=mean(myres)
rmse2=sqrt(mymean)
myacf=acf(residuals)
me2
## [1] -0.05575168
mae2
## [1] 0.1923451
rmse2
## [1] 0.3925828
Using External regressors of
mynnetdata=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rain2.csv")
mymonth=mynnetdata$Month
myday=mynnetdata$Day
mymonthtrain=mymonth[1:18141]
mydaytrain=myday[1:18141]
newmat=as.matrix(cbind(mymonthtrain,mydaytrain))
mymonthtest=mymonth[18142:25915]
mydaytest=myday[18142:25915]
testmat=as.matrix(cbind(mymonthtest,mydaytest))
par(mfrow=c(2,2))
nnetdata=ts(mynnetdata$Amt, frequency=365)
mytrain=ts(nnetdata[1:18141], frequency=365)
mytest=ts(nnetdata[18142:25915], frequency=365)
for (i in 1:5){
fit = nnetar(mytrain,1,0,i)
print(accuracy(fit))
print(accuracy(forecast(mytest, model=fit)))
}
## ME RMSE MAE MPE MAPE MASE
## Training set -3.523269e-05 0.3268366 0.1296631 -Inf Inf 0.8471593
## ACF1
## Training set 0.02315117
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.007418162 0.3796378 0.1356071 -Inf Inf 0.8116139 0.1085279
## ME RMSE MAE MPE MAPE MASE
## Training set -7.419475e-05 0.3264067 0.1291711 -Inf Inf 0.8439449
## ACF1
## Training set 0.001134151
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.007275209 0.37849 0.134927 -Inf Inf 0.8075435 0.08926648
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0001129182 0.3263355 0.12916 -Inf Inf 0.8438719
## ACF1
## Training set -0.0002277007
## ME RMSE MAE MPE MAPE MASE
## Training set 0.007142711 0.3784578 0.1349968 -Inf Inf 0.8079613
## ACF1
## Training set 0.08745968
## ME RMSE MAE MPE MAPE MASE
## Training set -8.742632e-05 0.3263047 0.1291122 -Inf Inf 0.8435597
## ACF1
## Training set -0.0002408176
## ME RMSE MAE MPE MAPE MASE
## Training set 0.007094559 0.3784996 0.1350138 -Inf Inf 0.8080631
## ACF1
## Training set 0.08747423
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0001524993 0.3262893 0.1291539 -Inf Inf 0.8438319
## ACF1
## Training set 1.84578e-05
## ME RMSE MAE MPE MAPE MASE
## Training set 0.006935154 0.3785834 0.1351687 -Inf Inf 0.8089901
## ACF1
## Training set 0.08670584
totmat=as.matrix(mymonth,myday)
totalfit=nnetar(nnetdata,5,0,12, xreg=totmat)
##############One Period Stochastic############
myprop=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/PRain2.csv")
raindist=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rainfalldist.csv")
z=rep(0,7774)
a=rep(0,7774)
myamt=rep(0,7774)
for (i in 1:7774){
z[i]=i%%365+1
}
for (i in 1:7774){
pr=myprop$Prain[z[i]]
a[i]=rbinom(1,1,pr)
myvec=na.omit(raindist[,z[i]])
myamt[i]=sample(myvec,1)
}
tottest=a*myamt
res_test=test1-tottest
mean(res_test)
## [1] 0.008937484
mean(abs(res_test))
## [1] 0.1581271
sqrt(mean(res_test^2))
## [1] 0.508748
##############Two Period Stochastic############
myprop2=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/PRain.csv")
raindist2=read.csv("C:/Users/lfult/OneDrive - Texas State University/Publications-Complete & Working/Sustainability/rainfalldist2.csv")
z1=rep(0,7774)
a1=rep(0,7774)
myamt1=rep(0,7774)
for (i in 1:7774){
z1[i]=i%%365+1
}
for (i in 1:7774){
pr=myprop2$Prain[z[i]]
a1[i]=rbinom(1,1,pr)
myvec1=raindist2[,z[i]:z[i]+14]
myvec1=as.matrix(myvec1)
dim(myvec1)=c(ncol(myvec1)*nrow(myvec1))
myvec1=as.vector(myvec1)
myvec1=na.omit(myvec1)
myamt1[i]=sample(myvec1,1)
}
totrain1=a1*myamt1
res_train=test1-totrain1
mean(res_train)
## [1] 0.002886545
mean(abs(res_train))
## [1] 0.162637
sqrt(mean(res_train^2))
## [1] 0.54034