Time Series analysis
Packages Importing
Now we Import the Data Available And Create a time series function
using only the past 30 years data we create the time series.
Assam <- read_excel("Assam.xlsx")
Time series
# Time series Plot
Assam%>% group_by(Year,Month) %>%
summarise(Rainfall=sum(Rainfall)) %>% ungroup %>% transmute(Rainfall) %>%
ts(start=c(1901,1),freq=12) -> rain_ts
## `summarise()` regrouping output by 'Year' (override with `.groups` argument)
rain_ts %>% window(c(1901,1),c(1902,12))
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1901 223.0 464.1 1.2 19.5 27.1 430.6 524.9 30.6 207.0 115.6 163.7 291.4
## 1902 350.0 536.0 1.3 10.2 9.3 510.8 620.7 105.6 262.1 7.8 97.0 441.3
rain_ts1<-window(rain_ts, start=c(1995,1), end=c(2015, 12))
rain_ts1
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1995 145.7 543.3 7.1 39.7 8.7 558.4 663.3 42.0 299.1 94.0 80.7 404.0
## 1996 99.2 421.8 0.2 23.2 12.8 499.6 397.7 101.6 415.6 3.5 267.2 224.4
## 1997 131.9 315.5 35.4 40.7 18.2 511.2 569.9 91.8 241.8 12.6 36.4 366.8
## 1998 185.7 549.5 0.4 31.5 11.8 604.4 581.2 136.8 250.6 48.9 190.0 212.3
## 1999 150.6 487.5 2.3 0.5 2.4 596.7 429.0 38.7 466.8 14.1 208.6 196.9
## 2000 238.5 513.2 0.4 13.7 20.5 313.3 540.2 65.9 375.1 22.2 83.9 299.9
## 2001 198.6 295.6 2.3 39.7 5.5 451.1 399.8 30.9 266.4 20.5 201.4 266.8
## 2002 279.9 312.5 7.9 5.7 20.2 565.1 502.5 83.3 286.9 69.4 49.5 209.7
## 2003 213.8 290.7 18.6 41.5 9.5 481.2 551.4 96.9 235.8 13.0 231.8 250.0
## 2004 340.8 286.2 6.6 19.5 15.3 848.4 397.9 72.8 342.2 6.5 340.8 317.2
## 2005 207.5 501.9 1.3 30.5 22.8 474.5 373.9 172.2 281.2 8.9 218.6 174.9
## 2006 169.0 209.7 10.7 40.9 0.7 350.3 395.9 20.8 325.7 25.1 97.6 252.3
## 2007 242.7 308.0 4.0 73.7 1.5 682.7 489.7 23.5 219.8 47.7 164.7 494.7
## 2008 156.2 508.6 1.6 14.5 29.7 493.2 414.6 107.5 210.6 2.2 133.4 267.5
## 2009 134.9 533.8 4.0 11.6 7.9 426.3 321.1 44.9 246.5 9.0 135.9 192.3
## 2010 389.3 387.0 7.6 3.1 0.5 415.1 569.7 99.6 393.4 11.6 116.2 318.3
## 2011 92.1 302.6 3.5 11.4 11.1 395.8 316.0 109.0 238.3 11.9 30.2 221.6
## 2012 279.1 289.2 2.3 6.9 15.2 444.3 729.7 28.8 185.8 17.1 199.4 411.6
## 2013 112.8 289.7 2.0 9.6 1.1 367.8 286.2 44.0 346.7 1.0 126.3 229.3
## 2014 51.5 484.6 0.4 28.3 2.0 374.4 426.4 29.3 351.1 3.0 35.0 420.2
## 2015 250.9 590.9 15.2 15.5 13.4 300.1 558.5 37.5 332.5 14.0 62.6 279.9
Plotting the Timeseries data
autoplot(rain_ts1) + ylab("Rainfall (mm2)") + xlab("Datetime") +
scale_x_date(date_labels = '%b - %Y', breaks = '2 year', minor_breaks = '2 month') +
theme_bw() + ggtitle("Assam Meghalaya Rainfall 1995 - 2016")

Decomposition using stl()
This Shows the Compostition of the data i.e its Seasonal, trend and irregular trend
decomp <- stl(rain_ts1[,1], s.window = 'periodic')
Plot decomposition
autoplot(decomp) + theme_bw() + scale_x_date(date_labels = '%b - %Y', breaks = '2 year', minor_breaks = '2 month') +
ggtitle("Remainder")

check for trend strength and seasonal strength
Tt <- trendcycle(decomp)
St <- seasonal(decomp)
Rt <- remainder(decomp)
Trend Strength Calculation
Ft <- round(max(0,1 - (var(Rt)/var(Tt + Rt))),1)
Seasonal Strength Calculation
Fs <- round(max(0,1 - (var(Rt)/var(St + Rt))),1)
Creating a data frame
data.frame('Trend Strength' = Ft , 'Seasonal Strength' = Fs)
note: 0.1 trend and 0.9 seasonal observed(seasonal analysis is done)
We choose the seasonal plot and analysis is done
Seasonal Plot
seasonplot(rain_ts1, year.labels = TRUE, col = 1:21,labelgap = -0.17,
main = "Seasonal Plot", ylab= "Rainfall (mm2)")

Some easy to view seasonal plots.
Seasonal Sub-Series Plot
seasplot(rain_ts1, outplot = 3, trend = FALSE,
main = "Seasonal Subseries Plot", ylab= "Rainfall (mm2)")

## Results of statistical testing
## Presence of trend not tested.
## Evidence of seasonality: TRUE (pval: 0)
Seasonal Density
seasplot(rain_ts1, outplot = 5, trend = FALSE,
main = "Seasonal Box Plot", ylab= "Rainfall (mm2)")

## Results of statistical testing
## Presence of trend not tested.
## Evidence of seasonality: TRUE (pval: 0)
We create 2 sets one to train another to use test
Create Train Set
rain_train <- window(rain_ts1, end = c(2014,12))
Create Test Set
rain_test <- window(rain_ts1, start = c(2015,1))
Conducting statistical tests for ARIMA model
Kwiatkowski–Phillips–Schmidt–Shin Test
summary(ur.kpss(rain_train))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 0.316
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Dickey-Fuller Test
summary(ur.df(rain_train))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -357.41 -89.95 40.76 212.05 838.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.38274 0.06260 -6.114 3.99e-09 ***
## z.diff.lag -0.22497 0.06324 -3.557 0.000453 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 232.3 on 236 degrees of freedom
## Multiple R-squared: 0.2863, Adjusted R-squared: 0.2803
## F-statistic: 47.34 on 2 and 236 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -6.1143
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
ARIMA MODEL
acf2(rain_train)

## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.03 -0.20 -0.32 -0.24 0.36 0.10 0.31 -0.22 -0.33 -0.19 0.00 0.76
## PACF -0.03 -0.21 -0.35 -0.40 0.15 -0.11 0.35 -0.07 -0.05 -0.33 -0.13 0.56
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.02 -0.18 -0.30 -0.23 0.29 0.06 0.32 -0.23 -0.31 -0.19 -0.01 0.75
## PACF 0.01 0.02 0.02 -0.01 -0.17 -0.17 0.07 -0.14 -0.02 -0.12 -0.05 0.29
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.00 -0.19 -0.30 -0.22 0.31 0.10 0.30 -0.21 -0.29 -0.17 -0.01 0.72
## PACF 0.12 -0.03 -0.03 0.03 0.02 0.02 0.01 0.02 0.05 0.01 -0.11 0.13
## [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF -0.01 -0.18 -0.27 -0.19 0.28 0.08 0.25 -0.19 -0.27 -0.17 0 0.65
## PACF 0.00 -0.05 0.04 0.11 -0.02 0.02 -0.11 -0.06 0.00 -0.01 0 0.05
checking for best model
fit1 <- Arima(rain_train, order = c(1,0,2), seasonal = c(1,0,2))
fit2 <- Arima(rain_train, order = c(2,0,2), seasonal = c(2,0,2))
fit3 <- Arima(rain_train, order = c(1,0,1), seasonal = c(1,0,1))
fit4 <- Arima(rain_train, order = c(0,0,1), seasonal = c(2,1,2))
fit5 <- Arima(rain_train, order = c(0,0,2), seasonal = c(0,0,2))
fit6 <- auto.arima(rain_train, stepwise = FALSE,
approximation = FALSE)
comparing aicc values lesser the better by creating a data frane
data.frame('Model-1' = fit1$aicc, 'Model-2' = fit2$aicc,
'Model-3' = fit3$aicc,
'Model-4' = fit4$aicc,
'Model-5' = fit5$aicc,'Auto.Arima'= fit6$aicc,
row.names = "AICc Value")
model 4 gives the lowest so we chose model 4
checking residuals for the forecast
checkresiduals(fit4)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1)(2,1,2)[12]
## Q* = 34.214, df = 19, p-value = 0.01734
##
## Model df: 5. Total lags used: 24
the residual have some increasing and decreasing pattern hence can be use for forecasting.(noise)
Computing forecasting
Creating the Model
ARIMA_Model <- Arima(rain_ts1, order = c(0,0,1),
seasonal = c(2,1,2))
ARIMA Model Forecast. we forecast for the next 5 years i.e next 60 months
autoplot(forecast(ARIMA_Model, h=60)) + theme_bw() +
ylab("Rainfall (mm2)") + xlab("Datetime") +
scale_x_date(date_labels = '%b - %Y',
breaks = '2 year', minor_breaks = '2 month') +
theme_bw() + ggtitle("Assam Meghalaya Rainfall Forecast
ARIMA Model")

The blue coloured lines are rainfall predicted by the model.