Time Series analysis

Packages Importing

Now we Import the Data Available And Create a time series function

using only the past 30 years data we create the time series.

Assam <- read_excel("Assam.xlsx")

Time series

# Time series Plot
Assam%>%  group_by(Year,Month) %>% 
  summarise(Rainfall=sum(Rainfall)) %>% ungroup %>% transmute(Rainfall) %>% 
  ts(start=c(1901,1),freq=12) -> rain_ts
## `summarise()` regrouping output by 'Year' (override with `.groups` argument)
rain_ts %>% window(c(1901,1),c(1902,12))
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 1901 223.0 464.1   1.2  19.5  27.1 430.6 524.9  30.6 207.0 115.6 163.7 291.4
## 1902 350.0 536.0   1.3  10.2   9.3 510.8 620.7 105.6 262.1   7.8  97.0 441.3
rain_ts1<-window(rain_ts, start=c(1995,1), end=c(2015, 12))

rain_ts1
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 1995 145.7 543.3   7.1  39.7   8.7 558.4 663.3  42.0 299.1  94.0  80.7 404.0
## 1996  99.2 421.8   0.2  23.2  12.8 499.6 397.7 101.6 415.6   3.5 267.2 224.4
## 1997 131.9 315.5  35.4  40.7  18.2 511.2 569.9  91.8 241.8  12.6  36.4 366.8
## 1998 185.7 549.5   0.4  31.5  11.8 604.4 581.2 136.8 250.6  48.9 190.0 212.3
## 1999 150.6 487.5   2.3   0.5   2.4 596.7 429.0  38.7 466.8  14.1 208.6 196.9
## 2000 238.5 513.2   0.4  13.7  20.5 313.3 540.2  65.9 375.1  22.2  83.9 299.9
## 2001 198.6 295.6   2.3  39.7   5.5 451.1 399.8  30.9 266.4  20.5 201.4 266.8
## 2002 279.9 312.5   7.9   5.7  20.2 565.1 502.5  83.3 286.9  69.4  49.5 209.7
## 2003 213.8 290.7  18.6  41.5   9.5 481.2 551.4  96.9 235.8  13.0 231.8 250.0
## 2004 340.8 286.2   6.6  19.5  15.3 848.4 397.9  72.8 342.2   6.5 340.8 317.2
## 2005 207.5 501.9   1.3  30.5  22.8 474.5 373.9 172.2 281.2   8.9 218.6 174.9
## 2006 169.0 209.7  10.7  40.9   0.7 350.3 395.9  20.8 325.7  25.1  97.6 252.3
## 2007 242.7 308.0   4.0  73.7   1.5 682.7 489.7  23.5 219.8  47.7 164.7 494.7
## 2008 156.2 508.6   1.6  14.5  29.7 493.2 414.6 107.5 210.6   2.2 133.4 267.5
## 2009 134.9 533.8   4.0  11.6   7.9 426.3 321.1  44.9 246.5   9.0 135.9 192.3
## 2010 389.3 387.0   7.6   3.1   0.5 415.1 569.7  99.6 393.4  11.6 116.2 318.3
## 2011  92.1 302.6   3.5  11.4  11.1 395.8 316.0 109.0 238.3  11.9  30.2 221.6
## 2012 279.1 289.2   2.3   6.9  15.2 444.3 729.7  28.8 185.8  17.1 199.4 411.6
## 2013 112.8 289.7   2.0   9.6   1.1 367.8 286.2  44.0 346.7   1.0 126.3 229.3
## 2014  51.5 484.6   0.4  28.3   2.0 374.4 426.4  29.3 351.1   3.0  35.0 420.2
## 2015 250.9 590.9  15.2  15.5  13.4 300.1 558.5  37.5 332.5  14.0  62.6 279.9

Plotting the Timeseries data

autoplot(rain_ts1) + ylab("Rainfall (mm2)") + xlab("Datetime") + 
  scale_x_date(date_labels = '%b - %Y', breaks = '2 year', minor_breaks = '2 month') +
  theme_bw() + ggtitle("Assam Meghalaya Rainfall 1995 - 2016")

Decomposition using stl()

This Shows the Compostition of the data i.e its Seasonal, trend and irregular trend

decomp <- stl(rain_ts1[,1], s.window = 'periodic')

Plot decomposition

autoplot(decomp) + theme_bw() + scale_x_date(date_labels = '%b - %Y', breaks = '2 year', minor_breaks = '2 month') +
  ggtitle("Remainder")

check for trend strength and seasonal strength

Tt <- trendcycle(decomp)
St <- seasonal(decomp)
Rt <- remainder(decomp)

Trend Strength Calculation

Ft <- round(max(0,1 - (var(Rt)/var(Tt + Rt))),1)

Seasonal Strength Calculation

Fs <- round(max(0,1 - (var(Rt)/var(St + Rt))),1)

Creating a data frame

data.frame('Trend Strength' = Ft , 'Seasonal Strength' = Fs)

note: 0.1 trend and 0.9 seasonal observed(seasonal analysis is done)

We choose the seasonal plot and analysis is done

Seasonal Plot

seasonplot(rain_ts1, year.labels = TRUE, col = 1:21,labelgap = -0.17,
           main =  "Seasonal Plot", ylab= "Rainfall (mm2)")

Some easy to view seasonal plots.

Seasonal Sub-Series Plot

seasplot(rain_ts1, outplot = 3, trend = FALSE, 
         main = "Seasonal Subseries Plot", ylab= "Rainfall (mm2)")

## Results of statistical testing
## Presence of trend not tested.
## Evidence of seasonality: TRUE  (pval: 0)

Seasonal Density

seasplot(rain_ts1, outplot = 5, trend = FALSE, 
         main = "Seasonal Box Plot", ylab= "Rainfall (mm2)")

## Results of statistical testing
## Presence of trend not tested.
## Evidence of seasonality: TRUE  (pval: 0)

We create 2 sets one to train another to use test

Create Train Set

rain_train <- window(rain_ts1, end = c(2014,12))

Create Test Set

rain_test <- window(rain_ts1, start = c(2015,1))

Conducting statistical tests for ARIMA model

Kwiatkowski–Phillips–Schmidt–Shin Test

summary(ur.kpss(rain_train))
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 0.316 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Dickey-Fuller Test

summary(ur.df(rain_train)) 
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -357.41  -89.95   40.76  212.05  838.01 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    -0.38274    0.06260  -6.114 3.99e-09 ***
## z.diff.lag -0.22497    0.06324  -3.557 0.000453 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 232.3 on 236 degrees of freedom
## Multiple R-squared:  0.2863, Adjusted R-squared:  0.2803 
## F-statistic: 47.34 on 2 and 236 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -6.1143 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

ARIMA MODEL

acf2(rain_train)

##       [,1]  [,2]  [,3]  [,4] [,5]  [,6] [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  -0.03 -0.20 -0.32 -0.24 0.36  0.10 0.31 -0.22 -0.33 -0.19  0.00  0.76
## PACF -0.03 -0.21 -0.35 -0.40 0.15 -0.11 0.35 -0.07 -0.05 -0.33 -0.13  0.56
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF  -0.02 -0.18 -0.30 -0.23  0.29  0.06  0.32 -0.23 -0.31 -0.19 -0.01  0.75
## PACF  0.01  0.02  0.02 -0.01 -0.17 -0.17  0.07 -0.14 -0.02 -0.12 -0.05  0.29
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF   0.00 -0.19 -0.30 -0.22  0.31  0.10  0.30 -0.21 -0.29 -0.17 -0.01  0.72
## PACF  0.12 -0.03 -0.03  0.03  0.02  0.02  0.01  0.02  0.05  0.01 -0.11  0.13
##      [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF  -0.01 -0.18 -0.27 -0.19  0.28  0.08  0.25 -0.19 -0.27 -0.17     0  0.65
## PACF  0.00 -0.05  0.04  0.11 -0.02  0.02 -0.11 -0.06  0.00 -0.01     0  0.05

checking for best model

fit1 <- Arima(rain_train, order = c(1,0,2), seasonal = c(1,0,2))
fit2 <- Arima(rain_train, order = c(2,0,2), seasonal = c(2,0,2))
fit3 <- Arima(rain_train, order = c(1,0,1), seasonal = c(1,0,1))
fit4 <- Arima(rain_train, order = c(0,0,1), seasonal = c(2,1,2))
fit5 <- Arima(rain_train, order = c(0,0,2), seasonal = c(0,0,2))
fit6 <- auto.arima(rain_train, stepwise = FALSE, 
                   approximation = FALSE)

comparing aicc values lesser the better by creating a data frane

data.frame('Model-1' = fit1$aicc, 'Model-2' = fit2$aicc, 
           'Model-3' = fit3$aicc,
           'Model-4' = fit4$aicc, 
           'Model-5' =  fit5$aicc,'Auto.Arima'= fit6$aicc,
           row.names =   "AICc Value")

model 4 gives the lowest so we chose model 4

checking residuals for the forecast

checkresiduals(fit4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,1)(2,1,2)[12]
## Q* = 34.214, df = 19, p-value = 0.01734
## 
## Model df: 5.   Total lags used: 24

the residual have some increasing and decreasing pattern hence can be use for forecasting.(noise)

Computing forecasting

Creating the Model

ARIMA_Model <- Arima(rain_ts1, order = c(0,0,1), 
                     seasonal = c(2,1,2))

ARIMA Model Forecast. we forecast for the next 5 years i.e next 60 months

autoplot(forecast(ARIMA_Model, h=60)) + theme_bw() +
  ylab("Rainfall (mm2)") + xlab("Datetime") + 
  scale_x_date(date_labels = '%b - %Y', 
               breaks = '2 year', minor_breaks = '2 month') +
  theme_bw() + ggtitle("Assam Meghalaya Rainfall Forecast  
  ARIMA Model")

The blue coloured lines are rainfall predicted by the model.