MRF Stock Price Prediction Using Auto ARIMA Forecasting

AUTOREGRESSIVE INTEGRATED MOVING AVERAGE (ARIMA) MODELS

#install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
# install.packages("forecast",repos = "http://cran.us.r-project.org")
# install.packages("fpp",repos = "http://cran.us.r-project.org")
# install.packages("smooth",repos = "http://cran.us.r-project.org")
# install.packages("readxl",repos = "http://cran.us.r-project.org")
# install.packages("tseries",repos = "http://cran.us.r-project.org")
# install.packages("lubridate",repos = "http://cran.us.r-project.org")
# install.packages("dplyr",repos = "http://cran.us.r-project.org")

library(forecast)
## Warning: package 'forecast' was built under R version 3.5.1
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.1
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.5.1
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(smooth)
## Warning: package 'smooth' was built under R version 3.5.1
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 3.5.1
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
library(tseries)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.5.1
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:smooth':
## 
##     hm
## The following object is masked from 'package:base':
## 
##     date
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Using ARIMA model: load the price history and collapse it to one value
# per calendar month.
# file.choose() prompts interactively for the Excel workbook; the code below
# reads its `Date` and `Price` columns.
Stock<-read_excel(file.choose()) # read the stock data
class(Stock)
## [1] "tbl_df"     "tbl"        "data.frame"
# Map each date to its year-month so observations can be grouped by month.
stockMon <- as.yearmon(Stock$Date)
# Wrap the vector in a data frame; its single column is named `stockMon`.
stockMon <- as.data.frame(stockMon)
class(stockMon) 
## [1] "data.frame"
# The unnamed second argument yields a column literally called `Stock$Price`.
stockdat <- cbind(stockMon,Stock$Price)
# Median of every column within each year-month group.
agg <- aggregate(stockdat, by = list(stockdat$stockMon), FUN = median)
# Monthly median prices as a plain numeric vector.
stockFin <- agg$`Stock$Price`

# Correlograms of the monthly series, followed by its first difference
# (the input to the second Dickey-Fuller test).
acf(x = stockFin, lag.max = 20)

pacf(x = stockFin, lag.max = 20)

# Lag-1 difference of the raw prices; despite the name, no log transform
# is applied.
difflnstock <- diff(stockFin, lag = 1)
print(difflnstock)
##   [1]   -59.250    -5.575    28.425    12.200    -4.000    11.050    56.125
##   [8]   244.175     7.950   205.350    59.400    -0.750   212.100   122.000
##  [15]   460.425   -51.725  -288.450  -173.925   -67.150  -189.025  -127.675
##  [22]    37.375   505.150   174.150   -37.575   -30.275   432.800    20.900
##  [29]   199.675   286.600  -202.475  -215.325   -45.550   -93.725   259.300
##  [36]   477.425   -79.150  -185.525    -9.275  -115.425   338.975    66.475
##  [43]   571.200  -420.350  -876.125    98.375  1073.700   258.400   586.400
##  [50]  -127.975     0.850   -66.400  -337.375  -533.900   -44.300   523.600
##  [57]    42.000   219.975  -267.100   -35.825   615.075  2862.425   -47.500
##  [64]  -927.000 -1663.000  -562.500    22.150    89.350  -288.000  -623.050
##  [71]   172.050  -172.000  -925.500  -500.025    35.525   -86.000  -157.000
##  [78]   -35.050   580.050   195.000   993.950     6.050   895.100   919.900
##  [85]   481.175   287.825    56.000   260.000  -475.850   550.250   697.800
##  [92]   222.950   155.350   290.150  -246.850  1469.500   265.200  -424.450
##  [99] -1495.350  -677.650  -719.150   480.900   209.225   341.325  -175.800
## [106]   668.250  -602.800    55.500  -296.050   141.325   256.900   221.825
## [113]  2203.700   441.250  1101.050  -412.800  -160.700  -236.200   517.300
## [120]  -455.650    27.250   -96.100  2183.100   996.500  -661.200  -683.200
## [127]   275.400  2669.200  -611.400 -1467.200   177.000   -29.000  2198.500
## [134]  1461.900  2666.300   -62.700  -223.200  1916.800  1347.300
# Augmented Dickey-Fuller test on the raw monthly series. The reported
# p-value (0.99) gives no evidence against a unit root, i.e. the level
# series tests as non-stationary.
adf.test(stockFin)
## Warning in adf.test(stockFin): p-value greater than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  stockFin
## Dickey-Fuller = 0.45594, Lag order = 5, p-value = 0.99
## alternative hypothesis: stationary
# Same test on the first difference. The reported p-value (0.01) rejects the
# unit root, i.e. the differenced series tests as stationary.
adf.test(difflnstock)
## Warning in adf.test(difflnstock): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  difflnstock
## Dickey-Fuller = -4.4661, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
# Quick look at the aggregated data: month index against monthly median price.
stock2 <- cbind(agg$stockMon, agg$`Stock$Price`)
plot(stock2)

class(stockFin)
## [1] "numeric"
# Convert the numeric vector to a (frequency-1) time series. Note that this
# reuses the name `Stock`, replacing the data frame read from Excel.
Stock <- as.ts(stockFin)
# View() opens a GUI data viewer, so only call it in interactive sessions;
# otherwise a batch run (Rscript / headless render) can fail here.
if (interactive()) View(Stock)
class(Stock)
## [1] "ts"
# Monthly series starting in September 2002. `end` is deliberately not given:
# with a fixed end date ts() recycles or truncates the data to fit, which
# would silently corrupt the series if the workbook gains or loses rows.
# The end is instead derived from the number of observations.
Stock1 <- ts(Stock, start = c(2002, 9), frequency = 12)


# Sanity checks on the resulting series: span, class, missing values, summary.
start(Stock1)
## [1] 2002    9
end(Stock1)
## [1] 2014    4
class(Stock1)
## [1] "ts"
sum(is.na(Stock1))
## [1] 0
summary(Stock1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   843.8  2394.3  3999.2  5712.9  7293.5 22290.7
if (interactive()) View(Stock1)

# Classical decomposition of the monthly series into trend, seasonal and
# random components. The multiplicative form is used; the additive variant
# is kept below for reference.
# decomdata <- decompose(Stock1, type = "additive")
decomdata <- decompose(Stock1, type = "multiplicative")
plot(decomdata)

plot(decomdata$seasonal)

plot(decomdata$trend)

plot(decomdata$random)

# EDA on the original data: the series with a fitted linear time trend.
plot(Stock1)
abline(reg = lm(Stock1 ~ time(Stock1)))

# Position of each observation within the yearly cycle (1 = Jan ... 12 = Dec).
cycle(Stock1)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2002                                   9  10  11  12
## 2003   1   2   3   4   5   6   7   8   9  10  11  12
## 2004   1   2   3   4   5   6   7   8   9  10  11  12
## 2005   1   2   3   4   5   6   7   8   9  10  11  12
## 2006   1   2   3   4   5   6   7   8   9  10  11  12
## 2007   1   2   3   4   5   6   7   8   9  10  11  12
## 2008   1   2   3   4   5   6   7   8   9  10  11  12
## 2009   1   2   3   4   5   6   7   8   9  10  11  12
## 2010   1   2   3   4   5   6   7   8   9  10  11  12
## 2011   1   2   3   4   5   6   7   8   9  10  11  12
## 2012   1   2   3   4   5   6   7   8   9  10  11  12
## 2013   1   2   3   4   5   6   7   8   9  10  11  12
## 2014   1   2   3   4
# Boxplot of prices grouped by position in the yearly cycle (month 1-12).
# The axis labels and title are arguments of boxplot(); they were previously
# placed inside the cycle() call, where they were silently ignored and the
# plot was drawn without them.
boxplot(Stock1 ~ cycle(Stock1),
        xlab = "Date", ylab = "Stock Price(100's)",
        main = "Monthly Boxplot of Stock Price from Sep 2002 to Apr 2014")

# Let auto.arima() search for the best-fitting ARIMA specification on the
# monthly series, then show the selected model.
Newmodel <- auto.arima(Stock1)
print(Newmodel)
## Series: Stock1 
## ARIMA(3,2,1) 
## 
## Coefficients:
##          ar1      ar2      ar3      ma1
##       0.1809  -0.1041  -0.2424  -0.9117
## s.e.  0.0959   0.0871   0.0928   0.0665
## 
## sigma^2 estimated as 467815:  log likelihood=-1095.76
## AIC=2201.52   AICc=2201.97   BIC=2216.16
# Re-run the search with trace = TRUE to list every candidate model and its
# information criterion, showing how the final (p, d, q) orders were selected.
# This call ranks candidates by AIC (ic = "aic"); its result is printed but
# not assigned, so Newmodel is left unchanged.

auto.arima(Stock1, ic = "aic", trace = TRUE)
## 
##  ARIMA(2,2,2)(1,0,1)[12]                    : Inf
##  ARIMA(0,2,0)                               : 2263.143
##  ARIMA(1,2,0)(1,0,0)[12]                    : 2252.928
##  ARIMA(0,2,1)(0,0,1)[12]                    : 2213.665
##  ARIMA(0,2,1)(1,0,1)[12]                    : Inf
##  ARIMA(0,2,1)                               : 2211.67
##  ARIMA(1,2,1)                               : 2206.86
##  ARIMA(1,2,0)                               : 2250.931
##  ARIMA(1,2,2)                               : 2208.074
##  ARIMA(2,2,2)                               : 2206.299
##  ARIMA(2,2,2)(1,0,0)[12]                    : Inf
##  ARIMA(2,2,2)(0,0,1)[12]                    : 2208.266
##  ARIMA(3,2,2)                               : 2202.714
##  ARIMA(3,2,1)                               : 2201.52
##  ARIMA(2,2,0)                               : 2247.611
##  ARIMA(4,2,2)                               : 2203.449
##  ARIMA(3,2,1)(1,0,0)[12]                    : 2203.488
##  ARIMA(3,2,1)(0,0,1)[12]                    : 2203.475
##  ARIMA(3,2,1)(1,0,1)[12]                    : 2204.344
##  ARIMA(2,2,1)                               : 2206.43
##  ARIMA(4,2,1)                               : 2201.826
##  ARIMA(3,2,0)                               : 2222.544
## 
##  Best model: ARIMA(3,2,1)
## Series: Stock1 
## ARIMA(3,2,1) 
## 
## Coefficients:
##          ar1      ar2      ar3      ma1
##       0.1809  -0.1041  -0.2424  -0.9117
## s.e.  0.0959   0.0871   0.0928   0.0665
## 
## sigma^2 estimated as 467815:  log likelihood=-1095.76
## AIC=2201.52   AICc=2201.97   BIC=2216.16
# Residual diagnostics for the fitted model: residuals over time, then their
# autocorrelation and partial autocorrelation functions.

plot.ts(x = Newmodel$residuals)

acf(x = ts(Newmodel$residuals), main = "ACF Residual")

pacf(x = ts(Newmodel$residuals), main = "PACF Residual")

# Forecast for the next 10 years (h = 120 monthly steps).
# The prediction-interval argument of forecast() is lower-case `level`.
# The former `Level = c(95)` was not recognised, so it was dropped with a
# warning and the default 80% and 95% bands were produced. Those two levels
# are now requested explicitly, which matches the table printed below.
# Use `level = 95` instead if only the 95% band is wanted.
Pass_Forecast <- forecast(Newmodel, level = c(80, 95), h = 10 * 12)
Pass_Forecast
##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## May 2014       23152.79 22276.25  24029.33 21812.24  24493.35
## Jun 2014       23467.71 22051.35  24884.07 21301.58  25633.85
## Jul 2014       23872.18 22053.17  25691.19 21090.25  26654.12
## Aug 2014       24467.43 22397.92  26536.94 21302.38  27632.48
## Sep 2014       25220.49 22932.29  27508.70 21720.99  28720.00
## Oct 2014       25960.54 23438.42  28482.66 22103.30  29817.79
## Nov 2014       26635.56 23845.95  29425.17 22369.22  30901.90
## Dec 2014       27261.92 24189.77  30334.08 22563.47  31960.37
## Jan 2015       27889.40 24534.78  31244.02 22758.96  33019.85
## Feb 2015       28537.92 24905.19  32170.64 22982.15  34093.69
## Mar 2015       29201.91 25290.30  33113.52 23219.62  35184.20
## Apr 2015       29866.24 25670.18  34062.31 23448.92  36283.57
## May 2015       30523.93 26036.30  35011.56 23660.70  37387.16
## Jun 2015       31176.63 26391.51  35961.75 23858.42  38494.84
## Jul 2015       31829.03 26741.85  36916.21 24048.86  39609.19
## Aug 2015       32483.51 27090.15  37876.86 24235.09  40731.93
## Sep 2015       33139.60 27435.63  38843.58 24416.13  41863.08
## Oct 2015       33795.85 27776.39  39815.30 24589.89  43001.81
## Nov 2015       34451.45 28111.53  40791.36 24755.38  44147.51
## Dec 2015       35106.52 28441.28  41771.76 24912.92  45300.12
## Jan 2016       35761.53 28766.26  42756.80 25063.19  46459.87
## Feb 2016       36416.74 29086.82  43746.66 25206.60  47626.88
## Mar 2016       37072.12 29402.94  44741.30 25343.13  48801.12
## Apr 2016       37727.53 29714.48  45740.57 25472.63  49982.42
## May 2016       38382.87 30021.38  46744.36 25595.07  51170.67
## Jun 2016       39038.16 30323.69  47752.63 25710.52  52365.80
## Jul 2016       39693.44 30621.51  48765.37 25819.11  53567.77
## Aug 2016       40348.74 30914.91  49782.57 25920.94  54776.54
## Sep 2016       41004.05 31203.94  50804.17 26016.07  55992.04
## Oct 2016       41659.37 31488.61  51830.13 26104.54  57214.21
## Nov 2016       42314.69 31768.97  52860.41 26186.40  58442.97
## Dec 2016       42970.00 32045.04  53894.95 26261.72  59678.27
## Jan 2017       43625.30 32316.88  54933.72 26330.57  60920.04
## Feb 2017       44280.61 32584.53  55976.69 26393.00  62168.22
## Mar 2017       44935.92 32848.03  57023.82 26449.08  63422.76
## Apr 2017       45591.23 33107.41  58075.06 26498.87  64683.60
## May 2017       46246.54 33362.70  59130.38 26542.41  65950.68
## Jun 2017       46901.85 33613.96  60189.75 26579.77  67223.94
## Jul 2017       47557.16 33861.20  61253.12 26611.00  68503.32
## Aug 2017       48212.47 34104.48  62320.47 26636.16  69788.79
## Sep 2017       48867.78 34343.81  63391.75 26655.29  71080.27
## Oct 2017       49523.09 34579.25  64466.94 26668.45  72377.73
## Nov 2017       50178.40 34810.81  65546.00 26675.70  73681.11
## Dec 2017       50833.71 35038.53  66628.89 26677.07  74990.35
## Jan 2018       51489.02 35262.45  67715.59 26672.62  76305.42
## Feb 2018       52144.33 35482.59  68806.07 26662.40  77626.27
## Mar 2018       52799.64 35698.99  69900.30 26646.45  78952.84
## Apr 2018       53454.95 35911.67  70998.24 26624.81  80285.09
## May 2018       54110.26 36120.66  72099.86 26597.54  81622.98
## Jun 2018       54765.57 36326.00  73205.15 26564.68  82966.47
## Jul 2018       55420.88 36527.70  74314.06 26526.26  84315.50
## Aug 2018       56076.19 36725.81  75426.58 26482.33  85670.05
## Sep 2018       56731.50 36920.34  76542.67 26432.94  87030.06
## Oct 2018       57386.81 37111.32  77662.31 26378.12  88395.51
## Nov 2018       58042.12 37298.77  78785.47 26317.91  89766.34
## Dec 2018       58697.43 37482.73  79912.13 26252.34  91142.52
## Jan 2019       59352.74 37663.21  81042.27 26181.47  92524.01
## Feb 2019       60008.05 37840.25  82175.85 26105.32  93910.78
## Mar 2019       60663.36 38013.86  83312.87 26023.93  95302.79
## Apr 2019       61318.67 38184.07  84453.28 25937.34  96700.00
## May 2019       61973.98 38350.89  85597.07 25845.59  98102.38
## Jun 2019       62629.29 38514.37  86744.22 25748.70  99509.89
## Jul 2019       63284.60 38674.50  87894.70 25646.71 100922.50
## Aug 2019       63939.91 38831.33  89048.49 25539.65 102340.17
## Sep 2019       64595.22 38984.86  90205.58 25427.56 103762.88
## Oct 2019       65250.53 39135.13  91365.94 25310.46 105190.60
## Nov 2019       65905.84 39282.14  92529.54 25188.40 106623.28
## Dec 2019       66561.15 39425.92  93696.38 25061.40 108060.90
## Jan 2020       67216.46 39566.49  94866.43 24929.48 109503.44
## Feb 2020       67871.77 39703.88  96039.67 24792.69 110950.85
## Mar 2020       68527.08 39838.09  97216.08 24651.05 112403.11
## Apr 2020       69182.39 39969.14  98395.64 24504.58 113860.20
## May 2020       69837.70 40097.07  99578.33 24353.33 115322.07
## Jun 2020       70493.01 40221.87 100764.15 24197.30 116788.72
## Jul 2020       71148.32 40343.58 101953.06 24036.54 118260.10
## Aug 2020       71803.63 40462.21 103145.05 23871.07 119736.20
## Sep 2020       72458.94 40577.78 104340.11 23700.91 121216.97
## Oct 2020       73114.25 40690.29 105538.21 23526.09 122702.41
## Nov 2020       73769.56 40799.78 106739.34 23346.64 124192.48
## Dec 2020       74424.87 40906.26 107943.48 23162.58 125687.16
## Jan 2021       75080.18 41009.74 109150.62 22973.93 127186.43
## Feb 2021       75735.49 41110.23 110360.75 22780.73 128690.25
## Mar 2021       76390.80 41207.76 111573.84 22582.99 130198.61
## Apr 2021       77046.11 41302.35 112789.87 22380.74 131711.48
## May 2021       77701.42 41393.99 114008.85 22174.00 133228.84
## Jun 2021       78356.73 41482.72 115230.74 21962.80 134750.66
## Jul 2021       79012.04 41568.54 116455.54 21747.15 136276.93
## Aug 2021       79667.35 41651.48 117683.22 21527.09 137807.61
## Sep 2021       80322.66 41731.53 118913.79 21302.62 139342.70
## Oct 2021       80977.97 41808.73 120147.21 21073.78 140882.16
## Nov 2021       81633.28 41883.07 121383.49 20840.58 142425.98
## Dec 2021       82288.59 41954.58 122622.59 20603.05 143974.13
## Jan 2022       82943.90 42023.28 123864.52 20361.21 145526.59
## Feb 2022       83599.21 42089.16 125109.26 20115.07 147083.35
## Mar 2022       84254.52 42152.25 126356.79 19864.66 148644.38
## Apr 2022       84909.83 42212.56 127607.10 19609.99 150209.67
## May 2022       85565.14 42270.10 128860.18 19351.09 151779.19
## Jun 2022       86220.45 42324.89 130116.01 19087.98 153352.92
## Jul 2022       86875.76 42376.93 131374.59 18820.67 154930.85
## Aug 2022       87531.07 42426.24 132635.90 18549.19 156512.95
## Sep 2022       88186.38 42472.83 133899.93 18273.54 158099.22
## Oct 2022       88841.69 42516.72 135166.66 17993.76 159689.62
## Nov 2022       89497.00 42557.91 136436.09 17709.86 161284.14
## Dec 2022       90152.31 42596.42 137708.20 17421.85 162882.77
## Jan 2023       90807.62 42632.25 138982.98 17129.76 164485.48
## Feb 2023       91462.93 42665.43 140260.43 16833.59 166092.26
## Mar 2023       92118.24 42695.96 141540.52 16533.38 167703.10
## Apr 2023       92773.55 42723.85 142823.25 16229.13 169317.96
## May 2023       93428.86 42749.11 144108.61 15920.87 170936.85
## Jun 2023       94084.17 42771.76 145396.58 15608.60 172559.73
## Jul 2023       94739.48 42791.80 146687.16 15292.35 174186.60
## Aug 2023       95394.79 42809.24 147980.33 14972.14 175817.44
## Sep 2023       96050.10 42824.10 149276.09 14647.96 177452.23
## Oct 2023       96705.41 42836.39 150574.43 14319.86 179090.96
## Nov 2023       97360.72 42846.12 151875.32 13987.83 180733.61
## Dec 2023       98016.03 42853.29 153178.77 13651.89 182380.16
## Jan 2024       98671.34 42857.91 154484.76 13312.07 184030.61
## Feb 2024       99326.65 42860.00 155793.29 12968.37 185684.93
## Mar 2024       99981.96 42859.57 157104.34 12620.81 187343.11
## Apr 2024      100637.27 42856.63 158417.91 12269.40 189005.13
plot(Pass_Forecast)

# Test your final model: Ljung-Box tests on the model residuals at a short,
# a medium and a long lag. Large p-values mean no evidence of remaining
# autocorrelation.
# The component is spelled out as `residuals`; `Newmodel$resid` only worked
# through partial matching of list names by `$`.
# NOTE(review): Box.test() is called with its default fitdf = 0, so the
# degrees of freedom are not reduced by the four estimated ARMA coefficients.
# Confirm whether that is intended.

Box.test(Newmodel$residuals, lag = 2, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$residuals
## X-squared = 0.058414, df = 2, p-value = 0.9712
Box.test(Newmodel$residuals, lag = 15, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$residuals
## X-squared = 13.924, df = 15, p-value = 0.5313
Box.test(Newmodel$residuals, lag = 40, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$residuals
## X-squared = 41.12, df = 40, p-value = 0.4213