Libraries used in the project
#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
#install.packages("DataExplorer")
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.4.3
#install.packages("tsutils")
library(tsutils)
## Warning: package 'tsutils' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#install.packages("forecast")
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
#install.packages("smooth")
library(smooth)
## Warning: package 'smooth' was built under R version 4.4.3
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 4.4.2
## Package "greybox", v2.0.3 loaded.
## This is package "smooth", v4.1.1
#install.packages("tseries")
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
#install.packages("car")
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
Uploading the time series
data1 <- read_excel("C:\\Users\\patri\\Downloads\\coursework data.xlsx")
data2 <- read_excel("C:\\Users\\patri\\Downloads\\coursework data 1.xlsx")
n1725 <- ts(data1,frequency = 12, start = c(1984,10))
n1726 <- ts(data2,frequency = 12, start = c(1984,10))
Data exploration N1725
#line plot of the time series
plot(n1725)

#basic statistic summary
summary(n1725)
## N1725
## Min. :1920
## 1st Qu.:2545
## Median :2890
## Mean :2989
## 3rd Qu.:3350
## Max. :4800
str(n1725)
## Time-Series [1:126, 1] from 1985 to 1995: 3740 2880 2620 2860 2640 2740 2940 4040 3800 3640 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "N1725"
plot_histogram(n1725)

#Centred moving average to estimate the yearly trend
n1725_cma <- cmav(n1725, ma = 12, fill = FALSE)
plot(n1725)
lines(n1725_cma, col = 'blue')

#seasonality plot
seasplot(n1725)
## Results of statistical testing
## Evidence of trend: TRUE (pval: 0)
## Evidence of seasonality: TRUE (pval: 0)
data_seas_index_n1725 <- seasplot(n1725)$season

#decomposition of the time series
decomposition_n1725 <- decomp(n1725,decomposition = "additive",outplot = TRUE)

decomposition_n1725 <- decomp(n1725,decomposition = "multiplicative",outplot = TRUE)

##structure of the decomposition
str(decomposition_n1725)
## List of 5
## $ trend : Time-Series [1:126, 1] from 1985 to 1995: 3402 3402 3402 3401 3401 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr "N1725"
## $ season : Time-Series [1:126, 1] from 1985 to 1995: 1.014 0.859 0.856 0.826 0.811 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr "N1725"
## $ irregular: Time-Series [1:126, 1] from 1985 to 1995: 289.9 -44 -291.6 48.7 -119.7 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr "y"
## $ f.season : NULL
## $ g : NULL
##Extract the Noise
decomposition_n1725$irregular
## Jan Feb Mar Apr May Jun
## 1984
## 1985 48.692078 -119.675387 -451.220482 -363.343296 181.398395 -300.118022
## 1986 -127.773743 -241.746283 -278.128484 -295.883668 117.611329 440.199623
## 1987 19.432723 -13.020140 -446.433474 -352.026482 -598.595212 632.997867
## 1988 -87.162110 164.211051 -506.136199 -521.364658 -108.587222 -461.079478
## 1989 -283.454299 134.776677 8.290540 1185.526401 811.274900 181.232873
## 1990 382.269043 -160.448466 314.079660 299.444985 94.924089 -575.563305
## 1991 -365.811030 -366.143064 373.846124 159.513218 -157.097733 183.642583
## 1992 -3.267322 -57.849527 -57.102214 151.175714 -96.828370 -237.248888
## 1993 224.414503 47.274653 183.347182 -427.446074 -126.288046 269.785523
## 1994 19.672518 217.343559 365.027999 60.495702 -80.615437 -62.809480
## 1995 101.322249 362.651705 300.950861
## Jul Aug Sep Oct Nov Dec
## 1984 289.851409 -44.027715 -291.570218
## 1985 -475.531604 688.007086 660.236205 125.102182 -65.342752 173.716195
## 1986 507.805280 -736.666908 478.562840 -120.813797 584.241958 -173.708592
## 1987 150.293428 -59.854527 124.193227 -52.366403 433.548172 393.043740
## 1988 -24.117950 674.414263 69.421676 -121.381665 -308.600548 158.211853
## 1989 -486.509196 -549.969947 -494.005442 -387.298182 -368.605453 -188.049586
## 1990 361.040457 324.508196 -177.593336 145.660346 23.242317 154.324037
## 1991 335.291025 -782.194282 -121.809559 67.630362 202.909104 82.678274
## 1992 230.675191 234.269371 -458.784377 245.659268 -201.497772 170.640880
## 1993 -589.561219 528.870919 223.049763 -437.720983 68.365433 -321.454989
## 1994 25.656291 -350.528027 -95.163675 304.417793 -246.266531 -161.451513
## 1995
#pure or mixed multiplicative decomp?
##Extract the Trend
decomposition_n1725$trend
## Jan Feb Mar Apr May Jun Jul Aug
## 1984
## 1985 3401.489 3401.093 3400.599 3400.000 3399.167 3425.833 3450.000 3451.667
## 1986 3615.000 3600.833 3535.833 3515.833 3525.833 3525.833 3497.500 3489.167
## 1987 3267.500 3245.000 3225.000 3182.500 3152.500 3147.500 3143.333 3122.500
## 1988 2912.500 2903.333 2905.000 2883.333 2844.167 2808.333 2803.333 2808.333
## 1989 3150.000 3087.500 3017.500 2979.167 2950.000 2906.667 2889.167 2879.167
## 1990 2707.500 2785.833 2840.833 2882.500 2929.167 2970.833 2966.667 2934.167
## 1991 2886.667 2817.500 2755.833 2738.333 2728.333 2720.833 2720.000 2738.333
## 1992 2690.000 2733.333 2767.500 2767.500 2763.333 2755.000 2774.167 2791.667
## 1993 2777.500 2751.667 2788.333 2786.667 2771.667 2765.833 2740.833 2745.833
## 1994 2855.833 2862.500 2829.167 2860.833 2890.000 2893.333 2912.500 2930.000
## 1995 2974.830 2979.201 2982.697
## Sep Oct Nov Dec
## 1984 3402.260 3402.058 3401.805
## 1985 3465.833 3485.833 3496.667 3535.833
## 1986 3467.500 3432.500 3369.167 3310.833
## 1987 3103.333 3069.167 3055.833 2999.167
## 1988 2839.167 2940.000 3058.333 3133.333
## 1989 2862.500 2827.500 2755.833 2696.667
## 1990 2925.833 2913.333 2881.667 2880.833
## 1991 2730.833 2714.167 2719.167 2707.500
## 1992 2807.500 2795.000 2770.833 2791.667
## 1993 2765.833 2798.333 2829.167 2829.167
## 1994 2943.333 2954.003 2962.539 2969.368
## 1995
##Extract the Seasonality
decomposition_n1725$season
## Jan Feb Mar Apr May Jun Jul
## 1984
## 1985 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1986 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1987 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1988 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1989 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1990 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1991 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1992 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1993 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1994 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1995 0.8264934 0.8114084 0.9384290
## Aug Sep Oct Nov Dec
## 1984 1.0140754 0.8594879 0.8558898
## 1985 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1986 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1987 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1988 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1989 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1990 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1991 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1992 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1993 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1994 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1995
##Multiply the Trend and Seasonality
regular_components_n1725 <- decomposition_n1725$trend * decomposition_n1725$season
###mixed multiplicative model errors
mmm_errors_n1725 <- n1725 - regular_components_n1725
###compare errors
decomposition_n1725$irregular - mmm_errors_n1725
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1984 0 0 0
## 1985 0 0 0 0 0 0 0 0 0 0 0 0
## 1986 0 0 0 0 0 0 0 0 0 0 0 0
## 1987 0 0 0 0 0 0 0 0 0 0 0 0
## 1988 0 0 0 0 0 0 0 0 0 0 0 0
## 1989 0 0 0 0 0 0 0 0 0 0 0 0
## 1990 0 0 0 0 0 0 0 0 0 0 0 0
## 1991 0 0 0 0 0 0 0 0 0 0 0 0
## 1992 0 0 0 0 0 0 0 0 0 0 0 0
## 1993 0 0 0 0 0 0 0 0 0 0 0 0
## 1994 0 0 0 0 0 0 0 0 0 0 0 0
## 1995 0 0 0
### all values are 0, which indicates this is a Mixed Multiplicate Model
Data exploration N1726
#line plot of the time series
plot(n1726)

#basic statistic summary
summary(n1726)
## N1726
## Min. :1200
## 1st Qu.:1780
## Median :2270
## Mean :2550
## 3rd Qu.:2980
## Max. :7480
str(n1726)
## Time-Series [1:126, 1] from 1985 to 1995: 4580 3280 3740 3340 1820 4700 3640 4220 5480 6420 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "N1726"
plot_histogram(n1726)

#Centred moving average to estimate the yearly trend
n1726_cma <- cmav(n1726, ma = 12, fill = FALSE)
plot(n1726)
lines(n1726_cma, col = 'red')

#seasonality plot
seasplot(n1726)
## Results of statistical testing
## Evidence of trend: TRUE (pval: 0)
## Evidence of seasonality: TRUE (pval: 0)
data_seas_index_n1726 <- seasplot(n1726)$season

#decomposition of the time series
decomposition_n1726 <- decomp(n1726,decomposition = "additive",outplot = TRUE)

decomposition_n1726 <- decomp(n1726,decomposition = "multiplicative",outplot = TRUE)

##structure of the decomposition
str(decomposition_n1726)
## List of 5
## $ trend : Time-Series [1:126, 1] from 1985 to 1995: 4686 4639 4592 4545 4498 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr "N1726"
## $ season : Time-Series [1:126, 1] from 1985 to 1995: 1.013 0.821 0.885 0.781 0.688 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr "N1726"
## $ irregular: Time-Series [1:126, 1] from 1985 to 1995: -168 -530 -326 -209 -1276 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr "y"
## $ f.season : NULL
## $ g : NULL
##Extract the Noise
decomposition_n1726$irregular
## Jan Feb Mar Apr May Jun
## 1984
## 1985 -209.46319 -1276.15378 310.70769 -355.61260 -200.67380 289.96498
## 1986 -690.50687 -216.92696 324.08136 -443.91571 497.81492 1151.27641
## 1987 619.51680 -115.08396 -278.57606 -370.84142 173.71970 -71.51597
## 1988 223.46571 -105.99650 -525.77043 450.05807 437.41491 -247.70224
## 1989 -161.60963 81.79275 301.00482 -373.23536 -765.82784 185.93592
## 1990 200.42983 -47.73166 -439.02081 -311.44518 531.05853 -777.15339
## 1991 -213.53974 -96.87848 725.59302 472.47402 -501.75798 161.78630
## 1992 256.87243 618.65331 -452.97762 526.49092 -529.25157 -380.09003
## 1993 -251.32745 -204.51669 56.61792 -109.04244 115.69670 -231.05110
## 1994 -369.67876 346.60800 -285.59475 65.11792 446.17481 479.81836
## 1995 343.46168 172.31782 322.61195
## Jul Aug Sep Oct Nov Dec
## 1984 -168.09544 -529.79001 -325.96643
## 1985 658.17522 2015.28641 -100.08849 54.23659 -236.99742 -1198.12133
## 1986 -112.30142 449.01333 195.02069 -656.33508 220.48595 -1024.46131
## 1987 473.59017 32.17335 336.71371 -229.43967 -465.20948 -98.97445
## 1988 -343.63347 861.48462 -514.10143 -79.99709 -155.96005 710.98958
## 1989 -587.11119 291.78293 860.15441 -363.80717 243.39974 -70.61512
## 1990 688.41979 597.12106 -600.83002 -156.41769 354.50350 -128.40165
## 1991 -798.73316 -1096.08630 -64.23936 898.72576 430.77281 -47.08362
## 1992 -491.67263 -168.21707 263.73989 122.73800 -265.34198 1245.93219
## 1993 568.74600 -669.29365 -27.53128 285.27093 34.65802 -4.47612
## 1994 273.69525 -936.08630 -324.18065 -109.25889 -97.74245 -109.51213
## 1995
#pure or mixed multiplicative decomp?
##Extract the Trend
decomposition_n1726$trend
## Jan Feb Mar Apr May Jun Jul Aug
## 1984
## 1985 4545.273 4498.239 4451.205 4404.167 4386.667 4328.333 4239.167 4230.000
## 1986 4060.000 3889.167 3768.333 3685.833 3614.167 3576.667 3570.000 3569.167
## 1987 3048.333 2956.667 2858.333 2811.667 2745.000 2711.667 2697.500 2653.333
## 1988 2556.667 2536.667 2520.833 2480.000 2483.333 2508.333 2504.167 2475.833
## 1989 2255.833 2176.667 2169.167 2175.000 2149.167 2113.333 2080.000 2080.833
## 1990 2048.333 2103.333 2047.500 1996.667 2013.333 2015.833 1995.000 1968.333
## 1991 2015.000 1884.167 1840.000 1904.167 1946.667 1950.000 1970.833 2025.000
## 1992 2001.667 2065.000 2122.500 2109.167 2053.333 2085.000 2127.500 2080.833
## 1993 2165.833 2185.833 2153.333 2148.333 2167.500 2127.500 2068.333 2081.667
## 1994 2112.500 2082.500 2054.167 2022.500 1998.333 1985.000 2005.833 2025.000
## 1995 2044.451 2045.148 2045.844
## Sep Oct Nov Dec
## 1984 4686.376 4639.342 4592.307
## 1985 4229.167 4170.833 4136.667 4131.667
## 1986 3484.167 3391.667 3311.667 3167.500
## 1987 2617.500 2615.000 2636.667 2619.167
## 1988 2493.333 2467.500 2357.500 2291.667
## 1989 2037.500 1997.500 2041.667 2045.000
## 1990 2000.000 2069.167 2052.500 2042.500
## 1991 2022.500 1995.000 2008.333 1995.833
## 1992 2072.500 2070.000 2076.667 2116.667
## 1993 2083.333 2067.500 2076.667 2105.833
## 1994 2041.667 2042.361 2043.058 2043.754
## 1995
##Extract the Seasonality
decomposition_n1726$season
## Jan Feb Mar Apr May Jun Jul
## 1984
## 1985 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1986 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1987 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1988 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1989 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1990 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1991 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1992 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1993 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1994 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1995 0.7809130 0.6883035 0.9860907
## Aug Sep Oct Nov Dec
## 1984 1.0131700 0.8211920 0.8853864
## 1985 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1986 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1987 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1988 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1989 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1990 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1991 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1992 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1993 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1994 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1995
##Multiply the Trend and Seasonality
regular_components_n1726 <- decomposition_n1726$trend * decomposition_n1726$season
###mixed multiplicative model errors
mmm_errors_n1726 <- n1726 - regular_components_n1726
###compare errors
decomposition_n1726$irregular - mmm_errors_n1726
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1984 0 0 0
## 1985 0 0 0 0 0 0 0 0 0 0 0 0
## 1986 0 0 0 0 0 0 0 0 0 0 0 0
## 1987 0 0 0 0 0 0 0 0 0 0 0 0
## 1988 0 0 0 0 0 0 0 0 0 0 0 0
## 1989 0 0 0 0 0 0 0 0 0 0 0 0
## 1990 0 0 0 0 0 0 0 0 0 0 0 0
## 1991 0 0 0 0 0 0 0 0 0 0 0 0
## 1992 0 0 0 0 0 0 0 0 0 0 0 0
## 1993 0 0 0 0 0 0 0 0 0 0 0 0
## 1994 0 0 0 0 0 0 0 0 0 0 0 0
## 1995 0 0 0
### all values are 0, which indicates this is a Mixed Multiplicate Model
Exponential Smoothing for
N1725
#SPLITTING THE SERIES
#total number of observations
n1725_length <- length(n1725)
#size of train set and forecasting horizon
train_length_n1725 <- 48
h <- 14
#training set
train_n1725 <- ts(n1725[1:train_length_n1725], frequency=12)
#test set
test_n1725 <- ts(n1725[(train_length_n1725+1):n1725_length],frequency=12)
#SMA
#Fitting a SMA 3
SMA_n1725 <- ma(train_n1725, order=3, centre=FALSE)
#get rid of NA values:
SMA_no_NAs_n1725 <- SMA_n1725[!is.na(SMA_n1725)]
# Then form a forecast:
SMA3_forecast_n1725 <- ts(rep(SMA_no_NAs_n1725[length(SMA_no_NAs_n1725)],14), frequency=12)
# Errors
SMA3_errors_n1725 <- test_n1725 - SMA3_forecast_n1725
#Error Metrics
SMA3_MAE_n1725 <- mean(abs(SMA3_errors_n1725))
SMA3_MAPE_n1725 <- 100 * mean(abs(SMA3_errors_n1725)/test_n1725)
#Fitting a SMA 5
SMA5_n1725 <- ma(train_n1725, order=5, centre=FALSE)
#get rid of NA values:
SMA5_no_NAs_n1725 <- SMA5_n1725[!is.na(SMA5_n1725)]
# Then form a forecast:
SMA5_forecast_n1725 <- ts(rep(SMA5_no_NAs_n1725[length(SMA5_no_NAs_n1725)],14), frequency=12)
# Errors
SMA5_errors_n1725 <- test_n1725 - SMA5_forecast_n1725
#Error Metrics
SMA5_MAE_n1725 <- mean(abs(SMA5_errors_n1725))
SMA5_MAPE_n1725 <- 100 * mean(abs(SMA5_errors_n1725)/test_n1725)
#Naive Forecast
naive_method_n1725 <- naive(train_n1725, h=h)
naive_forecast_n1725 <- naive_method_n1725$mean
plot(naive_method_n1725)

# Errors
naive_errors_n1725 <- test_n1725 - naive_forecast_n1725
#Error Metrics
naive_MAE_n1725 <- mean(abs(naive_errors_n1725))
naive_MAPE_n1725 <- 100 * mean(abs(naive_errors_n1725)/test_n1725)
#Simple exponential smoothing (ets) and (es)
#In the model, A stands for additive, M stands for Multiplicative, Z for automatically selected
#ETS
#alpha 0.15
ETS_MMM_0.15_n1725 <- ets(train_n1725, model="MMM", alpha=0.15)
ETS_MMM_0.15_n1725
## ETS(M,M,M)
##
## Call:
## ets(y = train_n1725, model = "MMM", alpha = 0.15)
##
## Smoothing parameters:
## alpha = 0.15
## beta = 0.0237
## gamma = 1e-04
##
## Initial states:
## l = 3390.6613
## b = 1.0042
## s = 1.1162 1.2761 1.2154 1.2271 1.0942 0.8441
## 0.7997 0.7992 0.7964 0.8649 0.9345 1.0321
##
## sigma: 0.1134
##
## AIC AICc BIC
## 765.7637 783.3121 795.7029
#alpha optimized
ETS_MMM_opt_n1725 <- ets(train_n1725, "MMM")
opt_alpha_ETS_N1725 <- coef(ETS_MMM_opt_n1725)
#forecast ETS
#alpha 0.15
ETS_MMM_0.15_n1725_forecast <- forecast.ets(ETS_MMM_0.15_n1725, h=h)$mean
ETS_0.15_n1725 <- plot(forecast(ETS_MMM_0.15_n1725, h=h))

#optimized alpha
ETS_MMM_opt_n1725_forecast <- forecast.ets(ETS_MMM_opt_n1725, h=h)$mean
ETS_opt_n1725 <- plot(forecast(ETS_MMM_opt_n1725, h=h))

#ES
#alpha 0.15
ES_MMM_0.15_n1725 <- es(train_n1725, "MMM", persistence=0.15, h=h)
## Warning: Length of persistence vector is wrong! Changing to estimation of
## persistence vector values.
ES_MMM_0.15_n1725
## Time elapsed: 0.12 seconds
## Model estimated using es() function: ETS(MMM)
## With optimal initialisation
## Distribution assumed in the model: Normal
## Loss function type: likelihood; Loss function value: 342.9278
## Persistence vector g:
## alpha beta gamma
## 0.0838 0.0545 0.0027
##
## Sample size: 48
## Number of estimated parameters: 17
## Number of degrees of freedom: 31
## Information criteria:
## AIC AICc BIC BICc
## 719.8555 740.2555 751.6660 791.1522
#optimized alpha
ES_MMM_opt_n1725 <- es(train_n1725, "MMM", h=h)
opt_alpha_ES_N1725 <- coef(ES_MMM_opt_n1725)
#Forecast ES
#alpha 0.15
ES_MMM_0.15_forecast_n1725 <- forecast(ES_MMM_0.15_n1725, h=h)$mean
ES_0.15_n1725 <- plot(forecast(ES_MMM_0.15_n1725, h=h))

#optimized alpha
ES_MMM_opt_forecast_n1725 <- forecast(ES_MMM_opt_n1725, h=h)$mean
ES_opt_n1725 <- plot(forecast(ES_MMM_opt_n1725, h=h))

#Errors MAPE
ETS_SES_errors_n1725 <- test_n1725 - ETS_MMM_opt_n1725_forecast
ES_SES_errors_n1725 <- test_n1725 - ES_MMM_opt_forecast_n1725
ETS_SES_MAPE_n1725 <- 100 * mean(abs(ETS_SES_errors_n1725)/test_n1725)
ES_SES_MAPE_n1725 <- 100 * mean(abs(ES_SES_errors_n1725)/test_n1725)
#Initial seed
es_MZZ_initial_1_n1725 <- es(n1725, model="MZZ", initial=n1725[1],
h=h, holdout=TRUE)
## Warning: Predefined initials vector can only be used with preselected ETS model.
## Changing to estimation of initials.
es_MZZ_initial_1_n1725$accuracy
## ME MAE MSE MPE MAPE
## 2.493236e+02 2.980950e+02 1.312463e+05 8.424985e-02 1.015395e-01
## sCE sMAE sMSE MASE RMSSE
## 1.164619e+00 9.945972e-02 1.461074e-02 6.825195e-01 6.510780e-01
## rMAE rRMSE rAME asymmetry sPIS
## 5.378003e-01 5.540561e-01 4.592803e-01 8.373972e-01 -7.943126e+00
ES_MZZ_optinitial_n1725 <- es(n1725, model="MZZ", h=h, holdout=TRUE)
ES_MZZ_optinitial_n1725$accuracy
## ME MAE MSE MPE MAPE
## 2.493236e+02 2.980950e+02 1.312463e+05 8.424985e-02 1.015395e-01
## sCE sMAE sMSE MASE RMSSE
## 1.164619e+00 9.945972e-02 1.461074e-02 6.825195e-01 6.510780e-01
## rMAE rRMSE rAME asymmetry sPIS
## 5.378003e-01 5.540561e-01 4.592803e-01 8.373972e-01 -7.943126e+00
#Holt/Winters method
# ETS
ets_AAA_n1725 <- ets(train_n1725, model="AAA", damped=FALSE)
# ES
es_AAA_n1725 <- es(train_n1725, model="AAA", h=h)
#forecast
ES_AAA_forecast_n1725 <- forecast(es_AAA_n1725, h=h)$mean
ES_AAA_n1725 <- plot(forecast(es_AAA_n1725, h=h))

es_AAA_errors_n1725 <- test_n1725 - ES_AAA_forecast_n1725
#Error Metrics
AAA_MAE_n1725 <- mean(abs(es_AAA_errors_n1725))
AAA_MAPE_n1725 <- 100 * mean(abs(es_AAA_errors_n1725)/test_n1725)
#optimized model
# ETS
ets_ZZZ_n1725 <- ets(train_n1725, model="ZZZ")
# ES
es_ZZZ_n1725 <- es(train_n1725, model="ZZZ")
es_ZZZ_forecast_n1725 <- forecast(es_ZZZ_n1725, h=h)$mean
ES_ZZZ_n1725 <- plot(forecast(es_ZZZ_n1725, h=h))

ES_ZZZ_errors_n1725 <- test_n1725 - es_ZZZ_forecast_n1725
ES_ZZZ_MAPE_n1725 <- 100 * mean(abs(ES_ZZZ_errors_n1725)/test_n1725)
#FORECAST
ExSm_n1725 <- es(n1725, model = 'ZZZ')
forecast_es_n1725 <- forecast(ExSm_n1725, h=h)$mean
es_n1725 <- plot(forecast(ExSm_n1725, h=h))

table(forecast_es_n1725)
## forecast_es_n1725
## 2482.41404757242 2507.99897343387 2598.51130853837 2605.68889895339
## 1 1 1 1
## 2870.40811269844 2980.67785095458 2984.02521002955 3071.78194922568
## 1 1 1 1
## 3089.47572521573 3441.64209446861 3443.34339572928 3621.06040262753
## 1 1 1 1
## 3622.83927842651 3713.98146052169
## 1 1
Exponential Smoothing for
N1726
#SPLITTING THE SERIES
#total number of observations
n1726_length <- length(n1726)
#size of train set and forecasting horizon
train_length_n1726 <- 48
h <- 14
#training set
train_n1726 <- ts(n1726[1:train_length_n1726], frequency=12)
#test set
test_n1726 <- ts(n1726[(train_length_n1726+1):n1726_length],frequency=12)
#SMA
#Fitting a SMA 3
SMA_n1726 <- ma(train_n1726, order=3, centre=FALSE)
#get rid of NA values:
SMA_no_NAs_n1726 <- SMA_n1726[!is.na(SMA_n1726)]
# Then form a forecast:
SMA3_forecast_n1726 <- ts(rep(SMA_no_NAs_n1726[length(SMA_no_NAs_n1726)],14), frequency=12)
# Errors
SMA3_errors_n1726 <- test_n1726 - SMA3_forecast_n1726
#Error Metrics
SMA3_MAE_n1726 <- mean(abs(SMA3_errors_n1726))
SMA3_MAPE_n1726 <- 100 * mean(abs(SMA3_errors_n1726)/test_n1726)
#Fitting a SMA 5
SMA5_n1726 <- ma(train_n1726, order=5, centre=FALSE)
#get rid of NA values:
SMA5_no_NAs_n1726 <- SMA5_n1726[!is.na(SMA5_n1726)]
# Then form a forecast:
SMA5_forecast_n1726 <- ts(rep(SMA5_no_NAs_n1726[length(SMA5_no_NAs_n1726)],14), frequency=12)
# Errors
SMA5_errors_n1726 <- test_n1726 - SMA5_forecast_n1726
#Error Metrics
SMA5_MAE_n1726 <- mean(abs(SMA5_errors_n1726))
SMA5_MAPE_n1726 <- 100 * mean(abs(SMA5_errors_n1726)/test_n1726)
#Naive Forecast
naive_method_n1726 <- naive(train_n1726, h=h)
naive_forecast_n1726 <- naive_method_n1726$mean
plot(naive_method_n1726)

# Errors
naive_errors_n1726 <- test_n1726 - naive_forecast_n1726
#Error Metrics
naive_MAE_n1726 <- mean(abs(naive_errors_n1726))
naive_MAPE_n1726 <- 100 * mean(abs(naive_errors_n1726)/test_n1726)
#Simple exponential smoothing (ets) and (es)
#In the model, M stands for Multiplicative, Z for automatically selected
#ETS
#alpha 0.15
ETS_MZZ_0.15_n1726 <- ets(train_n1726, model="MMM", alpha=0.15)
ETS_MZZ_0.15_n1726
## ETS(M,M,M)
##
## Call:
## ets(y = train_n1726, model = "MMM", alpha = 0.15)
##
## Smoothing parameters:
## alpha = 0.15
## beta = 1e-04
## gamma = 1e-04
##
## Initial states:
## l = 4917.6864
## b = 0.9841
## s = 1.0559 1.5599 1.4087 1.2698 1.0683 0.8856
## 0.9522 0.5962 0.8067 0.7139 0.7461 0.9367
##
## sigma: 0.1736
##
## AIC AICc BIC
## 804.7903 822.3387 834.7295
#alpha optimized
ETS_MZZ_opt_n1726 <- ets(train_n1726, "MMM")
opt_alpha_ETS_N1726 <- coef(ETS_MZZ_opt_n1726)
#forecast ETS
#alpha 0.15
ETS_MZZ_0.15_n1726_forecast <- forecast.ets(ETS_MZZ_0.15_n1726, h=h)$mean
ETS_0.15_n1726 <- plot(forecast(ETS_MZZ_0.15_n1726, h=h))

#optimized alpha
ETS_MZZ_opt_n1726_forecast <- forecast.ets(ETS_MZZ_opt_n1726, h=h)$mean
ETS_opt_n1726 <- plot(forecast(ETS_MZZ_opt_n1726, h=h))

#ES
#alpha 0.15
ES_MZZ_0.15_n1726 <- es(train_n1726, "MMM", persistence=0.15, h=h)
## Warning: Length of persistence vector is wrong! Changing to estimation of
## persistence vector values.
ES_MZZ_0.15_n1726
## Time elapsed: 0.16 seconds
## Model estimated using es() function: ETS(MMM)
## With optimal initialisation
## Distribution assumed in the model: Normal
## Loss function type: likelihood; Loss function value: 363.9173
## Persistence vector g:
## alpha beta gamma
## 0.0522 0.0519 0.0018
##
## Sample size: 48
## Number of estimated parameters: 17
## Number of degrees of freedom: 31
## Information criteria:
## AIC AICc BIC BICc
## 761.8347 782.2347 793.6451 833.1313
#optimized alpha
ES_MZZ_opt_n1726 <- es(train_n1726, "MMM", h=h)
opt_alpha_ES_N1726 <- coef(ES_MZZ_opt_n1726)
#Forecast ES
#alpha 0.15
ES_MZZ_0.15_forecast_n1726 <- forecast(ES_MZZ_0.15_n1726, h=h)$mean
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
ES_0.15_n1726 <- plot(forecast(ES_MZZ_0.15_n1726, h=h))
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.

#optimized alpha
ES_MZZ_opt_forecast_n1726 <- forecast(ES_MZZ_opt_n1726, h=h)$mean
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
ES_opt_n1726 <- plot(forecast(ES_MZZ_opt_n1726, h=h))
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.

#Errors MAPE
ETS_SES_errors_n1726 <- test_n1726 - ETS_MZZ_opt_n1726_forecast
ES_SES_errors_n1726 <- test_n1726 - ES_MZZ_opt_forecast_n1726
ETS_SES_MAPE_n1726 <- 100 * mean(abs(ETS_SES_errors_n1726)/test_n1726)
ES_SES_MAPE_n1726 <- 100 * mean(abs(ES_SES_errors_n1726)/test_n1726)
#Initial seed
es_MZZ_initial_1_n1726 <- es(n1726, model="MZZ", initial=n1726[1],
h=h, holdout=TRUE)
## Warning: Predefined initials vector can only be used with preselected ETS model.
## Changing to estimation of initials.
es_MZZ_initial_1_n1726$accuracy
## ME MAE MSE MPE MAPE
## -1.932278e+02 3.426107e+02 2.397486e+05 -1.196374e-01 1.896696e-01
## sCE sMAE sMSE MASE RMSSE
## -1.034277e+00 1.309906e-01 3.504570e-02 4.884381e-01 5.430249e-01
## rMAE rRMSE rAME asymmetry sPIS
## 4.612067e-01 5.653460e-01 2.601143e-01 -4.191700e-01 7.790401e+00
ES_MZZ_optinitial_n1726 <- es(n1726, model="MZZ", h=h, holdout=TRUE)
ES_MZZ_optinitial_n1726$accuracy
## ME MAE MSE MPE MAPE
## -1.932278e+02 3.426107e+02 2.397486e+05 -1.196374e-01 1.896696e-01
## sCE sMAE sMSE MASE RMSSE
## -1.034277e+00 1.309906e-01 3.504570e-02 4.884381e-01 5.430249e-01
## rMAE rRMSE rAME asymmetry sPIS
## 4.612067e-01 5.653460e-01 2.601143e-01 -4.191700e-01 7.790401e+00
#Holt/Winters method
# ETS
ets_AAA_n1726 <- ets(train_n1726, model="AAA", damped=FALSE)
# ES
es_AAA_n1726 <- es(train_n1726, model="AAA", h=h)
#forecast
ES_AAA_forecast_n1726 <- forecast(es_AAA_n1726, h=h)$mean
ES_AAA_n1726 <- plot(forecast(es_AAA_n1726, h=h))

es_AAA_errors_n1726 <- test_n1726 - ES_AAA_forecast_n1726
#Error Metrics
AAA_MAE_n1726 <- mean(abs(es_AAA_errors_n1726))
AAA_MAPE_n1726 <- 100 * mean(abs(es_AAA_errors_n1726)/test_n1726)
#optimized model
# ETS
ets_ZZZ_n1726 <- ets(train_n1726, model="ZZZ")
# ES
es_ZZZ_n1726 <- es(train_n1726, model="ZZZ")
ets_ZZZ_forecast_n1726 <- forecast(ets_ZZZ_n1726, h=h)$mean
ETS_ZZZ_n1726 <- plot(forecast(ets_ZZZ_n1726, h=h))

es_ZZZ_forecast_n1726 <- forecast(es_ZZZ_n1726, h=h)$mean
ES_ZZZ_n1726 <- plot(forecast(es_ZZZ_n1726, h=h))

ETS_ZZZ_errors_n1726 <- test_n1726 - ets_ZZZ_forecast_n1726
ES_ZZZ_errors_n1726 <- test_n1726 - es_ZZZ_forecast_n1726
ETS_ZZZ_MAPE_n1726 <- 100 * mean(abs(ETS_ZZZ_errors_n1726)/test_n1726)
ES_ZZZ_MAPE_n1726 <- 100 * mean(abs(ES_ZZZ_errors_n1726)/test_n1726)
#FORECAST
ExSm_n1726 <- ets(n1726, model = 'MMM')
forecast_es_n1726 <- forecast(ExSm_n1726, h=h)$mean
es_n1726 <- plot(forecast(ExSm_n1726, h=h))

table(forecast_es_n1726)
## forecast_es_n1726
## 1384.0627474813 1555.85452387733 1677.31861932617 1815.2571048759
## 1 1 1 1
## 1862.58474339395 1868.00359814858 1906.70210892913 1925.41266365109
## 1 1 1 1
## 1981.35998143742 2007.16838156838 2087.19731974442 2375.4931320524
## 1 1 1 1
## 2609.02010311348 2650.92251888498
## 1 1
ARIMA for N1725
#KPSS test, the Null and Alternative Hypotheses are:
#H0: The data is stationary
#H1: The data is not stationary
kpss.test(n1725)
##
## KPSS Test for Level Stationarity
##
## data: n1725
## KPSS Level = 0.62627, Truncation lag parameter = 4, p-value = 0.02025
#p value 0.02 (data is not stationary)
#ADF test, The null and alternative hypotheses are:
#H0: The data is not stationary.
#H1: The data is stationary.
adf.test(n1725)
## Warning in adf.test(n1725): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: n1725
## Dickey-Fuller = -7.2602, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)
#DIFFERENCING
diff_n1725 <- diff(n1725, lag=12)
plot(diff_n1725)

kpss.test(diff_n1725)
## Warning in kpss.test(diff_n1725): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff_n1725
## KPSS Level = 0.18729, Truncation lag parameter = 4, p-value = 0.1
#p value 0.1 (data is stationary)
adf.test(diff_n1725)
## Warning in adf.test(diff_n1725): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_n1725
## Dickey-Fuller = -4.7105, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)
#train and test
hh <- 12
n1725_train <- ts(n1725[1:(n1725_length - hh)], frequency=frequency(n1725),
start=start(n1725))
n1725_test <- n1725[(n1725_length - hh + 1):n1725_length]
#AR process
ar1.n1725 <- ts(diff(n1725_train), frequency=12)
tsdisplay(ar1.n1725)

#AR3 data, 3 spikes on the PACF
#MA process
ma1.n1725 <- ts(diff(n1725_train), frequency = 12)
tsdisplay(ma1.n1725)

#MA4 data, 4 in the ACF graph
#calculate arima
arima_n1725_1 <- arima(n1725_train, order=c(3,1,4))
tsdisplay(residuals(arima_n1725_1))

#extract coefficient
coef(arima_n1725_1)
## ar1 ar2 ar3 ma1 ma2 ma3 ma4
## 0.7237415 0.7249364 -0.9947287 -1.5049498 -0.1700583 1.5379328 -0.7964953
#forecast
arima_forecast_n1725 <- forecast(arima_n1725_1, h=12)
plot(forecast(arima_n1725_1, h=12))

arima_forecast_n1725 <- arima_forecast_n1725$mean
# Errors
arima_errors_n1725 <- n1725_test - arima_forecast_n1725
#Error Metrics
arima_MAE_n1725 <- mean(abs(arima_errors_n1725))
arima_MAPE_n1725 <- 100 * mean(abs(arima_errors_n1725)/n1725_test)
#Automated arima
auto_arima_n1725 <- auto.arima(n1725)
#best method with AIC
auto.arima(n1725, ic="aic")
## Series: n1725
## ARIMA(1,0,0)(2,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 sar2 drift
## 0.2507 -0.6274 -0.3144 -5.4514
## s.e. 0.0926 0.0956 0.0960 2.6266
##
## sigma^2 = 215559: log likelihood = -862.57
## AIC=1735.14 AICc=1735.7 BIC=1748.82
#calculate arima 2
arima_n1725_2 <- arima(n1725_train, order=c(1,0,0), seasonal =c(2,1,0))
tsdisplay(residuals(arima_n1725_2))

#extract coefficient
coef(arima_n1725_2)
## ar1 sar1 sar2
## 0.2768041 -0.5516272 -0.2635639
#forecast
arima2_forecast_n1725 <- forecast(arima_n1725_2, h=12)
plot(forecast(arima_n1725_2, h=12))

arima2_forecast_n1725 <- arima2_forecast_n1725$mean
# Errors
arima2_errors_n1725 <- n1725_test - arima2_forecast_n1725
#Error Metrics
arima2_MAE_n1725 <- mean(abs(arima2_errors_n1725))
arima2_MAPE_n1725 <- 100 * mean(abs(arima2_errors_n1725)/n1725_test)
#Seasonal Arima
# Plot ACF and PACF of first differences
tsdisplay(diff(n1725_train))

# Plot ACF and PACF of first and seasonal differences
tsdisplay(diff(diff(n1725_train),lag=12))

# SARIMA(3,0,4)(2,1,3)[12]
fit1_n1725 <- arima(n1725_train, order=c(3,0,4), seasonal=c(2,1,3))
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
tsdisplay(residuals(fit1_n1725))

fit1_forecast_n1725 <- forecast(fit1_n1725, h=12)
plot(forecast(fit1_n1725, h=12))

fit1_forecast_n1725 <- fit1_forecast_n1725$mean
# Errors
sarima1_errors_n1725 <- n1725_test - fit1_forecast_n1725
#Error Metrics
sarima1_MAE_n1725 <- mean(abs(sarima1_errors_n1725))
sarima1_MAPE_n1725 <- 100 * mean(abs(sarima1_errors_n1725)/n1725_test)
#Automatic order selection
# Auto.arima
fit2_n1725 <- auto.arima(n1725_train)
fit2_forecast_n1725 <- forecast(fit2_n1725, h=12)
plot(forecast(fit2_n1725, h=12))

fit2_forecast_n1725 <- fit2_forecast_n1725$mean
# Errors
sarima_errors_n1725 <- n1725_test - fit2_forecast_n1725
#Error Metrics
sarima_MAE_n1725 <- mean(abs(sarima_errors_n1725))
sarima_MAPE_n1725 <- 100 * mean(abs(sarima_errors_n1725)/n1725_test)
# Fitted values of auto.arima
fitted(fit2_n1725)
## Jan Feb Mar Apr May Jun Jul Aug
## 1984
## 1985 2857.111 2637.324 2737.217 2937.009 4035.902 3796.135 3636.288 4795.120
## 1986 2907.030 2588.392 2735.748 2964.212 4027.091 3889.399 3898.104 4717.237
## 1987 2702.455 2537.081 2784.630 2863.180 3872.594 3958.524 4124.865 3870.749
## 1988 2707.241 2409.198 2602.874 2663.958 3369.797 4043.081 3610.966 3668.262
## 1989 2448.429 2407.502 2458.168 2599.321 3565.859 3909.760 3735.170 3395.294
## 1990 2166.622 2506.420 2346.104 3105.010 3353.639 3533.026 3077.656 3431.922
## 1991 2317.249 2163.777 2470.930 3041.607 3339.360 2888.527 3384.795 3515.301
## 1992 2120.949 2053.598 2789.025 3082.796 3231.773 3159.651 3275.320 2866.440
## 1993 2199.154 1992.070 2710.408 2786.406 2865.124 2987.086 3627.859 2997.190
## 1994 2054.478 2023.829 2733.750
## Sep Oct Nov Dec
## 1984 3736.253 2877.106 2617.358
## 1985 4135.773 3654.304 2817.022 2662.577
## 1986 3805.921 3543.357 2779.320 2943.518
## 1987 3835.186 3265.928 2900.397 2651.749
## 1988 3610.299 3030.697 2908.377 2645.416
## 1989 3025.973 2738.608 2636.539 2495.228
## 1990 2752.332 2629.984 2364.344 2473.022
## 1991 2329.618 2701.379 2165.537 2468.873
## 1992 2576.273 2613.019 2289.364 2204.752
## 1993 2527.338 2976.697 2120.526 2428.445
## 1994
#calculate forecast for arima
arima_n1725_3 <- arima(n1725, order=c(1,0,0), seasonal = c(2,1,0))
tsdisplay(residuals(arima_n1725_3))

#extract coefficient
coef(arima_n1725_3)
## ar1 sar1 sar2
## 0.2885125 -0.6020891 -0.2879073
#forecast
ARIMA_forecast_n1725 <- forecast(arima_n1725_3, h=14)
plot(forecast(arima_n1725_3, h=14))

ARIMA for N1726
#KPSS test, the Null and Alternative Hypotheses are:
#H0: The data is stationary
#H1: The data is not stationary
kpss.test(n1726)
## Warning in kpss.test(n1726): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: n1726
## KPSS Level = 1.4102, Truncation lag parameter = 4, p-value = 0.01
#p value 0.01 (data is not stationary)
#ADF test, The null and alternative hypotheses are:
#H0: The data is not stationary.
#H1: The data is stationary.
adf.test(n1726)
## Warning in adf.test(n1726): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: n1726
## Dickey-Fuller = -5.1018, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)
#DIFFERENCING
diff_n1726 <- diff(n1726, lag=12)
plot(diff_n1726)

kpss.test(diff_n1726)
## Warning in kpss.test(diff_n1726): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff_n1726
## KPSS Level = 1.1438, Truncation lag parameter = 4, p-value = 0.01
#p value 0.01 (data is not stationary)
adf.test(diff_n1726)
## Warning in adf.test(diff_n1726): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_n1726
## Dickey-Fuller = -5.1869, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)
diff_n1726 <- diff(diff(n1726, lag=12))
plot(diff_n1726)

kpss.test(diff_n1726)
## Warning in kpss.test(diff_n1726): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff_n1726
## KPSS Level = 0.02404, Truncation lag parameter = 4, p-value = 0.1
#p value 0.01 (data is not stationary)
adf.test(diff_n1726)
## Warning in adf.test(diff_n1726): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_n1726
## Dickey-Fuller = -6.4816, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)
#train and test
hh <- 12
n1726_train <- ts(n1726[1:(n1726_length - hh)], frequency=frequency(n1726),
start=start(n1726))
n1726_test <- n1726[(n1726_length - hh + 1):n1726_length]
#AR process
ar1.n1726 <- ts(diff(diff(n1726_train)), frequency=12)
tsdisplay(ar1.n1726)

#AR2 data, 2 spikes on the PACF
#MA process
ma1.n1726 <- ts(diff(diff(n1726_train)), frequency = 12)
tsdisplay(ma1.n1726)

#MA1 data, 1 spike in the ACF graph
#calculate arima
arima_n1726_1 <- arima((n1726_train), order=c(2,2,1))
tsdisplay(residuals(arima_n1726_1))

#extract coefficient
coef(arima_n1726_1)
## ar1 ar2 ma1
## -0.39683705 -0.08890487 -0.99999932
#forecast
arima_forecast_n1726 <- forecast(arima_n1726_1, h=12)
plot(forecast(arima_n1726_1, h=12))

arima_forecast_n1726 <- arima_forecast_n1726$mean
# Errors
arima_errors_n1726 <- n1726_test - arima_forecast_n1726
#Error Metrics
arima_MAE_n1726 <- mean(abs(arima_errors_n1726))
arima_MAPE_n1726 <- 100 * mean(abs(arima_errors_n1726)/n1726_test)
# done by box jenkings method
arimaBJ_n1726_1 <- arima((n1726_train), order=c(2,2,0))
tsdisplay(residuals(arimaBJ_n1726_1))

#extract coefficient
coef(arimaBJ_n1726_1)
## ar1 ar2
## -0.9766507 -0.4986086
#Spikes in the residuals, so we try again until no significant spikes appear
arimaBJ_n1726_1 <- arima((n1726_train), order=c(3,2,5))
tsdisplay(residuals(arimaBJ_n1726_1))

#extract coefficient
coef(arimaBJ_n1726_1)
## ar1 ar2 ar3 ma1 ma2 ma3 ma4
## 0.7384934 0.7364873 -0.9843399 -2.6038678 1.4022215 1.8407955 -2.3984817
## ma5
## 0.7636000
#forecast
arimaBJ_forecast_n1726 <- forecast(arimaBJ_n1726_1, h=12)
plot(forecast(arimaBJ_n1726_1, h=12))

arimaBJ_forecast_n1726 <- arimaBJ_forecast_n1726$mean
# Errors
arima_errors_n1726 <- n1726_test - arimaBJ_forecast_n1726
#Error Metrics
arima_MAE_n1726 <- mean(abs(arima_errors_n1726))
arima_MAPE_n1726 <- 100 * mean(abs(arima_errors_n1726)/n1726_test)
#Automated arima
auto_arima_n1726 <- auto.arima(n1726)
#best method with AIC
auto.arima(n1726, ic="aic")
## Series: n1726
## ARIMA(2,1,1)(1,0,2)[12] with drift
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1 sma2 drift
## 0.1576 0.1199 -0.9104 0.8874 -0.6294 0.1645 -21.9005
## s.e. 0.1105 0.0991 0.0377 0.0602 0.1394 0.1427 22.5452
##
## sigma^2 = 433355: log likelihood = -990.45
## AIC=1996.89 AICc=1998.13 BIC=2019.52
#calculate arima 2
arima_n1726_2 <- arima(n1726_train, order=c(2,1,1), seasonal = c(1,0,2))
tsdisplay(residuals(arima_n1726_2))

#extract coefficient
coef(arima_n1726_2)
## ar1 ar2 ma1 sar1 sma1 sma2
## 0.1513244 0.1280471 -0.8976244 0.8897934 -0.6729108 0.2208005
#forecast
arima2_forecast_n1726 <- forecast(arima_n1726_2, h=12)
plot(forecast(arima_n1726_2, h=12))

arima2_forecast_n1726 <- arima2_forecast_n1726$mean
# Errors
arima2_errors_n1726 <- n1726_test - arima2_forecast_n1726
#Error Metrics
arima2_MAE_n1726 <- mean(abs(arima2_errors_n1726))
arima2_MAPE_n1726 <- 100 * mean(abs(arima2_errors_n1726)/n1726_test)
#Seasonal Arima
# Plot ACF and PACF of first differences
tsdisplay(diff(n1726_train))

# Plot ACF and PACF of first and seasonal differences
tsdisplay(diff(diff(n1726_train),lag=12))

# SARIMA(2,1,3)(1,1,4)[12]
fit1_n1726 <- Arima(n1726_train, order=c(2,1,3), seasonal=c(1,1,4))
tsdisplay(residuals(fit1_n1726))

fit1_forecast_n1726 <- forecast(fit1_n1726, h=12)
plot(forecast(fit1_n1726, h=12))

fit1_forecast_n1726 <- fit1_forecast_n1726$mean
# Errors
sarima1_errors_n1726 <- n1726_test - fit1_forecast_n1726
#Error Metrics
sarima1_MAE_n1726 <- mean(abs(sarima1_errors_n1726))
sarima1_MAPE_n1726 <- 100 * mean(abs(sarima1_errors_n1726)/n1726_test)
#Automatic order selection
# Auto.arima
fit2_n1726 <- auto.arima(n1726_train)
fit2_forecast_n1726 <- forecast(fit2_n1726, h=12)
plot(forecast(fit2_n1726, h=12))

fit2_forecast_n1726 <- fit2_forecast_n1726$mean
# Errors
sarima_errors_n1726 <- n1726_test - fit2_forecast_n1726
#Error Metrics
sarima_MAE_n1726 <- mean(abs(sarima_errors_n1726))
sarima_MAPE_n1726 <- 100 * mean(abs(sarima_errors_n1726)/n1726_test)
# Fitted values of auto.arima
fitted(fit2_n1726)
## Jan Feb Mar Apr May Jun Jul Aug
## 1984
## 1985 3659.363 3046.769 3554.200 3679.113 3733.285 4215.661 4768.083 5367.341
## 1986 3230.556 2367.471 4157.134 3437.085 3751.565 4701.185 5254.458 5630.973
## 1987 2282.994 1877.169 3570.505 2458.902 3150.023 4097.539 4076.360 4724.471
## 1988 1840.830 1295.797 2579.227 1679.439 2744.282 3460.053 3401.876 3508.929
## 1989 2026.409 1185.177 1939.128 1861.023 2298.558 2383.360 2883.963 2784.947
## 1990 1544.456 1208.532 1650.551 1592.713 1763.482 2284.260 2161.552 2936.883
## 1991 1436.422 1205.851 1571.471 1633.793 1903.915 1904.999 2434.148 2577.618
## 1992 1468.838 1367.970 2010.841 1635.070 1994.092 1923.067 2410.144 2366.583
## 1993 1914.214 1605.340 1984.793 2099.025 1671.239 2247.453 2326.148 2434.278
## 1994 1659.822 1613.570 1995.280
## Sep Oct Nov Dec
## 1984 4575.396 4006.962 3779.971
## 1985 4811.274 4564.364 3684.213 3717.764
## 1986 3723.429 3750.421 2565.619 2683.762
## 1987 2649.605 2537.116 1873.299 1356.776
## 1988 2559.599 1835.356 1672.308 1242.913
## 1989 1957.842 1862.389 1300.624 1656.134
## 1990 2097.379 1548.658 1445.846 1920.430
## 1991 1650.411 1537.436 1851.791 1820.152
## 1992 1894.249 2198.315 2007.081 1648.329
## 1993 2135.077 2481.636 1992.865 2147.528
## 1994
#calculate forecast for arima
arima_n1726_3 <- auto.arima(n1726)
tsdisplay(residuals(arima_n1726_3))

#extract coefficient
coef(arima_n1726_3)
## ar1 ar2 ma1 sar1 sma1 sma2
## 0.1575712 0.1198517 -0.9103883 0.8874044 -0.6293673 0.1644808
## drift
## -21.9004665
#forecast
ARIMA_forecast_n1726 <- forecast(arima_n1726_3, h=14)
plot(forecast(arima_n1726_3, h=14))

REGRESSION for N1725
#Convert to Quarterly Time Series starting from Q4 in 1984
N1725 <- ts(n1725[7:(n1725_length)], frequency=frequency(n1725),
start=start(n1725))
#Create Seasonal Dummies
M1_n1725 <- rep(c(1,0,0,0,0,0,0,0,0,0,0,0),10)
M2_n1725 <- rep(c(0,1,0,0,0,0,0,0,0,0,0,0),10)
M3_n1725 <- rep(c(0,0,1,0,0,0,0,0,0,0,0,0),10)
M4_n1725 <- rep(c(0,0,0,1,0,0,0,0,0,0,0,0),10)
M5_n1725 <- rep(c(0,0,0,0,1,0,0,0,0,0,0,0),10)
M6_n1725 <- rep(c(0,0,0,0,0,1,0,0,0,0,0,0),10)
M7_n1725 <- rep(c(0,0,0,0,0,0,1,0,0,0,0,0),10)
M8_n1725 <- rep(c(0,0,0,0,0,0,0,1,0,0,0,0),10)
M9_n1725 <- rep(c(0,0,0,0,0,0,0,0,1,0,0,0),10)
M10_n1725 <- rep(c(0,0,0,0,0,0,0,0,0,1,0,0),10)
M11_n1725 <- rep(c(0,0,0,0,0,0,0,0,0,0,1,0),10)
#M12_n1725 <- rep(c(0,0,0,0,0,0,0,0,0,0,0,1),10)
N1725 <- cbind(N1725,M1_n1725,M2_n1725,M3_n1725,M4_n1725, M5_n1725, M6_n1725,M7_n1725, M8_n1725, M9_n1725, M10_n1725, M11_n1725) #M12_n1725
colnames(N1725) <- c("data","M1","M2","M3","M4",'M5','M6',"M7","M8","M9","M10",'M11' )#M12
#Use the Seasonal Dummies
fit3_n1725 <- lm(data ~ ., data=N1725)
summary(fit3_n1725)
##
## Call:
## lm(formula = data ~ ., data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1078 -265 -24 220 1242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2808.0 147.9 18.984 < 2e-16 ***
## M1 96.0 209.2 0.459 0.647199
## M2 596.0 209.2 2.849 0.005249 **
## M3 780.0 209.2 3.729 0.000308 ***
## M4 762.0 209.2 3.643 0.000416 ***
## M5 750.0 209.2 3.585 0.000507 ***
## M6 216.0 209.2 1.033 0.304091
## M7 204.0 209.2 0.975 0.331617
## M8 -226.0 209.2 -1.080 0.282361
## M9 -224.0 209.2 -1.071 0.286619
## M10 -354.0 209.2 -1.692 0.093464 .
## M11 -384.0 209.2 -1.836 0.069144 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 467.7 on 108 degrees of freedom
## Multiple R-squared: 0.4786, Adjusted R-squared: 0.4255
## F-statistic: 9.013 on 11 and 108 DF, p-value: 3.156e-11
plot(fit3_n1725)


## hat values (leverages) are all = 0.1
## and there are no factor predictors; no plot no. 5

#Lag 1 of Sales
L1_data <- lag(N1725[,"data"],-1)
N1725_colnames <- colnames(N1725)
N1725 <- cbind(N1725,L1_data)
colnames(N1725) <- c(N1725_colnames,"L1_data")
#Use seasonal dummies and lag
fit4_n1725 <- lm(data ~ ., data=N1725)
summary(fit4_n1725)
##
## Call:
## lm(formula = data ~ ., data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1082.47 -283.94 -5.14 167.01 1210.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1724.73329 249.87276 6.902 3.89e-10 ***
## M1 -65.10744 196.55048 -0.331 0.7411
## M2 381.49174 193.54216 1.971 0.0513 .
## M3 342.04564 207.35786 1.650 0.1020
## M4 241.81747 214.45380 1.128 0.2620
## M5 237.86153 213.71700 1.113 0.2682
## M6 -290.77576 213.23078 -1.364 0.1756
## M7 -64.13532 196.06600 -0.327 0.7442
## M8 -488.77262 195.78997 -2.496 0.0141 *
## M9 -294.60897 189.47260 -1.555 0.1230
## M10 -425.50275 189.48533 -2.246 0.0268 *
## M11 -397.40677 188.99017 -2.103 0.0379 *
## L1_data 0.44689 0.08711 5.130 1.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 422.6 on 106 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5823, Adjusted R-squared: 0.5351
## F-statistic: 12.32 on 12 and 106 DF, p-value: 2.63e-15
#Multicollinearity
# Correlation matrix for N1725 data
cor(N1725,use="c")
## data M1 M2 M3 M4
## data 1.000000000 -0.04315862 0.20168007 0.29199393 0.28315888
## M1 -0.043158622 1.00000000 -0.08663865 -0.08663865 -0.08663865
## M2 0.201680067 -0.08663865 1.00000000 -0.09174312 -0.09174312
## M3 0.291993932 -0.08663865 -0.09174312 1.00000000 -0.09174312
## M4 0.283158880 -0.08663865 -0.09174312 -0.09174312 1.00000000
## M5 0.277268846 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M6 0.015162302 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M7 0.009272267 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M8 -0.201787309 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M9 -0.200805636 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M10 -0.264614345 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M11 -0.279339432 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## L1_data 0.608672558 -0.10022847 -0.04308229 0.20235971 0.29268237
## M5 M6 M7 M8 M9
## data 0.27726885 0.01516230 0.009272267 -0.201787309 -0.20080564
## M1 -0.08663865 -0.08663865 -0.086638647 -0.086638647 -0.08663865
## M2 -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M3 -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M4 -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M5 1.00000000 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M6 -0.09174312 1.00000000 -0.091743119 -0.091743119 -0.09174312
## M7 -0.09174312 -0.09174312 1.000000000 -0.091743119 -0.09174312
## M8 -0.09174312 -0.09174312 -0.091743119 1.000000000 -0.09174312
## M9 -0.09174312 -0.09174312 -0.091743119 -0.091743119 1.00000000
## M10 -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M11 -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## L1_data 0.28384645 0.27795585 0.015823790 0.009933182 -0.20114694
## M10 M11 L1_data
## data -0.26461435 -0.27933943 0.608672558
## M1 -0.08663865 -0.08663865 -0.100228471
## M2 -0.09174312 -0.09174312 -0.043082290
## M3 -0.09174312 -0.09174312 0.202359709
## M4 -0.09174312 -0.09174312 0.292682365
## M5 -0.09174312 -0.09174312 0.283846453
## M6 -0.09174312 -0.09174312 0.277955845
## M7 -0.09174312 -0.09174312 0.015823790
## M8 -0.09174312 -0.09174312 0.009933182
## M9 -0.09174312 -0.09174312 -0.201146937
## M10 1.00000000 -0.09174312 -0.200165169
## M11 -0.09174312 1.00000000 -0.263980089
## L1_data -0.20016517 -0.26398009 1.000000000
#VIF for fit4
vif(fit4_n1725)
## M1 M2 M3 M4 M5 M6 M7 M8
## 1.799994 1.921610 2.205744 2.359292 2.343108 2.332459 1.972054 1.966505
## M9 M10 M11 L1_data
## 1.841649 1.841897 1.832283 1.925234
#Analyse residuals of multiple components
tsdisplay(residuals(fit4_n1725))

#Lag 12 of Sales
L12_data <- lag(N1725[,"data"],-12)
N1725_colnames <- colnames(N1725)
N1725 <- cbind(N1725,L12_data)
colnames(N1725) <- c(N1725_colnames,"L12_data")
#Use seasonal dummies and lag
fit5_n1725 <- lm(data ~ ., data=N1725)
summary(fit5_n1725)
##
## Call:
## lm(formula = data ~ ., data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -965.36 -243.63 -8.17 186.20 1213.60
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1699.53170 328.61405 5.172 1.3e-06 ***
## M1 -26.88211 197.48089 -0.136 0.892013
## M2 317.19963 205.27656 1.545 0.125651
## M3 380.89091 220.14659 1.730 0.086882 .
## M4 300.78126 228.29686 1.318 0.190873
## M5 156.96437 228.68033 0.686 0.494156
## M6 -263.43219 217.29564 -1.212 0.228428
## M7 -36.43339 200.08539 -0.182 0.855905
## M8 -415.58046 202.65599 -2.051 0.043083 *
## M9 -302.83900 195.44882 -1.549 0.124633
## M10 -385.58356 197.07519 -1.957 0.053370 .
## M11 -356.55724 197.21290 -1.808 0.073807 .
## L1_data 0.34898 0.09792 3.564 0.000577 ***
## L12_data 0.08888 0.08848 1.005 0.317712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 411.7 on 94 degrees of freedom
## (25 observations deleted due to missingness)
## Multiple R-squared: 0.5766, Adjusted R-squared: 0.5181
## F-statistic: 9.849 on 13 and 94 DF, p-value: 1.184e-12
#Fit Regression with Seasonally Dummies, and lag 12
fit6_n1725 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data, data=N1725)
summary(fit6_n1725)
##
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 +
## M10 + M11 + L12_data, data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -974.61 -256.40 -23.02 210.99 1281.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2336.57547 292.22855 7.996 3.07e-12 ***
## M1 96.01280 206.07319 0.466 0.64234
## M2 446.56788 214.12193 2.086 0.03970 *
## M3 648.42116 219.32598 2.956 0.00393 **
## M4 651.19417 218.35651 2.982 0.00364 **
## M5 504.69032 219.18549 2.303 0.02349 *
## M6 74.96144 207.13744 0.362 0.71824
## M7 124.95192 206.54654 0.605 0.54665
## M8 -213.95339 206.23379 -1.037 0.30217
## M9 -239.54965 206.27841 -1.161 0.24843
## M10 -319.81291 207.94105 -1.538 0.12737
## M11 -323.86937 208.77864 -1.551 0.12417
## L12_data 0.16056 0.09132 1.758 0.08192 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 436.4 on 95 degrees of freedom
## (25 observations deleted due to missingness)
## Multiple R-squared: 0.5194, Adjusted R-squared: 0.4587
## F-statistic: 8.558 on 12 and 95 DF, p-value: 8.695e-11
#Lag 3 of Sales
L3_data <- lag(N1725[,"data"],-3)
N1725_colnames <- colnames(N1725)
N1725 <- cbind(N1725,L3_data)
colnames(N1725) <- c(N1725_colnames,"L3_data")
#Fit Regression with Seasonally Dummies, log (lag 3), lag 1, and lag 12
fit7_n1725 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data + L1_data + I(log(L3_data)), data=N1725)
summary(fit7_n1725)
##
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 +
## M10 + M11 + L12_data + L1_data + I(log(L3_data)), data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -907.96 -261.99 -19.84 193.67 1226.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.708e+03 2.335e+03 -0.732 0.46626
## M1 3.073e+00 1.973e+02 0.016 0.98761
## M2 3.720e+02 2.074e+02 1.794 0.07607 .
## M3 3.860e+02 2.188e+02 1.764 0.08097 .
## M4 2.987e+02 2.269e+02 1.317 0.19122
## M5 8.960e+01 2.318e+02 0.387 0.70001
## M6 -3.744e+02 2.287e+02 -1.637 0.10498
## M7 -1.687e+02 2.181e+02 -0.773 0.44135
## M8 -5.346e+02 2.170e+02 -2.464 0.01559 *
## M9 -3.620e+02 1.983e+02 -1.825 0.07120 .
## M10 -4.573e+02 2.018e+02 -2.266 0.02576 *
## M11 -3.642e+02 1.961e+02 -1.857 0.06641 .
## L12_data 7.202e-02 8.868e-02 0.812 0.41874
## L1_data 3.102e-01 1.008e-01 3.077 0.00275 **
## I(log(L3_data)) 4.534e+02 3.076e+02 1.474 0.14384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 409.2 on 93 degrees of freedom
## (28 observations deleted due to missingness)
## Multiple R-squared: 0.5863, Adjusted R-squared: 0.524
## F-statistic: 9.415 on 14 and 93 DF, p-value: 1.441e-12
#FORECASTING
# Split the data into train and test sets
N1725_train <- window(N1725, start(N1725), (1993+8/12))
#we have to sustitude all NA with 0
N1725_train[is.na(N1725_train)] = 0
N1725_test <- window(N1725, (1993+8/12), end(N1725))
plot(N1725[,"data"])

plot(N1725_train[,"data"])

plot(N1725_test[,"data"])

# Regression with no variables
N1725_Model0 <- lm(data ~ 1, data=N1725_train)
# Regression with all variables
N1725_Model1 <- lm(data ~ ., data=N1725_train)
# Summary of Regressions
summary(N1725_Model0)
##
## Call:
## lm(formula = data ~ 1, data = N1725_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1077.78 -462.78 -67.78 422.22 1802.22
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2997.78 61.42 48.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 638.3 on 107 degrees of freedom
summary(N1725_Model1)
##
## Call:
## lm(formula = data ~ ., data = N1725_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1004.75 -234.18 -65.78 213.71 1079.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2133.53923 280.84131 7.597 2.33e-11 ***
## M1 184.76961 209.10399 0.884 0.37918
## M2 622.13924 221.22987 2.812 0.00600 **
## M3 638.12662 232.03975 2.750 0.00716 **
## M4 518.97581 233.11257 2.226 0.02841 *
## M5 495.56310 230.17843 2.153 0.03391 *
## M6 -147.19417 230.67342 -0.638 0.52497
## M7 -43.20402 218.33680 -0.198 0.84357
## M8 -439.01776 218.66804 -2.008 0.04758 *
## M9 -283.20869 208.02217 -1.361 0.17667
## M10 -463.13098 207.86946 -2.228 0.02829 *
## M11 -448.43284 206.00738 -2.177 0.03203 *
## L1_data 0.28960 0.08759 3.306 0.00134 **
## L12_data -0.11835 0.04122 -2.871 0.00507 **
## L3_data 0.09217 0.07690 1.199 0.23375
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 436 on 93 degrees of freedom
## Multiple R-squared: 0.5945, Adjusted R-squared: 0.5334
## F-statistic: 9.738 on 14 and 93 DF, p-value: 6.178e-13
# AIC forward selection
N1725_Model2 <- step(N1725_Model0, formula(N1725_Model1), direction="forward")
## Start: AIC=1396.09
## data ~ 1
##
## Df Sum of Sq RSS AIC
## + L1_data 1 13819646 29773421 1356.9
## + M11 1 3693382 39899685 1388.5
## + M3 1 3666667 39926400 1388.6
## + M5 1 3613527 39979539 1388.8
## + M4 1 3302836 40290230 1389.6
## + M10 1 3030303 40562764 1390.3
## + M2 1 1806012 41787055 1393.5
## + M9 1 1501867 42091200 1394.3
## + M8 1 1451103 42141964 1394.4
## <none> 43593067 1396.1
## + L12_data 1 158039 43435027 1397.7
## + L3_data 1 89757 43503310 1397.9
## + M1 1 73745 43519321 1397.9
## + M6 1 19394 43573673 1398.0
## + M7 1 3103 43589964 1398.1
##
## Step: AIC=1356.92
## data ~ L1_data
##
## Df Sum of Sq RSS AIC
## + M2 1 2069919 27703501 1351.1
## + L3_data 1 1699908 28073513 1352.6
## + M8 1 1490851 28282570 1353.4
## + M3 1 1449876 28323545 1353.5
## + M10 1 1372365 28401056 1353.8
## + M11 1 1215930 28557490 1354.4
## + M5 1 916197 28857224 1355.5
## + M6 1 839455 28933965 1355.8
## + M4 1 675595 29097826 1356.4
## <none> 29773421 1356.9
## + M9 1 435975 29337445 1357.3
## + M1 1 355314 29418107 1357.6
## + M7 1 29652 29743768 1358.8
## + L12_data 1 4513 29768907 1358.9
##
## Step: AIC=1351.13
## data ~ L1_data + M2
##
## Df Sum of Sq RSS AIC
## + M3 1 1783156 25920345 1346.0
## + M8 1 1198705 26504796 1348.4
## + M5 1 1181534 26521967 1348.4
## + M10 1 1077437 26626065 1348.8
## + M11 1 930303 26773198 1349.4
## + M4 1 904799 26798702 1349.5
## + L3_data 1 778328 26925174 1350.1
## + M6 1 629829 27073672 1350.7
## + M1 1 551711 27151791 1351.0
## <none> 27703501 1351.1
## + M9 1 275193 27428309 1352.1
## + L12_data 1 52376 27651126 1352.9
## + M7 1 1816 27701685 1353.1
##
## Step: AIC=1345.95
## data ~ L1_data + M2 + M3
##
## Df Sum of Sq RSS AIC
## + M5 1 1735789 24184556 1340.5
## + M4 1 1404881 24515464 1341.9
## + M8 1 927705 24992640 1344.0
## + M10 1 894407 25025938 1344.2
## + M11 1 791320 25129025 1344.6
## + M1 1 672858 25247487 1345.1
## <none> 25920345 1346.0
## + M6 1 340975 25579370 1346.5
## + L3_data 1 207476 25712869 1347.1
## + M9 1 185162 25735183 1347.2
## + L12_data 1 163599 25756746 1347.3
## + M7 1 10404 25909941 1347.9
##
## Step: AIC=1340.46
## data ~ L1_data + M2 + M3 + M5
##
## Df Sum of Sq RSS AIC
## + M4 1 2272613 21911942 1331.8
## + M1 1 791035 23393521 1338.9
## + M10 1 724660 23459896 1339.2
## + M11 1 676247 23508309 1339.4
## + M8 1 652095 23532461 1339.5
## <none> 24184556 1340.5
## + L12_data 1 349792 23834763 1340.9
## + L3_data 1 228147 23956408 1341.4
## + M9 1 111101 24073455 1342.0
## + M6 1 93614 24090942 1342.0
## + M7 1 76912 24107643 1342.1
##
## Step: AIC=1331.8
## data ~ L1_data + M2 + M3 + M5 + M4
##
## Df Sum of Sq RSS AIC
## + M1 1 967581 20944361 1328.9
## + L12_data 1 835730 21076212 1329.6
## + M11 1 533992 21377950 1331.1
## + M10 1 516252 21395690 1331.2
## <none> 21911942 1331.8
## + M8 1 337118 21574824 1332.1
## + M7 1 287047 21624895 1332.4
## + M9 1 38703 21873239 1333.6
## + M6 1 15566 21896377 1333.7
## + L3_data 1 12271 21899671 1333.7
##
## Step: AIC=1328.93
## data ~ L1_data + M2 + M3 + M5 + M4 + M1
##
## Df Sum of Sq RSS AIC
## + L12_data 1 953403 19990958 1325.9
## + M7 1 437715 20506646 1328.7
## <none> 20944361 1328.9
## + M10 1 324743 20619617 1329.2
## + M11 1 322977 20621384 1329.2
## + M8 1 210580 20733781 1329.8
## + L3_data 1 47484 20896877 1330.7
## + M6 1 42692 20901669 1330.7
## + M9 1 1764 20942596 1330.9
##
## Step: AIC=1325.9
## data ~ L1_data + M2 + M3 + M5 + M4 + M1 + L12_data
##
## Df Sum of Sq RSS AIC
## + M7 1 567306 19423651 1324.8
## + M11 1 436543 19554415 1325.5
## + M10 1 406645 19584313 1325.7
## <none> 19990958 1325.9
## + L3_data 1 246813 19744145 1326.5
## + M8 1 229463 19761495 1326.7
## + M6 1 92910 19898048 1327.4
## + M9 1 2736 19988222 1327.9
##
## Step: AIC=1324.79
## data ~ L1_data + M2 + M3 + M5 + M4 + M1 + L12_data + M7
##
## Df Sum of Sq RSS AIC
## <none> 19423651 1324.8
## + M11 1 314075 19109576 1325.0
## + M10 1 281016 19142635 1325.2
## + M6 1 258713 19164938 1325.3
## + L3_data 1 156275 19267377 1325.9
## + M8 1 122888 19300764 1326.1
## + M9 1 4369 19419282 1326.8
#backward
N1725_Model3 <- step(N1725_Model1, formula(N1725_Model0), direction="backward")
## Start: AIC=1326.62
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 +
## L1_data + L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M7 1 7443 17685367 1324.7
## - M6 1 77399 17755323 1325.1
## - M1 1 148417 17826341 1325.5
## - L3_data 1 273057 17950981 1326.3
## <none> 17677924 1326.6
## - M9 1 352324 18030248 1326.8
## - M8 1 766199 18444123 1329.2
## - M5 1 881083 18559007 1329.9
## - M11 1 900695 18578618 1330.0
## - M4 1 942131 18620054 1330.2
## - M10 1 943571 18621495 1330.2
## - M3 1 1437597 19115521 1333.1
## - M2 1 1503267 19181190 1333.4
## - L12_data 1 1566802 19244726 1333.8
## - L1_data 1 2077800 19755724 1336.6
##
## Step: AIC=1324.66
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M6 1 76928 17762295 1323.1
## - M1 1 219055 17904422 1324.0
## - L3_data 1 266653 17952020 1324.3
## <none> 17685367 1324.7
## - M9 1 410167 18095534 1325.1
## - M8 1 988298 18673665 1328.5
## - M11 1 1062586 18747952 1329.0
## - M10 1 1160902 18846269 1329.5
## - M4 1 1375224 19060590 1330.8
## - M5 1 1383944 19069311 1330.8
## - L12_data 1 1568759 19254126 1331.8
## - M3 1 1945972 19631339 1333.9
## - M2 1 1951974 19637341 1334.0
## - L1_data 1 2083315 19768682 1334.7
##
## Step: AIC=1323.13
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - L3_data 1 261807 18024101 1322.7
## - M1 1 306502 18068796 1323.0
## <none> 17762295 1323.1
## - M9 1 342214 18104508 1323.2
## - M8 1 916304 18678599 1326.6
## - M11 1 985892 18748187 1327.0
## - M10 1 1085102 18847396 1327.5
## - L12_data 1 1601481 19363775 1330.5
## - M4 1 1967694 19729989 1332.5
## - M5 1 2025065 19787359 1332.8
## - L1_data 1 2044312 19806607 1332.9
## - M2 1 2412251 20174546 1334.9
## - M3 1 2583984 20346279 1335.8
##
## Step: AIC=1322.71
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data +
## L12_data
##
## Df Sum of Sq RSS AIC
## - M1 1 167130 18191232 1321.7
## <none> 18024101 1322.7
## - M9 1 337437 18361539 1322.7
## - M8 1 777241 18801342 1325.3
## - M10 1 1090769 19114870 1327.1
## - M11 1 1128923 19153025 1327.3
## - L12_data 1 1373560 19397661 1328.6
## - M4 1 1713012 19737113 1330.5
## - M5 1 1911027 19935129 1331.6
## - M2 1 2205528 20229629 1333.2
## - M3 1 2369049 20393150 1334.0
## - L1_data 1 3579231 21603333 1340.3
##
## Step: AIC=1321.71
## data ~ M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + L12_data
##
## Df Sum of Sq RSS AIC
## <none> 18191232 1321.7
## - M9 1 523941 18715173 1322.8
## - M8 1 1026031 19217262 1325.6
## - L12_data 1 1375593 19566825 1327.6
## - M10 1 1454838 19646070 1328.0
## - M11 1 1524210 19715441 1328.4
## - M4 1 1580397 19771628 1328.7
## - M5 1 1771797 19963029 1329.7
## - M2 1 2039781 20231012 1331.2
## - M3 1 2214465 20405696 1332.1
## - L1_data 1 3414108 21605339 1338.3
#both
N1725_Model4 <- step(N1725_Model1, direction="both")
## Start: AIC=1326.62
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 +
## L1_data + L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M7 1 7443 17685367 1324.7
## - M6 1 77399 17755323 1325.1
## - M1 1 148417 17826341 1325.5
## - L3_data 1 273057 17950981 1326.3
## <none> 17677924 1326.6
## - M9 1 352324 18030248 1326.8
## - M8 1 766199 18444123 1329.2
## - M5 1 881083 18559007 1329.9
## - M11 1 900695 18578618 1330.0
## - M4 1 942131 18620054 1330.2
## - M10 1 943571 18621495 1330.2
## - M3 1 1437597 19115521 1333.1
## - M2 1 1503267 19181190 1333.4
## - L12_data 1 1566802 19244726 1333.8
## - L1_data 1 2077800 19755724 1336.6
##
## Step: AIC=1324.66
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M6 1 76928 17762295 1323.1
## - M1 1 219055 17904422 1324.0
## - L3_data 1 266653 17952020 1324.3
## <none> 17685367 1324.7
## - M9 1 410167 18095534 1325.1
## + M7 1 7443 17677924 1326.6
## - M8 1 988298 18673665 1328.5
## - M11 1 1062586 18747952 1329.0
## - M10 1 1160902 18846269 1329.5
## - M4 1 1375224 19060590 1330.8
## - M5 1 1383944 19069311 1330.8
## - L12_data 1 1568759 19254126 1331.8
## - M3 1 1945972 19631339 1333.9
## - M2 1 1951974 19637341 1334.0
## - L1_data 1 2083315 19768682 1334.7
##
## Step: AIC=1323.13
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - L3_data 1 261807 18024101 1322.7
## - M1 1 306502 18068796 1323.0
## <none> 17762295 1323.1
## - M9 1 342214 18104508 1323.2
## + M6 1 76928 17685367 1324.7
## + M7 1 6972 17755323 1325.1
## - M8 1 916304 18678599 1326.6
## - M11 1 985892 18748187 1327.0
## - M10 1 1085102 18847396 1327.5
## - L12_data 1 1601481 19363775 1330.5
## - M4 1 1967694 19729989 1332.5
## - M5 1 2025065 19787359 1332.8
## - L1_data 1 2044312 19806607 1332.9
## - M2 1 2412251 20174546 1334.9
## - M3 1 2583984 20346279 1335.8
##
## Step: AIC=1322.71
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data +
## L12_data
##
## Df Sum of Sq RSS AIC
## - M1 1 167130 18191232 1321.7
## <none> 18024101 1322.7
## - M9 1 337437 18361539 1322.7
## + L3_data 1 261807 17762295 1323.1
## + M6 1 72081 17952020 1324.3
## + M7 1 30201 17993900 1324.5
## - M8 1 777241 18801342 1325.3
## - M10 1 1090769 19114870 1327.1
## - M11 1 1128923 19153025 1327.3
## - L12_data 1 1373560 19397661 1328.6
## - M4 1 1713012 19737113 1330.5
## - M5 1 1911027 19935129 1331.6
## - M2 1 2205528 20229629 1333.2
## - M3 1 2369049 20393150 1334.0
## - L1_data 1 3579231 21603333 1340.3
##
## Step: AIC=1321.71
## data ~ M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + L12_data
##
## Df Sum of Sq RSS AIC
## <none> 18191232 1321.7
## + M1 1 167130 18024101 1322.7
## - M9 1 523941 18715173 1322.8
## + M6 1 134989 18056243 1322.9
## + L3_data 1 122435 18068796 1323.0
## + M7 1 1116 18190115 1323.7
## - M8 1 1026031 19217262 1325.6
## - L12_data 1 1375593 19566825 1327.6
## - M10 1 1454838 19646070 1328.0
## - M11 1 1524210 19715441 1328.4
## - M4 1 1580397 19771628 1328.7
## - M5 1 1771797 19963029 1329.7
## - M2 1 2039781 20231012 1331.2
## - M3 1 2214465 20405696 1332.1
## - L1_data 1 3414108 21605339 1338.3
#fit 8
fit8_n1725 <- lm(data ~ L12_data + L1_data + I(log(L3_data)), data=N1725)
summary(fit8_n1725)
##
## Call:
## lm(formula = data ~ L12_data + L1_data + I(log(L3_data)), data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1006.81 -284.37 -14.82 234.09 1360.30
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3022.22182 1857.83620 1.627 0.107
## L12_data 0.33584 0.07979 4.209 5.46e-05 ***
## L1_data 0.43258 0.09246 4.679 8.73e-06 ***
## I(log(L3_data)) -296.38379 241.08539 -1.229 0.222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 446.2 on 104 degrees of freedom
## (28 observations deleted due to missingness)
## Multiple R-squared: 0.45, Adjusted R-squared: 0.4341
## F-statistic: 28.36 on 3 and 104 DF, p-value: 1.756e-13
#fit 9
fit9_n1725 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + L3_data, data=N1725)
summary(fit9_n1725)
##
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 +
## M10 + M11 + L1_data + L3_data, data = N1725)
##
## Residuals:
## Min 1Q Median 3Q Max
## -989.79 -229.65 -5.88 182.24 1179.88
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1379.56041 293.05541 4.708 7.84e-06 ***
## M1 -12.75880 194.33619 -0.066 0.94778
## M2 384.99677 197.66961 1.948 0.05418 .
## M3 374.79063 206.26992 1.817 0.07213 .
## M4 258.63126 210.94132 1.226 0.22296
## M5 154.30367 213.44558 0.723 0.47137
## M6 -411.64107 216.70518 -1.900 0.06029 .
## M7 -218.25102 205.04266 -1.064 0.28963
## M8 -641.33658 204.53929 -3.136 0.00224 **
## M9 -370.94982 189.12440 -1.961 0.05253 .
## M10 -499.32658 188.92412 -2.643 0.00950 **
## M11 -394.94176 185.34797 -2.131 0.03548 *
## L1_data 0.37794 0.09212 4.103 8.19e-05 ***
## L3_data 0.19826 0.09154 2.166 0.03262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 414.4 on 103 degrees of freedom
## (19 observations deleted due to missingness)
## Multiple R-squared: 0.5937, Adjusted R-squared: 0.5425
## F-statistic: 11.58 on 13 and 103 DF, p-value: 5.506e-15
#Forecast from Model 1
N1725_Model1Forecast <- predict(N1725_Model1, N1725_test)
plot(N1725_Model1Forecast)

plot.ts(N1725_Model1Forecast)

# Errors
N1725_test_data <- (N1725_test[,"data"])
model1_errors_n1725 <- N1725_test_data - N1725_Model1Forecast
#eliminate NAs
model1_errors_n1725 <- ts(model1_errors_n1725[1:13], frequency=frequency(model1_errors_n1725),
start=start(model1_errors_n1725))
View(model1_errors_n1725)
#Error Metrics
model1_MAE_n1725 <- mean(abs(model1_errors_n1725))
model1_MAPE_n1725 <- 100 * mean(abs(model1_errors_n1725)/N1725_test_data)
#Forecast from fit 8 and 9
N1725_fit8Forecast <- predict(fit8_n1725, N1725_test)
plot(N1725_fit8Forecast)

plot.ts(N1725_fit8Forecast)

# Errors
N1725_test_data <- (N1725_test[,"data"])
fit8_errors_n1725 <- N1725_test_data - N1725_fit8Forecast
#eliminate NAs
fit8_errors_n1725 <- ts(fit8_errors_n1725[1:13], frequency=frequency(fit8_errors_n1725),
start=start(fit8_errors_n1725))
#Error Metrics
fit8_MAE_n1725 <- mean(abs(fit8_errors_n1725))
fit8_MAPE_n1725 <- 100 * mean(abs(fit8_errors_n1725)/N1725_test_data)
#fit 9
N1725_fit9Forecast <- predict(fit9_n1725, N1725_test)
plot(N1725_fit9Forecast)

plot.ts(N1725_fit9Forecast)

# Errors
N1725_test_data <- (N1725_test[,"data"])
fit9_errors_n1725 <- N1725_test_data - N1725_fit9Forecast
#eliminate NAs
fit9_errors_n1725 <- ts(fit9_errors_n1725[1:13], frequency=frequency(fit9_errors_n1725),
start=start(fit9_errors_n1725))
#Error Metrics
fit9_MAE_n1725 <- mean(abs(fit9_errors_n1725))
fit9_MAPE_n1725 <- 100 * mean(abs(fit9_errors_n1725)/N1725_test_data)
#FORECAST FOR REGRESSION
regression_forecast_n1725 <- predict(fit9_n1725, N1725_test)
plot(regression_forecast_n1725)

plot.ts(regression_forecast_n1725)

REGRESSION for N1726
#Convert to monthly Time Series starting from March in 1985
N1726 <- ts(n1726[7:(n1726_length)], frequency=frequency(n1726),
start=start(n1726))
#Create Seasonal Dummies
M1_n1726 <- rep(c(1,0,0,0,0,0,0,0,0,0,0,0),10)
M2_n1726 <- rep(c(0,1,0,0,0,0,0,0,0,0,0,0),10)
M3_n1726 <- rep(c(0,0,1,0,0,0,0,0,0,0,0,0),10)
M4_n1726 <- rep(c(0,0,0,1,0,0,0,0,0,0,0,0),10)
M5_n1726 <- rep(c(0,0,0,0,1,0,0,0,0,0,0,0),10)
M6_n1726 <- rep(c(0,0,0,0,0,1,0,0,0,0,0,0),10)
M7_n1726 <- rep(c(0,0,0,0,0,0,1,0,0,0,0,0),10)
M8_n1726 <- rep(c(0,0,0,0,0,0,0,1,0,0,0,0),10)
M9_n1726 <- rep(c(0,0,0,0,0,0,0,0,1,0,0,0),10)
M10_n1726 <- rep(c(0,0,0,0,0,0,0,0,0,1,0,0),10)
M11_n1726 <- rep(c(0,0,0,0,0,0,0,0,0,0,1,0),10)
#M12_n1726 <- rep(c(0,0,0,0,0,0,0,0,0,0,0,1),10)
N1726 <- cbind(N1726,M1_n1726,M2_n1726,M3_n1726,M4_n1726, M5_n1726, M6_n1726,M7_n1726, M8_n1726, M9_n1726, M10_n1726, M11_n1726) #M12_n1726
colnames(N1726) <- c("data","M1","M2","M3","M4",'M5','M6',"M7","M8","M9","M10",'M11' )#M12
#Use the Seasonal Dummies
fit3_n1726 <- lm(data ~ ., data=N1726)
summary(fit3_n1726)
##
## Call:
## lm(formula = data ~ ., data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1872.0 -476.5 -119.0 344.0 4088.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2300.0 304.7 7.549 1.48e-11 ***
## M1 -10.0 430.9 -0.023 0.98153
## M2 296.0 430.9 0.687 0.49359
## M3 802.0 430.9 1.861 0.06543 .
## M4 1166.0 430.9 2.706 0.00792 **
## M5 1092.0 430.9 2.534 0.01270 *
## M6 312.0 430.9 0.724 0.47059
## M7 198.0 430.9 0.460 0.64680
## M8 -262.0 430.9 -0.608 0.54444
## M9 -198.0 430.9 -0.460 0.64680
## M10 -406.0 430.9 -0.942 0.34818
## M11 -610.0 430.9 -1.416 0.15976
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 963.5 on 108 degrees of freedom
## Multiple R-squared: 0.2638, Adjusted R-squared: 0.1888
## F-statistic: 3.518 on 11 and 108 DF, p-value: 0.0003109
#Lag 1 of Sales
L1_data <- lag(N1726[,"data"],-1)
N1726_colnames <- colnames(N1726)
N1726 <- cbind(N1726,L1_data)
colnames(N1726) <- c(N1726_colnames,"L1_data")
#Use seasonal dummies and lag
fit4_n1726 <- lm(data ~ ., data=N1726)
summary(fit4_n1726)
##
## Call:
## lm(formula = data ~ ., data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1437.5 -401.7 -90.0 410.5 2075.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1148.73909 250.36160 4.588 1.23e-05 ***
## M1 -572.51624 323.68639 -1.769 0.0798 .
## M2 -112.73168 315.15050 -0.358 0.7213
## M3 184.81516 318.68811 0.580 0.5632
## M4 204.11811 327.51960 0.623 0.5345
## M5 -117.84578 336.02882 -0.351 0.7265
## M6 -847.43554 334.16026 -2.536 0.0127 *
## M7 -430.08435 318.91131 -1.349 0.1803
## M8 -812.42533 317.40324 -2.560 0.0119 *
## M9 -435.06438 313.30089 -1.389 0.1679
## M10 -686.66242 313.67836 -2.189 0.0308 *
## M11 -748.96877 312.68305 -2.395 0.0184 *
## L1_data 0.68122 0.06975 9.766 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 698.5 on 106 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.6166, Adjusted R-squared: 0.5732
## F-statistic: 14.21 on 12 and 106 DF, p-value: < 2.2e-16
#Multicollinearity
# Correlation matrix for N1726 data
cor(N1726,use="c")
## data M1 M2 M3 M4 M5
## data 1.00000000 -0.09369989 0.03051685 0.17447966 0.27804183 0.25698798
## M1 -0.09369989 1.00000000 -0.08663865 -0.08663865 -0.08663865 -0.08663865
## M2 0.03051685 -0.08663865 1.00000000 -0.09174312 -0.09174312 -0.09174312
## M3 0.17447966 -0.08663865 -0.09174312 1.00000000 -0.09174312 -0.09174312
## M4 0.27804183 -0.08663865 -0.09174312 -0.09174312 1.00000000 -0.09174312
## M5 0.25698798 -0.08663865 -0.09174312 -0.09174312 -0.09174312 1.00000000
## M6 0.03506903 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M7 0.00263472 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M8 -0.12824056 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M9 -0.11003183 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M10 -0.16921022 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M11 -0.22725056 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## L1_data 0.70513338 -0.05457994 -0.05936873 0.02727868 0.17055840 0.27362919
## M6 M7 M8 M9 M10
## data 0.03506903 0.00263472 -0.1282405621 -0.11003183 -0.16921022
## M1 -0.08663865 -0.08663865 -0.0866386473 -0.08663865 -0.08663865
## M2 -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M3 -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M4 -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M5 -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M6 1.00000000 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M7 -0.09174312 1.00000000 -0.0917431193 -0.09174312 -0.09174312
## M8 -0.09174312 -0.09174312 1.0000000000 -0.09174312 -0.09174312
## M9 -0.09174312 -0.09174312 -0.0917431193 1.00000000 -0.09174312
## M10 -0.09174312 -0.09174312 -0.0917431193 -0.09174312 1.00000000
## M11 -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## L1_data 0.25267524 0.03180927 -0.0004711427 -0.13072543 -0.11260310
## M11 L1_data
## data -0.22725056 0.7051333790
## M1 -0.08663865 -0.0545799400
## M2 -0.09174312 -0.0593687349
## M3 -0.09174312 0.0272786844
## M4 -0.09174312 0.1705584040
## M5 -0.09174312 0.2736291904
## M6 -0.09174312 0.2526752393
## M7 -0.09174312 0.0318092685
## M8 -0.09174312 -0.0004711427
## M9 -0.09174312 -0.1307254332
## M10 -0.09174312 -0.1126030971
## M11 1.00000000 -0.1715006893
## L1_data -0.17150069 1.0000000000
#VIF for fit4
vif(fit4_n1726)
## M1 M2 M3 M4 M5 M6 M7 M8
## 1.786730 1.864821 1.906922 2.014076 2.120090 2.096577 1.909594 1.891577
## M9 M10 M11 L1_data
## 1.842997 1.847440 1.835735 1.358051
#Analyse residuals of multiple components
tsdisplay(residuals(fit4_n1726))

#Lag 12 of Sales
L12_data <- lag(N1726[,"data"],-12)
N1726_colnames <- colnames(N1726)
N1726 <- cbind(N1726,L12_data)
colnames(N1726) <- c(N1726_colnames,"L12_data")
#Use seasonal dummies and lag
fit5_n1726 <- lm(data ~ ., data=N1726)
summary(fit5_n1726)
##
## Call:
## lm(formula = data ~ ., data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1170.71 -351.44 -37.35 328.49 1528.46
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1068.76716 236.19775 4.525 1.77e-05 ***
## M1 -71.74741 277.65344 -0.258 0.797
## M2 124.51238 271.26577 0.459 0.647
## M3 324.05966 275.09612 1.178 0.242
## M4 428.31641 284.72091 1.504 0.136
## M5 165.42520 294.01085 0.563 0.575
## M6 -4.97982 296.01021 -0.017 0.987
## M7 -9.52178 278.45551 -0.034 0.973
## M8 -211.09958 282.13153 -0.748 0.456
## M9 -31.79719 271.00590 -0.117 0.907
## M10 -192.14422 278.53420 -0.690 0.492
## M11 -318.57196 276.03397 -1.154 0.251
## L1_data 0.13239 0.11090 1.194 0.236
## L12_data 0.35960 0.08163 4.406 2.80e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 567.4 on 94 degrees of freedom
## (25 observations deleted due to missingness)
## Multiple R-squared: 0.566, Adjusted R-squared: 0.506
## F-statistic: 9.43 on 13 and 94 DF, p-value: 3.45e-12
#Fit Regression with Seasonally Dummies, and lag 12
fit6_n1726 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data, data=N1726)
summary(fit6_n1726)
##
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 +
## M10 + M11 + L12_data, data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1228.54 -357.45 -47.29 357.87 1596.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1124.34889 232.08128 4.845 4.92e-06 ***
## M1 17.16741 268.07545 0.064 0.9491
## M2 173.85585 268.69767 0.647 0.5192
## M3 374.50979 272.43848 1.375 0.1725
## M4 508.09584 277.38594 1.832 0.0701 .
## M5 280.51867 278.37593 1.008 0.3162
## M6 143.75633 269.11402 0.534 0.5945
## M7 81.12278 268.50247 0.302 0.7632
## M8 -104.89621 268.33735 -0.391 0.6967
## M9 19.26833 268.20706 0.072 0.9429
## M10 -103.75633 269.11402 -0.386 0.7007
## M11 -248.32208 270.29163 -0.919 0.3606
## L12_data 0.42792 0.05833 7.336 7.36e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 568.7 on 95 degrees of freedom
## (25 observations deleted due to missingness)
## Multiple R-squared: 0.5594, Adjusted R-squared: 0.5038
## F-statistic: 10.05 on 12 and 95 DF, p-value: 2.008e-12
#Lag 3 of Sales
L3_data <- lag(N1726[,"data"],-3)
N1726_colnames <- colnames(N1726)
N1726 <- cbind(N1726,L3_data)
colnames(N1726) <- c(N1726_colnames,"L3_data")
#Fit Regression with Seasonally Dummies, log (lag 3), lag 1, and lag 12
fit7_n1726 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data + L1_data + I(log(L3_data)), data=N1726)
summary(fit7_n1726)
##
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 +
## M10 + M11 + L12_data + L1_data + I(log(L3_data)), data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1236.03 -410.89 -9.51 349.91 1371.60
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.823e+03 1.658e+03 -2.306 0.023342 *
## M1 5.001e+01 2.698e+02 0.185 0.853361
## M2 3.094e+02 2.679e+02 1.155 0.250999
## M3 3.666e+02 2.646e+02 1.385 0.169291
## M4 5.578e+02 2.769e+02 2.014 0.046889 *
## M5 2.643e+02 2.844e+02 0.930 0.355006
## M6 -8.126e+01 2.855e+02 -0.285 0.776565
## M7 -2.089e+02 2.757e+02 -0.758 0.450647
## M8 -3.780e+02 2.767e+02 -1.366 0.175264
## M9 -1.164e+02 2.619e+02 -0.445 0.657687
## M10 -2.602e+02 2.685e+02 -0.969 0.335117
## M11 -2.857e+02 2.654e+02 -1.077 0.284455
## L12_data 3.074e-01 8.034e-02 3.826 0.000236 ***
## L1_data 5.735e-02 1.095e-01 0.524 0.601554
## I(log(L3_data)) 6.747e+02 2.265e+02 2.978 0.003694 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 545 on 93 degrees of freedom
## (28 observations deleted due to missingness)
## Multiple R-squared: 0.6038, Adjusted R-squared: 0.5441
## F-statistic: 10.12 on 14 and 93 DF, p-value: 2.295e-13
#FORECASTING
# Split the data into train and test sets
N1726_train <- window(N1726, start(N1726), (1993+8/12))
#we have to sustitude all NA with 0
N1726_train[is.na(N1726_train)] = 0
N1726_test <- window(N1726, (1993+8/12), end(N1726))
plot(N1726[,"data"])

plot(N1726_train[,"data"])

plot(N1726_test[,"data"])

# Regression with no variables
N1726_Model0 <- lm(data ~ 1, data=N1726_train)
# Regression with all variables
N1726_Model1 <- lm(data ~ ., data=N1726_train)
# Summary of Regressions
summary(N1726_Model0)
##
## Call:
## lm(formula = data ~ 1, data = N1726_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1346.3 -776.3 -276.3 403.7 4933.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2546.3 106.5 23.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1107 on 107 degrees of freedom
summary(N1726_Model1)
##
## Call:
## lm(formula = data ~ ., data = N1726_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1557.02 -460.44 -13.62 379.37 2391.28
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1381.06188 325.09783 4.248 5.11e-05 ***
## M1 -132.34165 364.01452 -0.364 0.7170
## M2 75.19565 373.33702 0.201 0.8408
## M3 437.14654 371.65434 1.176 0.2425
## M4 549.69565 384.37345 1.430 0.1560
## M5 396.11947 395.38345 1.002 0.3190
## M6 -691.99477 386.44755 -1.791 0.0766 .
## M7 -419.95478 372.94282 -1.126 0.2630
## M8 -867.40302 373.96340 -2.319 0.0226 *
## M9 -445.50738 360.70928 -1.235 0.2199
## M10 -742.88182 360.60844 -2.060 0.0422 *
## M11 -764.87717 360.36638 -2.122 0.0365 *
## L1_data 0.54874 0.09302 5.899 5.87e-08 ***
## L12_data -0.12673 0.05902 -2.147 0.0344 *
## L3_data 0.11499 0.09061 1.269 0.2076
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 759.7 on 93 degrees of freedom
## Multiple R-squared: 0.5906, Adjusted R-squared: 0.5289
## F-statistic: 9.582 on 14 and 93 DF, p-value: 9.285e-13
# AIC forward selection
N1726_Model2 <- step(N1726_Model0, formula(N1726_Model1), direction="forward")
## Start: AIC=1515
## data ~ 1
##
## Df Sum of Sq RSS AIC
## + L1_data 1 57894715 73193003 1454.1
## + L3_data 1 11621037 119466682 1507.0
## + M5 1 10536308 120551410 1508.0
## + M4 1 9266167 121821552 1509.1
## + M11 1 6995072 124092646 1511.1
## + M10 1 4243266 126844453 1513.4
## + M3 1 3332430 127755289 1514.2
## <none> 131087719 1515.0
## + M8 1 2054175 129033543 1515.3
## + M9 1 1568001 129519717 1515.7
## + L12_data 1 856011 130231708 1516.3
## + M1 1 445286 130642432 1516.6
## + M6 1 238708 130849010 1516.8
## + M2 1 41246 131046473 1517.0
## + M7 1 1294 131086424 1517.0
##
## Step: AIC=1454.06
## data ~ L1_data
##
## Df Sum of Sq RSS AIC
## + M4 1 3414946 69778057 1450.9
## + M6 1 2975832 70217172 1451.6
## + M3 1 2758855 70434148 1451.9
## + M8 1 2218628 70974375 1452.7
## + M11 1 1844762 71348242 1453.3
## + M10 1 1651773 71541230 1453.6
## + M5 1 1636050 71556954 1453.6
## <none> 73193003 1454.1
## + L3_data 1 519713 72673290 1455.3
## + M2 1 366098 72826905 1455.5
## + M9 1 126346 73066657 1455.9
## + M7 1 99175 73093828 1455.9
## + L12_data 1 67418 73125585 1456.0
## + M1 1 42489 73150515 1456.0
##
## Step: AIC=1450.9
## data ~ L1_data + M4
##
## Df Sum of Sq RSS AIC
## + M3 1 3408415 66369643 1447.5
## + M5 1 2412423 67365635 1449.1
## + M6 1 2175138 67602920 1449.5
## + M8 1 1749947 68028110 1450.2
## + M11 1 1538652 68239406 1450.5
## + M10 1 1318848 68459209 1450.8
## <none> 69778057 1450.9
## + M2 1 581132 69196925 1452.0
## + L12_data 1 323619 69454438 1452.4
## + L3_data 1 196594 69581464 1452.6
## + M1 1 118174 69659883 1452.7
## + M9 1 48565 69729493 1452.8
## + M7 1 17253 69760804 1452.9
##
## Step: AIC=1447.49
## data ~ L1_data + M4 + M3
##
## Df Sum of Sq RSS AIC
## + M5 1 3160912 63208731 1444.2
## + M6 1 1608307 64761336 1446.8
## + M8 1 1305019 65064624 1447.3
## <none> 66369643 1447.5
## + M11 1 1143152 65226491 1447.6
## + M10 1 948011 65421632 1447.9
## + M2 1 900970 65468673 1448.0
## + L12_data 1 697667 65671976 1448.3
## + M1 1 275594 66094049 1449.0
## + L3_data 1 27065 66342578 1449.5
## + M7 1 3395 66366248 1449.5
## + M9 1 1765 66367878 1449.5
##
## Step: AIC=1444.22
## data ~ L1_data + M4 + M3 + M5
##
## Df Sum of Sq RSS AIC
## + L12_data 1 1740411 61468319 1443.2
## + M2 1 1306630 61902100 1444.0
## <none> 63208731 1444.2
## + M11 1 898636 62310095 1444.7
## + M8 1 863121 62345610 1444.7
## + M6 1 810144 62398587 1444.8
## + M10 1 666630 62542100 1445.1
## + M1 1 464665 62744065 1445.4
## + M7 1 91994 63116736 1446.1
## + L3_data 1 26187 63182544 1446.2
## + M9 1 12233 63196497 1446.2
##
## Step: AIC=1443.21
## data ~ L1_data + M4 + M3 + M5 + L12_data
##
## Df Sum of Sq RSS AIC
## + M2 1 1550302 59918017 1442.5
## + M11 1 1261631 60206688 1443.0
## <none> 61468319 1443.2
## + M8 1 965789 60502530 1443.5
## + M10 1 830483 60637836 1443.7
## + M6 1 618319 60850000 1444.1
## + M1 1 507679 60960641 1444.3
## + M7 1 141201 61327118 1445.0
## + L3_data 1 34429 61433891 1445.1
## + M9 1 6564 61461756 1445.2
##
## Step: AIC=1442.45
## data ~ L1_data + M4 + M3 + M5 + L12_data + M2
##
## Df Sum of Sq RSS AIC
## <none> 59918017 1442.5
## + M11 1 965747 58952270 1442.7
## + M1 1 772057 59145960 1443.0
## + M8 1 699813 59218204 1443.2
## + M10 1 587563 59330455 1443.4
## + L3_data 1 412964 59505053 1443.7
## + M6 1 381958 59536059 1443.8
## + M7 1 293944 59624073 1443.9
## + M9 1 56462 59861555 1444.3
#backward
N1726_Model3 <- step(N1726_Model1, formula(N1726_Model0), direction="backward")
## Start: AIC=1446.55
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 +
## L1_data + L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M2 1 23412 53693860 1444.6
## - M1 1 76280 53746728 1444.7
## - M5 1 579252 54249701 1445.7
## - M7 1 731767 54402216 1446.0
## - M3 1 798414 54468862 1446.2
## - M9 1 880334 54550782 1446.3
## - L3_data 1 929307 54599756 1446.4
## <none> 53670449 1446.5
## - M4 1 1180294 54850742 1446.9
## - M6 1 1850448 55520896 1448.2
## - M10 1 2449176 56119624 1449.4
## - M11 1 2599843 56270292 1449.7
## - L12_data 1 2660572 56331021 1449.8
## - M8 1 3104816 56775265 1450.6
## - L1_data 1 20082318 73752767 1478.9
##
## Step: AIC=1444.6
## data ~ M1 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M1 1 174330 53868190 1443.0
## - M5 1 639114 54332975 1443.9
## - L3_data 1 913869 54607730 1444.4
## - M3 1 915716 54609577 1444.4
## <none> 53693860 1444.6
## - M7 1 1057848 54751708 1444.7
## - M9 1 1299640 54993501 1445.2
## - M4 1 1405678 55099538 1445.4
## - L12_data 1 2641506 56335367 1447.8
## - M6 1 2763249 56457110 1448.0
## - M10 1 3468115 57161975 1449.4
## - M11 1 3730147 57424008 1449.8
## - M8 1 4068871 57762731 1450.5
## - L1_data 1 21764685 75458546 1479.3
##
## Step: AIC=1442.95
## data ~ M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M7 1 896666 54764856 1442.7
## - L3_data 1 943115 54811305 1442.8
## - M5 1 955511 54823702 1442.8
## <none> 53868190 1443.0
## - M9 1 1128868 54997059 1443.2
## - M3 1 1341875 55210065 1443.6
## - M4 1 1939830 55808020 1444.8
## - M6 1 2600148 56468338 1446.0
## - L12_data 1 2618511 56486702 1446.1
## - M10 1 3330379 57198569 1447.4
## - M11 1 3606714 57474905 1448.0
## - M8 1 3929270 57797461 1448.6
## - L1_data 1 21651929 75520120 1477.4
##
## Step: AIC=1442.73
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data +
## L3_data
##
## Df Sum of Sq RSS AIC
## - L3_data 1 446298 55211154 1441.6
## - M9 1 646328 55411185 1442.0
## <none> 54764856 1442.7
## - M5 1 1612225 56377081 1443.9
## - M6 1 1916952 56681808 1444.5
## - M3 1 2058115 56822971 1444.7
## - M10 1 2605242 57370098 1445.8
## - L12_data 1 2624302 57389158 1445.8
## - M4 1 2834277 57599133 1446.2
## - M11 1 2930582 57695438 1446.4
## - M8 1 3080521 57845377 1446.6
## - L1_data 1 22414552 77179409 1477.8
##
## Step: AIC=1441.61
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data
##
## Df Sum of Sq RSS AIC
## - M9 1 500149 55711303 1440.6
## <none> 55211154 1441.6
## - M5 1 1432643 56643797 1442.4
## - M3 1 1872870 57084023 1443.2
## - M6 1 1895529 57106683 1443.3
## - M10 1 2403610 57614764 1444.2
## - L12_data 1 2530766 57741919 1444.5
## - M4 1 2586171 57797324 1444.5
## - M8 1 2650203 57861357 1444.7
## - M11 1 2878934 58090088 1445.1
## - L1_data 1 38087741 93298895 1496.3
##
## Step: AIC=1440.58
## data ~ M3 + M4 + M5 + M6 + M8 + M10 + M11 + L1_data + L12_data
##
## Df Sum of Sq RSS AIC
## <none> 55711303 1440.6
## - M6 1 1620700 57332003 1441.7
## - M5 1 1765598 57476901 1442.0
## - M10 1 2062015 57773317 1442.5
## - M8 1 2302602 58013904 1443.0
## - M3 1 2330008 58041310 1443.0
## - L12_data 1 2410273 58121575 1443.2
## - M11 1 2510199 58221502 1443.3
## - M4 1 3086751 58798054 1444.4
## - L1_data 1 38399687 94110990 1495.2
#both
N1726_Model4 <- step(N1726_Model1, direction="both")
## Start: AIC=1446.55
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 +
## L1_data + L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M2 1 23412 53693860 1444.6
## - M1 1 76280 53746728 1444.7
## - M5 1 579252 54249701 1445.7
## - M7 1 731767 54402216 1446.0
## - M3 1 798414 54468862 1446.2
## - M9 1 880334 54550782 1446.3
## - L3_data 1 929307 54599756 1446.4
## <none> 53670449 1446.5
## - M4 1 1180294 54850742 1446.9
## - M6 1 1850448 55520896 1448.2
## - M10 1 2449176 56119624 1449.4
## - M11 1 2599843 56270292 1449.7
## - L12_data 1 2660572 56331021 1449.8
## - M8 1 3104816 56775265 1450.6
## - L1_data 1 20082318 73752767 1478.9
##
## Step: AIC=1444.6
## data ~ M1 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M1 1 174330 53868190 1443.0
## - M5 1 639114 54332975 1443.9
## - L3_data 1 913869 54607730 1444.4
## - M3 1 915716 54609577 1444.4
## <none> 53693860 1444.6
## - M7 1 1057848 54751708 1444.7
## - M9 1 1299640 54993501 1445.2
## - M4 1 1405678 55099538 1445.4
## + M2 1 23412 53670449 1446.5
## - L12_data 1 2641506 56335367 1447.8
## - M6 1 2763249 56457110 1448.0
## - M10 1 3468115 57161975 1449.4
## - M11 1 3730147 57424008 1449.8
## - M8 1 4068871 57762731 1450.5
## - L1_data 1 21764685 75458546 1479.3
##
## Step: AIC=1442.95
## data ~ M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data +
## L12_data + L3_data
##
## Df Sum of Sq RSS AIC
## - M7 1 896666 54764856 1442.7
## - L3_data 1 943115 54811305 1442.8
## - M5 1 955511 54823702 1442.8
## <none> 53868190 1443.0
## - M9 1 1128868 54997059 1443.2
## - M3 1 1341875 55210065 1443.6
## + M1 1 174330 53693860 1444.6
## + M2 1 121462 53746728 1444.7
## - M4 1 1939830 55808020 1444.8
## - M6 1 2600148 56468338 1446.0
## - L12_data 1 2618511 56486702 1446.1
## - M10 1 3330379 57198569 1447.4
## - M11 1 3606714 57474905 1448.0
## - M8 1 3929270 57797461 1448.6
## - L1_data 1 21651929 75520120 1477.4
##
## Step: AIC=1442.73
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data +
## L3_data
##
## Df Sum of Sq RSS AIC
## - L3_data 1 446298 55211154 1441.6
## - M9 1 646328 55411185 1442.0
## <none> 54764856 1442.7
## + M7 1 896666 53868190 1443.0
## - M5 1 1612225 56377081 1443.9
## + M2 1 345266 54419591 1444.0
## - M6 1 1916952 56681808 1444.5
## + M1 1 13148 54751708 1444.7
## - M3 1 2058115 56822971 1444.7
## - M10 1 2605242 57370098 1445.8
## - L12_data 1 2624302 57389158 1445.8
## - M4 1 2834277 57599133 1446.2
## - M11 1 2930582 57695438 1446.4
## - M8 1 3080521 57845377 1446.6
## - L1_data 1 22414552 77179409 1477.8
##
## Step: AIC=1441.61
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data
##
## Df Sum of Sq RSS AIC
## - M9 1 500149 55711303 1440.6
## <none> 55211154 1441.6
## - M5 1 1432643 56643797 1442.4
## + L3_data 1 446298 54764856 1442.7
## + M7 1 399848 54811305 1442.8
## - M3 1 1872870 57084023 1443.2
## - M6 1 1895529 57106683 1443.3
## + M2 1 129887 55081267 1443.4
## + M1 1 46576 55164577 1443.5
## - M10 1 2403610 57614764 1444.2
## - L12_data 1 2530766 57741919 1444.5
## - M4 1 2586171 57797324 1444.5
## - M8 1 2650203 57861357 1444.7
## - M11 1 2878934 58090088 1445.1
## - L1_data 1 38087741 93298895 1496.3
##
## Step: AIC=1440.58
## data ~ M3 + M4 + M5 + M6 + M8 + M10 + M11 + L1_data + L12_data
##
## Df Sum of Sq RSS AIC
## <none> 55711303 1440.6
## + M9 1 500149 55211154 1441.6
## - M6 1 1620700 57332003 1441.7
## - M5 1 1765598 57476901 1442.0
## + L3_data 1 300118 55411185 1442.0
## + M2 1 274314 55436989 1442.0
## + M7 1 191909 55519394 1442.2
## - M10 1 2062015 57773317 1442.5
## + M1 1 937 55710366 1442.6
## - M8 1 2302602 58013904 1443.0
## - M3 1 2330008 58041310 1443.0
## - L12_data 1 2410273 58121575 1443.2
## - M11 1 2510199 58221502 1443.3
## - M4 1 3086751 58798054 1444.4
## - L1_data 1 38399687 94110990 1495.2
#fit 8
fit8_n1726 <- lm(data ~ L12_data + L1_data + I(log(L3_data)), data=N1726)
summary(fit8_n1726)
##
## Call:
## lm(formula = data ~ L12_data + L1_data + I(log(L3_data)), data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -991.91 -390.34 -55.01 358.80 1505.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.161e+03 1.399e+03 -0.830 0.408
## L12_data 4.605e-01 7.185e-02 6.409 4.36e-09 ***
## L1_data 5.850e-02 1.006e-01 0.582 0.562
## I(log(L3_data)) 2.806e+02 1.903e+02 1.475 0.143
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 564.5 on 104 degrees of freedom
## (28 observations deleted due to missingness)
## Multiple R-squared: 0.5247, Adjusted R-squared: 0.511
## F-statistic: 38.27 on 3 and 104 DF, p-value: < 2.2e-16
#fit 9
fit9_n1726 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + I(log(L3_data)), data=N1726)
summary(fit9_n1726)
##
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 +
## M10 + M11 + L1_data + I(log(L3_data)), data = N1726)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1507.65 -454.53 -0.42 341.16 2233.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.947e+03 1.800e+03 -2.748 0.007076 **
## M1 -3.413e+02 3.096e+02 -1.103 0.272751
## M2 8.275e+01 3.114e+02 0.266 0.790984
## M3 1.374e+02 3.054e+02 0.450 0.653616
## M4 4.411e+02 3.125e+02 1.412 0.161082
## M5 1.204e+02 3.202e+02 0.376 0.707714
## M6 -7.761e+02 3.143e+02 -2.470 0.015171 *
## M7 -6.251e+02 3.064e+02 -2.040 0.043908 *
## M8 -9.698e+02 3.026e+02 -3.205 0.001798 **
## M9 -5.212e+02 2.951e+02 -1.766 0.080312 .
## M10 -7.375e+02 2.947e+02 -2.503 0.013901 *
## M11 -6.737e+02 2.938e+02 -2.293 0.023863 *
## L1_data 4.684e-01 8.456e-02 5.540 2.34e-07 ***
## I(log(L3_data)) 8.465e+02 2.454e+02 3.449 0.000816 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 654.7 on 103 degrees of freedom
## (19 observations deleted due to missingness)
## Multiple R-squared: 0.6403, Adjusted R-squared: 0.5949
## F-statistic: 14.1 on 13 and 103 DF, p-value: < 2.2e-16
#Forecast from Model 1
N1726_Model1Forecast <- predict(N1726_Model1, N1726_test)
plot(N1726_Model1Forecast)

plot.ts(N1726_Model1Forecast)

# Errors
N1726_test_data <- (N1726_test[,"data"])
model1_errors_n1726 <- N1726_test_data - N1726_Model1Forecast
#eliminate NAs
model1_errors_n1726 <- ts(model1_errors_n1726[1:13], frequency=frequency(model1_errors_n1726),
start=start(model1_errors_n1726))
View(model1_errors_n1726)
#Error Metrics
model1_MAE_n1726 <- mean(abs(model1_errors_n1726))
model1_MAPE_n1726 <- 100 * mean(abs(model1_errors_n1726)/N1726_test_data)
#Forecast from fit 8 and 9
N1726_fit8Forecast <- predict(fit8_n1726, N1726_test)
plot(N1726_fit8Forecast)

plot.ts(N1726_fit8Forecast)

# Errors
N1726_test_data <- (N1726_test[,"data"])
fit8_errors_n1726 <- N1726_test_data - N1726_fit8Forecast
#eliminate NAs
fit8_errors_n1726 <- ts(fit8_errors_n1726[1:13], frequency=frequency(fit8_errors_n1726),
start=start(fit8_errors_n1726))
#Error Metrics
fit8_MAE_n1726 <- mean(abs(fit8_errors_n1726))
fit8_MAPE_n1726 <- 100 * mean(abs(fit8_errors_n1726)/N1726_test_data)
#fit 9
N1726_fit9Forecast <- predict(fit9_n1726, N1726_test)
plot(N1726_fit9Forecast)

plot.ts(N1726_fit9Forecast)

# Errors
N1726_test_data <- (N1726_test[,"data"])
fit9_errors_n1726 <- N1726_test_data - N1726_fit9Forecast
#eliminate NAs
fit9_errors_n1726 <- ts(fit9_errors_n1726[1:13], frequency=frequency(fit9_errors_n1726),
start=start(fit9_errors_n1726))
#Error Metrics
fit9_MAE_n1726 <- mean(abs(fit9_errors_n1726))
fit9_MAPE_n1726 <- 100 * mean(abs(fit9_errors_n1726)/N1726_test_data)
#FORECAST FOR REGRESSION
regression_forecast_n1726 <- predict(N1726_Model1, N1726_test)
plot(regression_forecast_n1726)

plot.ts(regression_forecast_n1726)
