Libraries used in the project

#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
#install.packages("DataExplorer")
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.4.3
#install.packages("tsutils")
library(tsutils)
## Warning: package 'tsutils' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
#install.packages("forecast")
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
#install.packages("smooth")
library(smooth)
## Warning: package 'smooth' was built under R version 4.4.3
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 4.4.2
## Package "greybox", v2.0.3 loaded.
## This is package "smooth", v4.1.1
#install.packages("tseries")
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
#install.packages("car")
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3

Uploading the time series

data1 <- read_excel("C:\\Users\\patri\\Downloads\\coursework data.xlsx")
data2 <- read_excel("C:\\Users\\patri\\Downloads\\coursework data 1.xlsx")



n1725 <- ts(data1,frequency = 12, start = c(1984,10))
n1726 <- ts(data2,frequency = 12, start = c(1984,10))

Data exploration N1725

  #line plot of the time series  
plot(n1725)

  #basic statistic summary
summary(n1725)
##      N1725     
##  Min.   :1920  
##  1st Qu.:2545  
##  Median :2890  
##  Mean   :2989  
##  3rd Qu.:3350  
##  Max.   :4800
str(n1725)
##  Time-Series [1:126, 1] from 1985 to 1995: 3740 2880 2620 2860 2640 2740 2940 4040 3800 3640 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "N1725"
plot_histogram(n1725)

  #Centred moving average to estimate the yearly trend
n1725_cma <- cmav(n1725, ma = 12, fill = FALSE)

plot(n1725)
lines(n1725_cma, col = 'blue')

  #seasonality plot
seasplot(n1725)
## Results of statistical testing
## Evidence of trend: TRUE  (pval: 0)
## Evidence of seasonality: TRUE  (pval: 0)
data_seas_index_n1725 <- seasplot(n1725)$season

  #decomposition of the time series
decomposition_n1725 <- decomp(n1725,decomposition = "additive",outplot = TRUE)

decomposition_n1725 <- decomp(n1725,decomposition = "multiplicative",outplot = TRUE)

##structure of the decomposition
str(decomposition_n1725)
## List of 5
##  $ trend    : Time-Series [1:126, 1] from 1985 to 1995: 3402 3402 3402 3401 3401 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "N1725"
##  $ season   : Time-Series [1:126, 1] from 1985 to 1995: 1.014 0.859 0.856 0.826 0.811 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "N1725"
##  $ irregular: Time-Series [1:126, 1] from 1985 to 1995: 289.9 -44 -291.6 48.7 -119.7 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "y"
##  $ f.season : NULL
##  $ g        : NULL
##Extract the Noise
decomposition_n1725$irregular
##              Jan         Feb         Mar         Apr         May         Jun
## 1984                                                                        
## 1985   48.692078 -119.675387 -451.220482 -363.343296  181.398395 -300.118022
## 1986 -127.773743 -241.746283 -278.128484 -295.883668  117.611329  440.199623
## 1987   19.432723  -13.020140 -446.433474 -352.026482 -598.595212  632.997867
## 1988  -87.162110  164.211051 -506.136199 -521.364658 -108.587222 -461.079478
## 1989 -283.454299  134.776677    8.290540 1185.526401  811.274900  181.232873
## 1990  382.269043 -160.448466  314.079660  299.444985   94.924089 -575.563305
## 1991 -365.811030 -366.143064  373.846124  159.513218 -157.097733  183.642583
## 1992   -3.267322  -57.849527  -57.102214  151.175714  -96.828370 -237.248888
## 1993  224.414503   47.274653  183.347182 -427.446074 -126.288046  269.785523
## 1994   19.672518  217.343559  365.027999   60.495702  -80.615437  -62.809480
## 1995  101.322249  362.651705  300.950861                                    
##              Jul         Aug         Sep         Oct         Nov         Dec
## 1984                                      289.851409  -44.027715 -291.570218
## 1985 -475.531604  688.007086  660.236205  125.102182  -65.342752  173.716195
## 1986  507.805280 -736.666908  478.562840 -120.813797  584.241958 -173.708592
## 1987  150.293428  -59.854527  124.193227  -52.366403  433.548172  393.043740
## 1988  -24.117950  674.414263   69.421676 -121.381665 -308.600548  158.211853
## 1989 -486.509196 -549.969947 -494.005442 -387.298182 -368.605453 -188.049586
## 1990  361.040457  324.508196 -177.593336  145.660346   23.242317  154.324037
## 1991  335.291025 -782.194282 -121.809559   67.630362  202.909104   82.678274
## 1992  230.675191  234.269371 -458.784377  245.659268 -201.497772  170.640880
## 1993 -589.561219  528.870919  223.049763 -437.720983   68.365433 -321.454989
## 1994   25.656291 -350.528027  -95.163675  304.417793 -246.266531 -161.451513
## 1995
  #pure or mixed multiplicative decomp?

##Extract the Trend
decomposition_n1725$trend
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1984                                                                        
## 1985 3401.489 3401.093 3400.599 3400.000 3399.167 3425.833 3450.000 3451.667
## 1986 3615.000 3600.833 3535.833 3515.833 3525.833 3525.833 3497.500 3489.167
## 1987 3267.500 3245.000 3225.000 3182.500 3152.500 3147.500 3143.333 3122.500
## 1988 2912.500 2903.333 2905.000 2883.333 2844.167 2808.333 2803.333 2808.333
## 1989 3150.000 3087.500 3017.500 2979.167 2950.000 2906.667 2889.167 2879.167
## 1990 2707.500 2785.833 2840.833 2882.500 2929.167 2970.833 2966.667 2934.167
## 1991 2886.667 2817.500 2755.833 2738.333 2728.333 2720.833 2720.000 2738.333
## 1992 2690.000 2733.333 2767.500 2767.500 2763.333 2755.000 2774.167 2791.667
## 1993 2777.500 2751.667 2788.333 2786.667 2771.667 2765.833 2740.833 2745.833
## 1994 2855.833 2862.500 2829.167 2860.833 2890.000 2893.333 2912.500 2930.000
## 1995 2974.830 2979.201 2982.697                                             
##           Sep      Oct      Nov      Dec
## 1984          3402.260 3402.058 3401.805
## 1985 3465.833 3485.833 3496.667 3535.833
## 1986 3467.500 3432.500 3369.167 3310.833
## 1987 3103.333 3069.167 3055.833 2999.167
## 1988 2839.167 2940.000 3058.333 3133.333
## 1989 2862.500 2827.500 2755.833 2696.667
## 1990 2925.833 2913.333 2881.667 2880.833
## 1991 2730.833 2714.167 2719.167 2707.500
## 1992 2807.500 2795.000 2770.833 2791.667
## 1993 2765.833 2798.333 2829.167 2829.167
## 1994 2943.333 2954.003 2962.539 2969.368
## 1995
##Extract the Seasonality
decomposition_n1725$season
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1984                                                                      
## 1985 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1986 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1987 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1988 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1989 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1990 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1991 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1992 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1993 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1994 0.8264934 0.8114084 0.9384290 0.9715716 1.1351611 1.1968236 1.1929077
## 1995 0.8264934 0.8114084 0.9384290                                        
##            Aug       Sep       Oct       Nov       Dec
## 1984                     1.0140754 0.8594879 0.8558898
## 1985 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1986 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1987 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1988 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1989 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1990 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1991 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1992 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1993 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1994 1.1913065 1.0040194 1.0140754 0.8594879 0.8558898
## 1995
##Multiply the Trend and Seasonality
regular_components_n1725 <- decomposition_n1725$trend * decomposition_n1725$season

###mixed multiplicative model errors
mmm_errors_n1725 <- n1725 - regular_components_n1725
###compare errors
decomposition_n1725$irregular - mmm_errors_n1725
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1984                                       0   0   0
## 1985   0   0   0   0   0   0   0   0   0   0   0   0
## 1986   0   0   0   0   0   0   0   0   0   0   0   0
## 1987   0   0   0   0   0   0   0   0   0   0   0   0
## 1988   0   0   0   0   0   0   0   0   0   0   0   0
## 1989   0   0   0   0   0   0   0   0   0   0   0   0
## 1990   0   0   0   0   0   0   0   0   0   0   0   0
## 1991   0   0   0   0   0   0   0   0   0   0   0   0
## 1992   0   0   0   0   0   0   0   0   0   0   0   0
## 1993   0   0   0   0   0   0   0   0   0   0   0   0
## 1994   0   0   0   0   0   0   0   0   0   0   0   0
## 1995   0   0   0
### all values are 0, which indicates this is a Mixed Multiplicate Model

Data exploration N1726

#line plot of the time series  
plot(n1726)

#basic statistic summary
summary(n1726)
##      N1726     
##  Min.   :1200  
##  1st Qu.:1780  
##  Median :2270  
##  Mean   :2550  
##  3rd Qu.:2980  
##  Max.   :7480
str(n1726)
##  Time-Series [1:126, 1] from 1985 to 1995: 4580 3280 3740 3340 1820 4700 3640 4220 5480 6420 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "N1726"
plot_histogram(n1726)

#Centred moving average to estimate the yearly trend
n1726_cma <- cmav(n1726, ma = 12, fill = FALSE)

plot(n1726)
lines(n1726_cma, col = 'red')

#seasonality plot
seasplot(n1726)
## Results of statistical testing
## Evidence of trend: TRUE  (pval: 0)
## Evidence of seasonality: TRUE  (pval: 0)
data_seas_index_n1726 <- seasplot(n1726)$season

#decomposition of the time series
decomposition_n1726 <- decomp(n1726,decomposition = "additive",outplot = TRUE)

decomposition_n1726 <- decomp(n1726,decomposition = "multiplicative",outplot = TRUE)

##structure of the decomposition
str(decomposition_n1726)
## List of 5
##  $ trend    : Time-Series [1:126, 1] from 1985 to 1995: 4686 4639 4592 4545 4498 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "N1726"
##  $ season   : Time-Series [1:126, 1] from 1985 to 1995: 1.013 0.821 0.885 0.781 0.688 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "N1726"
##  $ irregular: Time-Series [1:126, 1] from 1985 to 1995: -168 -530 -326 -209 -1276 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr "y"
##  $ f.season : NULL
##  $ g        : NULL
##Extract the Noise
decomposition_n1726$irregular
##              Jan         Feb         Mar         Apr         May         Jun
## 1984                                                                        
## 1985  -209.46319 -1276.15378   310.70769  -355.61260  -200.67380   289.96498
## 1986  -690.50687  -216.92696   324.08136  -443.91571   497.81492  1151.27641
## 1987   619.51680  -115.08396  -278.57606  -370.84142   173.71970   -71.51597
## 1988   223.46571  -105.99650  -525.77043   450.05807   437.41491  -247.70224
## 1989  -161.60963    81.79275   301.00482  -373.23536  -765.82784   185.93592
## 1990   200.42983   -47.73166  -439.02081  -311.44518   531.05853  -777.15339
## 1991  -213.53974   -96.87848   725.59302   472.47402  -501.75798   161.78630
## 1992   256.87243   618.65331  -452.97762   526.49092  -529.25157  -380.09003
## 1993  -251.32745  -204.51669    56.61792  -109.04244   115.69670  -231.05110
## 1994  -369.67876   346.60800  -285.59475    65.11792   446.17481   479.81836
## 1995   343.46168   172.31782   322.61195                                    
##              Jul         Aug         Sep         Oct         Nov         Dec
## 1984                                      -168.09544  -529.79001  -325.96643
## 1985   658.17522  2015.28641  -100.08849    54.23659  -236.99742 -1198.12133
## 1986  -112.30142   449.01333   195.02069  -656.33508   220.48595 -1024.46131
## 1987   473.59017    32.17335   336.71371  -229.43967  -465.20948   -98.97445
## 1988  -343.63347   861.48462  -514.10143   -79.99709  -155.96005   710.98958
## 1989  -587.11119   291.78293   860.15441  -363.80717   243.39974   -70.61512
## 1990   688.41979   597.12106  -600.83002  -156.41769   354.50350  -128.40165
## 1991  -798.73316 -1096.08630   -64.23936   898.72576   430.77281   -47.08362
## 1992  -491.67263  -168.21707   263.73989   122.73800  -265.34198  1245.93219
## 1993   568.74600  -669.29365   -27.53128   285.27093    34.65802    -4.47612
## 1994   273.69525  -936.08630  -324.18065  -109.25889   -97.74245  -109.51213
## 1995
#pure or mixed multiplicative decomp?

##Extract the Trend
decomposition_n1726$trend
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1984                                                                        
## 1985 4545.273 4498.239 4451.205 4404.167 4386.667 4328.333 4239.167 4230.000
## 1986 4060.000 3889.167 3768.333 3685.833 3614.167 3576.667 3570.000 3569.167
## 1987 3048.333 2956.667 2858.333 2811.667 2745.000 2711.667 2697.500 2653.333
## 1988 2556.667 2536.667 2520.833 2480.000 2483.333 2508.333 2504.167 2475.833
## 1989 2255.833 2176.667 2169.167 2175.000 2149.167 2113.333 2080.000 2080.833
## 1990 2048.333 2103.333 2047.500 1996.667 2013.333 2015.833 1995.000 1968.333
## 1991 2015.000 1884.167 1840.000 1904.167 1946.667 1950.000 1970.833 2025.000
## 1992 2001.667 2065.000 2122.500 2109.167 2053.333 2085.000 2127.500 2080.833
## 1993 2165.833 2185.833 2153.333 2148.333 2167.500 2127.500 2068.333 2081.667
## 1994 2112.500 2082.500 2054.167 2022.500 1998.333 1985.000 2005.833 2025.000
## 1995 2044.451 2045.148 2045.844                                             
##           Sep      Oct      Nov      Dec
## 1984          4686.376 4639.342 4592.307
## 1985 4229.167 4170.833 4136.667 4131.667
## 1986 3484.167 3391.667 3311.667 3167.500
## 1987 2617.500 2615.000 2636.667 2619.167
## 1988 2493.333 2467.500 2357.500 2291.667
## 1989 2037.500 1997.500 2041.667 2045.000
## 1990 2000.000 2069.167 2052.500 2042.500
## 1991 2022.500 1995.000 2008.333 1995.833
## 1992 2072.500 2070.000 2076.667 2116.667
## 1993 2083.333 2067.500 2076.667 2105.833
## 1994 2041.667 2042.361 2043.058 2043.754
## 1995
##Extract the Seasonality
decomposition_n1726$season
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1984                                                                      
## 1985 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1986 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1987 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1988 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1989 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1990 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1991 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1992 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1993 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1994 0.7809130 0.6883035 0.9860907 0.9072346 1.0077524 1.1990839 1.3591881
## 1995 0.7809130 0.6883035 0.9860907                                        
##            Aug       Sep       Oct       Nov       Dec
## 1984                     1.0131700 0.8211920 0.8853864
## 1985 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1986 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1987 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1988 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1989 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1990 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1991 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1992 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1993 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1994 1.2918945 1.0404150 1.0131700 0.8211920 0.8853864
## 1995
##Multiply the Trend and Seasonality
regular_components_n1726 <- decomposition_n1726$trend * decomposition_n1726$season

###mixed multiplicative model errors
mmm_errors_n1726 <- n1726 - regular_components_n1726
###compare errors
decomposition_n1726$irregular - mmm_errors_n1726
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1984                                       0   0   0
## 1985   0   0   0   0   0   0   0   0   0   0   0   0
## 1986   0   0   0   0   0   0   0   0   0   0   0   0
## 1987   0   0   0   0   0   0   0   0   0   0   0   0
## 1988   0   0   0   0   0   0   0   0   0   0   0   0
## 1989   0   0   0   0   0   0   0   0   0   0   0   0
## 1990   0   0   0   0   0   0   0   0   0   0   0   0
## 1991   0   0   0   0   0   0   0   0   0   0   0   0
## 1992   0   0   0   0   0   0   0   0   0   0   0   0
## 1993   0   0   0   0   0   0   0   0   0   0   0   0
## 1994   0   0   0   0   0   0   0   0   0   0   0   0
## 1995   0   0   0
### all values are 0, which indicates this is a Mixed Multiplicate Model

Exponential Smoothing for N1725

#SPLITTING THE SERIES
#total number of observations
n1725_length <- length(n1725)
#size of train set and forecasting horizon
train_length_n1725 <- 48
h <- 14
#training set
train_n1725 <- ts(n1725[1:train_length_n1725], frequency=12)
#test set
test_n1725 <- ts(n1725[(train_length_n1725+1):n1725_length],frequency=12)

#SMA
#Fitting a SMA 3
SMA_n1725 <- ma(train_n1725, order=3, centre=FALSE)
#get rid of NA values:
SMA_no_NAs_n1725 <- SMA_n1725[!is.na(SMA_n1725)]
# Then form a forecast:
SMA3_forecast_n1725 <- ts(rep(SMA_no_NAs_n1725[length(SMA_no_NAs_n1725)],14), frequency=12)
# Errors
SMA3_errors_n1725 <- test_n1725 - SMA3_forecast_n1725
#Error Metrics
SMA3_MAE_n1725 <- mean(abs(SMA3_errors_n1725))
SMA3_MAPE_n1725 <- 100 * mean(abs(SMA3_errors_n1725)/test_n1725)

#Fitting a SMA 5
SMA5_n1725 <- ma(train_n1725, order=5, centre=FALSE)
#get rid of NA values:
SMA5_no_NAs_n1725 <- SMA5_n1725[!is.na(SMA5_n1725)]
# Then form a forecast:
SMA5_forecast_n1725 <- ts(rep(SMA5_no_NAs_n1725[length(SMA5_no_NAs_n1725)],14), frequency=12)
# Errors
SMA5_errors_n1725 <- test_n1725 - SMA5_forecast_n1725
#Error Metrics
SMA5_MAE_n1725 <- mean(abs(SMA5_errors_n1725))
SMA5_MAPE_n1725 <- 100 * mean(abs(SMA5_errors_n1725)/test_n1725)

#Naive Forecast
naive_method_n1725 <- naive(train_n1725, h=h)
naive_forecast_n1725 <- naive_method_n1725$mean
plot(naive_method_n1725)

# Errors
naive_errors_n1725 <- test_n1725 - naive_forecast_n1725
#Error Metrics
naive_MAE_n1725 <- mean(abs(naive_errors_n1725))
naive_MAPE_n1725 <- 100 * mean(abs(naive_errors_n1725)/test_n1725)

#Simple exponential smoothing (ets) and (es)
#In the model, A stands for additive, M stands for Multiplicative, Z for automatically selected

#ETS
#alpha 0.15
ETS_MMM_0.15_n1725 <- ets(train_n1725, model="MMM", alpha=0.15)
ETS_MMM_0.15_n1725
## ETS(M,M,M) 
## 
## Call:
## ets(y = train_n1725, model = "MMM", alpha = 0.15)
## 
##   Smoothing parameters:
##     alpha = 0.15 
##     beta  = 0.0237 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 3390.6613 
##     b = 1.0042 
##     s = 1.1162 1.2761 1.2154 1.2271 1.0942 0.8441
##            0.7997 0.7992 0.7964 0.8649 0.9345 1.0321
## 
##   sigma:  0.1134
## 
##      AIC     AICc      BIC 
## 765.7637 783.3121 795.7029
#alpha optimized
ETS_MMM_opt_n1725 <- ets(train_n1725, "MMM")
opt_alpha_ETS_N1725 <- coef(ETS_MMM_opt_n1725)

#forecast ETS
#alpha 0.15
ETS_MMM_0.15_n1725_forecast <- forecast.ets(ETS_MMM_0.15_n1725, h=h)$mean
ETS_0.15_n1725 <- plot(forecast(ETS_MMM_0.15_n1725, h=h))

#optimized alpha
ETS_MMM_opt_n1725_forecast <- forecast.ets(ETS_MMM_opt_n1725, h=h)$mean
ETS_opt_n1725 <- plot(forecast(ETS_MMM_opt_n1725, h=h))

#ES
#alpha 0.15
ES_MMM_0.15_n1725 <- es(train_n1725, "MMM", persistence=0.15, h=h)
## Warning: Length of persistence vector is wrong! Changing to estimation of
## persistence vector values.
ES_MMM_0.15_n1725
## Time elapsed: 0.12 seconds
## Model estimated using es() function: ETS(MMM)
## With optimal initialisation
## Distribution assumed in the model: Normal
## Loss function type: likelihood; Loss function value: 342.9278
## Persistence vector g:
##  alpha   beta  gamma 
## 0.0838 0.0545 0.0027 
## 
## Sample size: 48
## Number of estimated parameters: 17
## Number of degrees of freedom: 31
## Information criteria:
##      AIC     AICc      BIC     BICc 
## 719.8555 740.2555 751.6660 791.1522
#optimized alpha
ES_MMM_opt_n1725 <- es(train_n1725, "MMM", h=h)
opt_alpha_ES_N1725 <- coef(ES_MMM_opt_n1725)

#Forecast ES
#alpha 0.15
ES_MMM_0.15_forecast_n1725 <- forecast(ES_MMM_0.15_n1725, h=h)$mean
ES_0.15_n1725 <- plot(forecast(ES_MMM_0.15_n1725, h=h))

#optimized alpha
ES_MMM_opt_forecast_n1725 <- forecast(ES_MMM_opt_n1725, h=h)$mean
ES_opt_n1725 <- plot(forecast(ES_MMM_opt_n1725, h=h))

#Errors MAPE
ETS_SES_errors_n1725 <- test_n1725 - ETS_MMM_opt_n1725_forecast
ES_SES_errors_n1725 <- test_n1725 - ES_MMM_opt_forecast_n1725

ETS_SES_MAPE_n1725 <- 100 * mean(abs(ETS_SES_errors_n1725)/test_n1725)
ES_SES_MAPE_n1725 <- 100 * mean(abs(ES_SES_errors_n1725)/test_n1725)
#Initial seed
es_MZZ_initial_1_n1725 <- es(n1725, model="MZZ", initial=n1725[1],
h=h, holdout=TRUE)
## Warning: Predefined initials vector can only be used with preselected ETS model.
## Changing to estimation of initials.
es_MZZ_initial_1_n1725$accuracy
##            ME           MAE           MSE           MPE          MAPE 
##  2.493236e+02  2.980950e+02  1.312463e+05  8.424985e-02  1.015395e-01 
##           sCE          sMAE          sMSE          MASE         RMSSE 
##  1.164619e+00  9.945972e-02  1.461074e-02  6.825195e-01  6.510780e-01 
##          rMAE         rRMSE          rAME     asymmetry          sPIS 
##  5.378003e-01  5.540561e-01  4.592803e-01  8.373972e-01 -7.943126e+00
ES_MZZ_optinitial_n1725 <- es(n1725, model="MZZ", h=h, holdout=TRUE)
ES_MZZ_optinitial_n1725$accuracy
##            ME           MAE           MSE           MPE          MAPE 
##  2.493236e+02  2.980950e+02  1.312463e+05  8.424985e-02  1.015395e-01 
##           sCE          sMAE          sMSE          MASE         RMSSE 
##  1.164619e+00  9.945972e-02  1.461074e-02  6.825195e-01  6.510780e-01 
##          rMAE         rRMSE          rAME     asymmetry          sPIS 
##  5.378003e-01  5.540561e-01  4.592803e-01  8.373972e-01 -7.943126e+00
#Holt/Winters method
# ETS
ets_AAA_n1725 <- ets(train_n1725, model="AAA", damped=FALSE)
# ES
es_AAA_n1725 <- es(train_n1725, model="AAA", h=h)
#forecast
ES_AAA_forecast_n1725 <- forecast(es_AAA_n1725, h=h)$mean
ES_AAA_n1725 <- plot(forecast(es_AAA_n1725, h=h))

es_AAA_errors_n1725 <- test_n1725 - ES_AAA_forecast_n1725
#Error Metrics
AAA_MAE_n1725 <- mean(abs(es_AAA_errors_n1725))
AAA_MAPE_n1725 <- 100 * mean(abs(es_AAA_errors_n1725)/test_n1725)

#optimized model
# ETS
ets_ZZZ_n1725 <- ets(train_n1725, model="ZZZ")
# ES
es_ZZZ_n1725 <- es(train_n1725, model="ZZZ")

es_ZZZ_forecast_n1725 <- forecast(es_ZZZ_n1725, h=h)$mean
ES_ZZZ_n1725 <- plot(forecast(es_ZZZ_n1725, h=h))

ES_ZZZ_errors_n1725 <- test_n1725 - es_ZZZ_forecast_n1725

ES_ZZZ_MAPE_n1725 <- 100 * mean(abs(ES_ZZZ_errors_n1725)/test_n1725)

#FORECAST
ExSm_n1725 <- es(n1725, model = 'ZZZ')
forecast_es_n1725 <- forecast(ExSm_n1725, h=h)$mean
es_n1725 <- plot(forecast(ExSm_n1725, h=h))

table(forecast_es_n1725)
## forecast_es_n1725
## 2482.41404757242 2507.99897343387 2598.51130853837 2605.68889895339 
##                1                1                1                1 
## 2870.40811269844 2980.67785095458 2984.02521002955 3071.78194922568 
##                1                1                1                1 
## 3089.47572521573 3441.64209446861 3443.34339572928 3621.06040262753 
##                1                1                1                1 
## 3622.83927842651 3713.98146052169 
##                1                1

Exponential Smoothing for N1726

#SPLITTING THE SERIES
#total number of observations
n1726_length <- length(n1726)
#size of train set and forecasting horizon
train_length_n1726 <- 48
h <- 14
#training set
train_n1726 <- ts(n1726[1:train_length_n1726], frequency=12)
#test set
test_n1726 <- ts(n1726[(train_length_n1726+1):n1726_length],frequency=12)

#SMA
#Fitting a SMA 3
SMA_n1726 <- ma(train_n1726, order=3, centre=FALSE)
#get rid of NA values:
SMA_no_NAs_n1726 <- SMA_n1726[!is.na(SMA_n1726)]
# Then form a forecast:
SMA3_forecast_n1726 <- ts(rep(SMA_no_NAs_n1726[length(SMA_no_NAs_n1726)],14), frequency=12)
# Errors
SMA3_errors_n1726 <- test_n1726 - SMA3_forecast_n1726
#Error Metrics
SMA3_MAE_n1726 <- mean(abs(SMA3_errors_n1726))
SMA3_MAPE_n1726 <- 100 * mean(abs(SMA3_errors_n1726)/test_n1726)

#Fitting a SMA 5
SMA5_n1726 <- ma(train_n1726, order=5, centre=FALSE)
#get rid of NA values:
SMA5_no_NAs_n1726 <- SMA5_n1726[!is.na(SMA5_n1726)]
# Then form a forecast:
SMA5_forecast_n1726 <- ts(rep(SMA5_no_NAs_n1726[length(SMA5_no_NAs_n1726)],14), frequency=12)
# Errors
SMA5_errors_n1726 <- test_n1726 - SMA5_forecast_n1726
#Error Metrics
SMA5_MAE_n1726 <- mean(abs(SMA5_errors_n1726))
SMA5_MAPE_n1726 <- 100 * mean(abs(SMA5_errors_n1726)/test_n1726)

#Naive Forecast
naive_method_n1726 <- naive(train_n1726, h=h)
naive_forecast_n1726 <- naive_method_n1726$mean
plot(naive_method_n1726)

# Errors
naive_errors_n1726 <- test_n1726 - naive_forecast_n1726
#Error Metrics
naive_MAE_n1726 <- mean(abs(naive_errors_n1726))
naive_MAPE_n1726 <- 100 * mean(abs(naive_errors_n1726)/test_n1726)

#Simple exponential smoothing (ets) and (es)
#In the model, M stands for Multiplicative, Z for automatically selected

#ETS
#alpha 0.15
ETS_MZZ_0.15_n1726 <- ets(train_n1726, model="MMM", alpha=0.15)
ETS_MZZ_0.15_n1726
## ETS(M,M,M) 
## 
## Call:
## ets(y = train_n1726, model = "MMM", alpha = 0.15)
## 
##   Smoothing parameters:
##     alpha = 0.15 
##     beta  = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 4917.6864 
##     b = 0.9841 
##     s = 1.0559 1.5599 1.4087 1.2698 1.0683 0.8856
##            0.9522 0.5962 0.8067 0.7139 0.7461 0.9367
## 
##   sigma:  0.1736
## 
##      AIC     AICc      BIC 
## 804.7903 822.3387 834.7295
#alpha optimized
ETS_MZZ_opt_n1726 <- ets(train_n1726, "MMM")
opt_alpha_ETS_N1726 <- coef(ETS_MZZ_opt_n1726)

#forecast ETS
#alpha 0.15
ETS_MZZ_0.15_n1726_forecast <- forecast.ets(ETS_MZZ_0.15_n1726, h=h)$mean
ETS_0.15_n1726 <- plot(forecast(ETS_MZZ_0.15_n1726, h=h))

#optimized alpha
ETS_MZZ_opt_n1726_forecast <- forecast.ets(ETS_MZZ_opt_n1726, h=h)$mean
ETS_opt_n1726 <- plot(forecast(ETS_MZZ_opt_n1726, h=h))

#ES
#alpha 0.15
ES_MZZ_0.15_n1726 <- es(train_n1726, "MMM", persistence=0.15, h=h)
## Warning: Length of persistence vector is wrong! Changing to estimation of
## persistence vector values.
ES_MZZ_0.15_n1726
## Time elapsed: 0.16 seconds
## Model estimated using es() function: ETS(MMM)
## With optimal initialisation
## Distribution assumed in the model: Normal
## Loss function type: likelihood; Loss function value: 363.9173
## Persistence vector g:
##  alpha   beta  gamma 
## 0.0522 0.0519 0.0018 
## 
## Sample size: 48
## Number of estimated parameters: 17
## Number of degrees of freedom: 31
## Information criteria:
##      AIC     AICc      BIC     BICc 
## 761.8347 782.2347 793.6451 833.1313
#optimized alpha
ES_MZZ_opt_n1726 <- es(train_n1726, "MMM", h=h)
opt_alpha_ES_N1726 <- coef(ES_MZZ_opt_n1726)

#Forecast ES
#alpha 0.15
ES_MZZ_0.15_forecast_n1726 <- forecast(ES_MZZ_0.15_n1726, h=h)$mean
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
ES_0.15_n1726 <- plot(forecast(ES_MZZ_0.15_n1726, h=h))
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.

#optimized alpha
ES_MZZ_opt_forecast_n1726 <- forecast(ES_MZZ_opt_n1726, h=h)$mean
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
ES_opt_n1726 <- plot(forecast(ES_MZZ_opt_n1726, h=h))
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.
## Warning: Your model has a potentially explosive multiplicative trend. I cannot
## do anything about it, so please just be careful.

#Errors MAPE
ETS_SES_errors_n1726 <- test_n1726 - ETS_MZZ_opt_n1726_forecast
ES_SES_errors_n1726 <- test_n1726 - ES_MZZ_opt_forecast_n1726

ETS_SES_MAPE_n1726 <- 100 * mean(abs(ETS_SES_errors_n1726)/test_n1726)
ES_SES_MAPE_n1726 <- 100 * mean(abs(ES_SES_errors_n1726)/test_n1726)
#Initial seed
es_MZZ_initial_1_n1726 <- es(n1726, model="MZZ", initial=n1726[1],
h=h, holdout=TRUE)
## Warning: Predefined initials vector can only be used with preselected ETS model.
## Changing to estimation of initials.
es_MZZ_initial_1_n1726$accuracy
##            ME           MAE           MSE           MPE          MAPE 
## -1.932278e+02  3.426107e+02  2.397486e+05 -1.196374e-01  1.896696e-01 
##           sCE          sMAE          sMSE          MASE         RMSSE 
## -1.034277e+00  1.309906e-01  3.504570e-02  4.884381e-01  5.430249e-01 
##          rMAE         rRMSE          rAME     asymmetry          sPIS 
##  4.612067e-01  5.653460e-01  2.601143e-01 -4.191700e-01  7.790401e+00
ES_MZZ_optinitial_n1726 <- es(n1726, model="MZZ", h=h, holdout=TRUE)
ES_MZZ_optinitial_n1726$accuracy
##            ME           MAE           MSE           MPE          MAPE 
## -1.932278e+02  3.426107e+02  2.397486e+05 -1.196374e-01  1.896696e-01 
##           sCE          sMAE          sMSE          MASE         RMSSE 
## -1.034277e+00  1.309906e-01  3.504570e-02  4.884381e-01  5.430249e-01 
##          rMAE         rRMSE          rAME     asymmetry          sPIS 
##  4.612067e-01  5.653460e-01  2.601143e-01 -4.191700e-01  7.790401e+00
#Holt/Winters method
# ETS
ets_AAA_n1726 <- ets(train_n1726, model="AAA", damped=FALSE)
# ES
es_AAA_n1726 <- es(train_n1726, model="AAA", h=h)

#forecast
ES_AAA_forecast_n1726 <- forecast(es_AAA_n1726, h=h)$mean
ES_AAA_n1726 <- plot(forecast(es_AAA_n1726, h=h))

es_AAA_errors_n1726 <- test_n1726 - ES_AAA_forecast_n1726
#Error Metrics
AAA_MAE_n1726 <- mean(abs(es_AAA_errors_n1726))
AAA_MAPE_n1726 <- 100 * mean(abs(es_AAA_errors_n1726)/test_n1726)

#optimized model
# ETS
ets_ZZZ_n1726 <- ets(train_n1726, model="ZZZ")
# ES
es_ZZZ_n1726 <- es(train_n1726, model="ZZZ")

ets_ZZZ_forecast_n1726 <- forecast(ets_ZZZ_n1726, h=h)$mean
ETS_ZZZ_n1726 <- plot(forecast(ets_ZZZ_n1726, h=h))

es_ZZZ_forecast_n1726 <- forecast(es_ZZZ_n1726, h=h)$mean
ES_ZZZ_n1726 <- plot(forecast(es_ZZZ_n1726, h=h))

ETS_ZZZ_errors_n1726 <- test_n1726 - ets_ZZZ_forecast_n1726
ES_ZZZ_errors_n1726 <- test_n1726 - es_ZZZ_forecast_n1726

ETS_ZZZ_MAPE_n1726 <- 100 * mean(abs(ETS_ZZZ_errors_n1726)/test_n1726)
ES_ZZZ_MAPE_n1726 <- 100 * mean(abs(ES_ZZZ_errors_n1726)/test_n1726)

#FORECAST
ExSm_n1726 <- ets(n1726, model = 'MMM')
forecast_es_n1726 <- forecast(ExSm_n1726, h=h)$mean
es_n1726 <- plot(forecast(ExSm_n1726, h=h))

table(forecast_es_n1726)
## forecast_es_n1726
##  1384.0627474813 1555.85452387733 1677.31861932617  1815.2571048759 
##                1                1                1                1 
## 1862.58474339395 1868.00359814858 1906.70210892913 1925.41266365109 
##                1                1                1                1 
## 1981.35998143742 2007.16838156838 2087.19731974442  2375.4931320524 
##                1                1                1                1 
## 2609.02010311348 2650.92251888498 
##                1                1

ARIMA for N1725

#KPSS test, the Null and Alternative Hypotheses are:
#H0: The data is stationary
#H1: The data is not stationary
kpss.test(n1725)
## 
##  KPSS Test for Level Stationarity
## 
## data:  n1725
## KPSS Level = 0.62627, Truncation lag parameter = 4, p-value = 0.02025
#p value 0.02 (data is not stationary)

#ADF test, The null and alternative hypotheses are:
#H0: The data is not stationary.
#H1: The data is stationary.
adf.test(n1725)
## Warning in adf.test(n1725): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  n1725
## Dickey-Fuller = -7.2602, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)

#DIFFERENCING 
diff_n1725 <- diff(n1725, lag=12)
plot(diff_n1725)

kpss.test(diff_n1725)
## Warning in kpss.test(diff_n1725): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_n1725
## KPSS Level = 0.18729, Truncation lag parameter = 4, p-value = 0.1
#p value 0.1 (data is stationary)
adf.test(diff_n1725)
## Warning in adf.test(diff_n1725): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_n1725
## Dickey-Fuller = -4.7105, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)

#train and test
hh <- 12
n1725_train <- ts(n1725[1:(n1725_length - hh)], frequency=frequency(n1725),
start=start(n1725))
n1725_test <- n1725[(n1725_length - hh + 1):n1725_length]

#AR process
ar1.n1725 <- ts(diff(n1725_train), frequency=12)
tsdisplay(ar1.n1725)

#AR3 data, 3 spikes on the PACF

#MA process
ma1.n1725 <- ts(diff(n1725_train), frequency = 12)
tsdisplay(ma1.n1725)

#MA4 data, 4 in the ACF graph

#calculate arima
arima_n1725_1 <- arima(n1725_train, order=c(3,1,4))
tsdisplay(residuals(arima_n1725_1))

#extract coefficient
coef(arima_n1725_1)
##        ar1        ar2        ar3        ma1        ma2        ma3        ma4 
##  0.7237415  0.7249364 -0.9947287 -1.5049498 -0.1700583  1.5379328 -0.7964953
#forecast
arima_forecast_n1725 <- forecast(arima_n1725_1, h=12)
plot(forecast(arima_n1725_1, h=12))

arima_forecast_n1725 <- arima_forecast_n1725$mean
# Errors
arima_errors_n1725 <- n1725_test - arima_forecast_n1725
#Error Metrics
arima_MAE_n1725 <- mean(abs(arima_errors_n1725))
arima_MAPE_n1725 <- 100 * mean(abs(arima_errors_n1725)/n1725_test)

#Automated arima
auto_arima_n1725 <- auto.arima(n1725)
#best method with AIC
auto.arima(n1725, ic="aic")
## Series: n1725 
## ARIMA(1,0,0)(2,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     sar1     sar2    drift
##       0.2507  -0.6274  -0.3144  -5.4514
## s.e.  0.0926   0.0956   0.0960   2.6266
## 
## sigma^2 = 215559:  log likelihood = -862.57
## AIC=1735.14   AICc=1735.7   BIC=1748.82
#calculate arima 2
arima_n1725_2 <- arima(n1725_train, order=c(1,0,0), seasonal =c(2,1,0))
tsdisplay(residuals(arima_n1725_2))

#extract coefficient
coef(arima_n1725_2)
##        ar1       sar1       sar2 
##  0.2768041 -0.5516272 -0.2635639
#forecast
arima2_forecast_n1725 <- forecast(arima_n1725_2, h=12)
plot(forecast(arima_n1725_2, h=12))

arima2_forecast_n1725 <- arima2_forecast_n1725$mean
# Errors
arima2_errors_n1725 <- n1725_test - arima2_forecast_n1725
#Error Metrics
arima2_MAE_n1725 <- mean(abs(arima2_errors_n1725))
arima2_MAPE_n1725 <- 100 * mean(abs(arima2_errors_n1725)/n1725_test)


#Seasonal Arima

# Plot ACF and PACF of first differences
tsdisplay(diff(n1725_train))

# Plot ACF and PACF of first and seasonal differences
tsdisplay(diff(diff(n1725_train),lag=12))

# SARIMA(3,0,4)(2,1,3)[12]
fit1_n1725 <- arima(n1725_train, order=c(3,0,4), seasonal=c(2,1,3))
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
tsdisplay(residuals(fit1_n1725))

fit1_forecast_n1725 <- forecast(fit1_n1725, h=12)
plot(forecast(fit1_n1725, h=12))

fit1_forecast_n1725 <- fit1_forecast_n1725$mean

# Errors
sarima1_errors_n1725 <- n1725_test - fit1_forecast_n1725
#Error Metrics
sarima1_MAE_n1725 <- mean(abs(sarima1_errors_n1725))
sarima1_MAPE_n1725 <- 100 * mean(abs(sarima1_errors_n1725)/n1725_test)

#Automatic order selection
# Auto.arima
fit2_n1725 <- auto.arima(n1725_train)
fit2_forecast_n1725 <- forecast(fit2_n1725, h=12)
plot(forecast(fit2_n1725, h=12))

fit2_forecast_n1725 <- fit2_forecast_n1725$mean

# Errors
sarima_errors_n1725 <- n1725_test - fit2_forecast_n1725
#Error Metrics
sarima_MAE_n1725 <- mean(abs(sarima_errors_n1725))
sarima_MAPE_n1725 <- 100 * mean(abs(sarima_errors_n1725)/n1725_test)

# Fitted values of auto.arima
fitted(fit2_n1725)
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1984                                                                        
## 1985 2857.111 2637.324 2737.217 2937.009 4035.902 3796.135 3636.288 4795.120
## 1986 2907.030 2588.392 2735.748 2964.212 4027.091 3889.399 3898.104 4717.237
## 1987 2702.455 2537.081 2784.630 2863.180 3872.594 3958.524 4124.865 3870.749
## 1988 2707.241 2409.198 2602.874 2663.958 3369.797 4043.081 3610.966 3668.262
## 1989 2448.429 2407.502 2458.168 2599.321 3565.859 3909.760 3735.170 3395.294
## 1990 2166.622 2506.420 2346.104 3105.010 3353.639 3533.026 3077.656 3431.922
## 1991 2317.249 2163.777 2470.930 3041.607 3339.360 2888.527 3384.795 3515.301
## 1992 2120.949 2053.598 2789.025 3082.796 3231.773 3159.651 3275.320 2866.440
## 1993 2199.154 1992.070 2710.408 2786.406 2865.124 2987.086 3627.859 2997.190
## 1994 2054.478 2023.829 2733.750                                             
##           Sep      Oct      Nov      Dec
## 1984          3736.253 2877.106 2617.358
## 1985 4135.773 3654.304 2817.022 2662.577
## 1986 3805.921 3543.357 2779.320 2943.518
## 1987 3835.186 3265.928 2900.397 2651.749
## 1988 3610.299 3030.697 2908.377 2645.416
## 1989 3025.973 2738.608 2636.539 2495.228
## 1990 2752.332 2629.984 2364.344 2473.022
## 1991 2329.618 2701.379 2165.537 2468.873
## 1992 2576.273 2613.019 2289.364 2204.752
## 1993 2527.338 2976.697 2120.526 2428.445
## 1994
#calculate forecast for arima
arima_n1725_3 <- arima(n1725, order=c(1,0,0), seasonal = c(2,1,0))
tsdisplay(residuals(arima_n1725_3))

#extract coefficient
coef(arima_n1725_3)
##        ar1       sar1       sar2 
##  0.2885125 -0.6020891 -0.2879073
#forecast
ARIMA_forecast_n1725 <- forecast(arima_n1725_3, h=14)
plot(forecast(arima_n1725_3, h=14))

ARIMA for N1726

#KPSS test, the Null and Alternative Hypotheses are:
#H0: The data is stationary
#H1: The data is not stationary
kpss.test(n1726)
## Warning in kpss.test(n1726): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  n1726
## KPSS Level = 1.4102, Truncation lag parameter = 4, p-value = 0.01
#p value 0.01 (data is not stationary)

#ADF test, The null and alternative hypotheses are:
#H0: The data is not stationary.
#H1: The data is stationary.
adf.test(n1726)
## Warning in adf.test(n1726): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  n1726
## Dickey-Fuller = -5.1018, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)

#DIFFERENCING 
diff_n1726 <- diff(n1726, lag=12)
plot(diff_n1726)

kpss.test(diff_n1726)
## Warning in kpss.test(diff_n1726): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_n1726
## KPSS Level = 1.1438, Truncation lag parameter = 4, p-value = 0.01
#p value 0.01 (data is not stationary)
adf.test(diff_n1726)
## Warning in adf.test(diff_n1726): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_n1726
## Dickey-Fuller = -5.1869, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)

diff_n1726 <- diff(diff(n1726, lag=12))
plot(diff_n1726)

kpss.test(diff_n1726)
## Warning in kpss.test(diff_n1726): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_n1726
## KPSS Level = 0.02404, Truncation lag parameter = 4, p-value = 0.1
#p value 0.01 (data is not stationary)
adf.test(diff_n1726)
## Warning in adf.test(diff_n1726): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_n1726
## Dickey-Fuller = -6.4816, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#p value less than 0.01 (data is stationary)

#train and test
hh <- 12
n1726_train <- ts(n1726[1:(n1726_length - hh)], frequency=frequency(n1726),
start=start(n1726))
n1726_test <- n1726[(n1726_length - hh + 1):n1726_length]

#AR process
ar1.n1726 <- ts(diff(diff(n1726_train)), frequency=12)
tsdisplay(ar1.n1726)

#AR2 data, 2 spikes on the PACF

#MA process
ma1.n1726 <- ts(diff(diff(n1726_train)), frequency = 12)
tsdisplay(ma1.n1726)

#MA1 data, 1 spike in the ACF graph

#calculate arima
arima_n1726_1 <- arima((n1726_train), order=c(2,2,1))
tsdisplay(residuals(arima_n1726_1))

#extract coefficient
coef(arima_n1726_1)
##         ar1         ar2         ma1 
## -0.39683705 -0.08890487 -0.99999932
#forecast
arima_forecast_n1726 <- forecast(arima_n1726_1, h=12)
plot(forecast(arima_n1726_1, h=12))

arima_forecast_n1726 <- arima_forecast_n1726$mean
# Errors
arima_errors_n1726 <- n1726_test - arima_forecast_n1726
#Error Metrics
arima_MAE_n1726 <- mean(abs(arima_errors_n1726))
arima_MAPE_n1726 <- 100 * mean(abs(arima_errors_n1726)/n1726_test)

# done by box jenkings method
arimaBJ_n1726_1 <- arima((n1726_train), order=c(2,2,0))
tsdisplay(residuals(arimaBJ_n1726_1))

#extract coefficient
coef(arimaBJ_n1726_1)
##        ar1        ar2 
## -0.9766507 -0.4986086
#Spikes in the residuals, so we try again until no significant spikes appear
arimaBJ_n1726_1 <- arima((n1726_train), order=c(3,2,5))
tsdisplay(residuals(arimaBJ_n1726_1))

#extract coefficient
coef(arimaBJ_n1726_1)
##        ar1        ar2        ar3        ma1        ma2        ma3        ma4 
##  0.7384934  0.7364873 -0.9843399 -2.6038678  1.4022215  1.8407955 -2.3984817 
##        ma5 
##  0.7636000
#forecast
arimaBJ_forecast_n1726 <- forecast(arimaBJ_n1726_1, h=12)
plot(forecast(arimaBJ_n1726_1, h=12))

arimaBJ_forecast_n1726 <- arimaBJ_forecast_n1726$mean
# Errors
arima_errors_n1726 <- n1726_test - arimaBJ_forecast_n1726
#Error Metrics
arima_MAE_n1726 <- mean(abs(arima_errors_n1726))
arima_MAPE_n1726 <- 100 * mean(abs(arima_errors_n1726)/n1726_test)

#Automated arima
auto_arima_n1726 <- auto.arima(n1726)
#best method with AIC
auto.arima(n1726, ic="aic")
## Series: n1726 
## ARIMA(2,1,1)(1,0,2)[12] with drift 
## 
## Coefficients:
##          ar1     ar2      ma1    sar1     sma1    sma2     drift
##       0.1576  0.1199  -0.9104  0.8874  -0.6294  0.1645  -21.9005
## s.e.  0.1105  0.0991   0.0377  0.0602   0.1394  0.1427   22.5452
## 
## sigma^2 = 433355:  log likelihood = -990.45
## AIC=1996.89   AICc=1998.13   BIC=2019.52
#calculate arima 2
arima_n1726_2 <- arima(n1726_train, order=c(2,1,1), seasonal = c(1,0,2))
tsdisplay(residuals(arima_n1726_2))

#extract coefficient
coef(arima_n1726_2)
##        ar1        ar2        ma1       sar1       sma1       sma2 
##  0.1513244  0.1280471 -0.8976244  0.8897934 -0.6729108  0.2208005
#forecast
arima2_forecast_n1726 <- forecast(arima_n1726_2, h=12)
plot(forecast(arima_n1726_2, h=12))

arima2_forecast_n1726 <- arima2_forecast_n1726$mean
# Errors
arima2_errors_n1726 <- n1726_test - arima2_forecast_n1726
#Error Metrics
arima2_MAE_n1726 <- mean(abs(arima2_errors_n1726))
arima2_MAPE_n1726 <- 100 * mean(abs(arima2_errors_n1726)/n1726_test)



#Seasonal Arima

# Plot ACF and PACF of first differences
tsdisplay(diff(n1726_train))

# Plot ACF and PACF of first and seasonal differences
tsdisplay(diff(diff(n1726_train),lag=12))

# SARIMA(2,1,3)(1,1,4)[12]
fit1_n1726 <- Arima(n1726_train, order=c(2,1,3), seasonal=c(1,1,4))
tsdisplay(residuals(fit1_n1726))

fit1_forecast_n1726 <- forecast(fit1_n1726, h=12)
plot(forecast(fit1_n1726, h=12))

fit1_forecast_n1726 <- fit1_forecast_n1726$mean

# Errors
sarima1_errors_n1726 <- n1726_test - fit1_forecast_n1726
#Error Metrics
sarima1_MAE_n1726 <- mean(abs(sarima1_errors_n1726))
sarima1_MAPE_n1726 <- 100 * mean(abs(sarima1_errors_n1726)/n1726_test)

#Automatic order selection
# Auto.arima
fit2_n1726 <- auto.arima(n1726_train)
fit2_forecast_n1726 <- forecast(fit2_n1726, h=12)
plot(forecast(fit2_n1726, h=12))

fit2_forecast_n1726 <- fit2_forecast_n1726$mean

# Errors
sarima_errors_n1726 <- n1726_test - fit2_forecast_n1726
#Error Metrics
sarima_MAE_n1726 <- mean(abs(sarima_errors_n1726))
sarima_MAPE_n1726 <- 100 * mean(abs(sarima_errors_n1726)/n1726_test)

# Fitted values of auto.arima
fitted(fit2_n1726)
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1984                                                                        
## 1985 3659.363 3046.769 3554.200 3679.113 3733.285 4215.661 4768.083 5367.341
## 1986 3230.556 2367.471 4157.134 3437.085 3751.565 4701.185 5254.458 5630.973
## 1987 2282.994 1877.169 3570.505 2458.902 3150.023 4097.539 4076.360 4724.471
## 1988 1840.830 1295.797 2579.227 1679.439 2744.282 3460.053 3401.876 3508.929
## 1989 2026.409 1185.177 1939.128 1861.023 2298.558 2383.360 2883.963 2784.947
## 1990 1544.456 1208.532 1650.551 1592.713 1763.482 2284.260 2161.552 2936.883
## 1991 1436.422 1205.851 1571.471 1633.793 1903.915 1904.999 2434.148 2577.618
## 1992 1468.838 1367.970 2010.841 1635.070 1994.092 1923.067 2410.144 2366.583
## 1993 1914.214 1605.340 1984.793 2099.025 1671.239 2247.453 2326.148 2434.278
## 1994 1659.822 1613.570 1995.280                                             
##           Sep      Oct      Nov      Dec
## 1984          4575.396 4006.962 3779.971
## 1985 4811.274 4564.364 3684.213 3717.764
## 1986 3723.429 3750.421 2565.619 2683.762
## 1987 2649.605 2537.116 1873.299 1356.776
## 1988 2559.599 1835.356 1672.308 1242.913
## 1989 1957.842 1862.389 1300.624 1656.134
## 1990 2097.379 1548.658 1445.846 1920.430
## 1991 1650.411 1537.436 1851.791 1820.152
## 1992 1894.249 2198.315 2007.081 1648.329
## 1993 2135.077 2481.636 1992.865 2147.528
## 1994
#calculate forecast for arima
arima_n1726_3 <- auto.arima(n1726)
tsdisplay(residuals(arima_n1726_3))

#extract coefficient
coef(arima_n1726_3)
##         ar1         ar2         ma1        sar1        sma1        sma2 
##   0.1575712   0.1198517  -0.9103883   0.8874044  -0.6293673   0.1644808 
##       drift 
## -21.9004665
#forecast
ARIMA_forecast_n1726 <- forecast(arima_n1726_3, h=14)
plot(forecast(arima_n1726_3, h=14))

REGRESSION for N1725

#Convert to Quarterly Time Series starting from Q4 in 1984
N1725 <- ts(n1725[7:(n1725_length)], frequency=frequency(n1725),
start=start(n1725))

#Create Seasonal Dummies
M1_n1725 <- rep(c(1,0,0,0,0,0,0,0,0,0,0,0),10)
M2_n1725 <- rep(c(0,1,0,0,0,0,0,0,0,0,0,0),10)
M3_n1725 <- rep(c(0,0,1,0,0,0,0,0,0,0,0,0),10)
M4_n1725 <- rep(c(0,0,0,1,0,0,0,0,0,0,0,0),10)
M5_n1725 <- rep(c(0,0,0,0,1,0,0,0,0,0,0,0),10)
M6_n1725 <- rep(c(0,0,0,0,0,1,0,0,0,0,0,0),10)
M7_n1725 <- rep(c(0,0,0,0,0,0,1,0,0,0,0,0),10)
M8_n1725 <- rep(c(0,0,0,0,0,0,0,1,0,0,0,0),10)
M9_n1725 <- rep(c(0,0,0,0,0,0,0,0,1,0,0,0),10)
M10_n1725 <- rep(c(0,0,0,0,0,0,0,0,0,1,0,0),10)
M11_n1725 <- rep(c(0,0,0,0,0,0,0,0,0,0,1,0),10)
#M12_n1725 <- rep(c(0,0,0,0,0,0,0,0,0,0,0,1),10)


N1725 <- cbind(N1725,M1_n1725,M2_n1725,M3_n1725,M4_n1725, M5_n1725, M6_n1725,M7_n1725, M8_n1725, M9_n1725, M10_n1725, M11_n1725) #M12_n1725

colnames(N1725) <- c("data","M1","M2","M3","M4",'M5','M6',"M7","M8","M9","M10",'M11' )#M12

#Use the Seasonal Dummies
fit3_n1725 <- lm(data ~ ., data=N1725)
summary(fit3_n1725)
## 
## Call:
## lm(formula = data ~ ., data = N1725)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -1078   -265    -24    220   1242 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2808.0      147.9  18.984  < 2e-16 ***
## M1              96.0      209.2   0.459 0.647199    
## M2             596.0      209.2   2.849 0.005249 ** 
## M3             780.0      209.2   3.729 0.000308 ***
## M4             762.0      209.2   3.643 0.000416 ***
## M5             750.0      209.2   3.585 0.000507 ***
## M6             216.0      209.2   1.033 0.304091    
## M7             204.0      209.2   0.975 0.331617    
## M8            -226.0      209.2  -1.080 0.282361    
## M9            -224.0      209.2  -1.071 0.286619    
## M10           -354.0      209.2  -1.692 0.093464 .  
## M11           -384.0      209.2  -1.836 0.069144 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 467.7 on 108 degrees of freedom
## Multiple R-squared:  0.4786, Adjusted R-squared:  0.4255 
## F-statistic: 9.013 on 11 and 108 DF,  p-value: 3.156e-11
plot(fit3_n1725)

## hat values (leverages) are all = 0.1
##  and there are no factor predictors; no plot no. 5

#Lag 1 of Sales
L1_data <- lag(N1725[,"data"],-1)

N1725_colnames <- colnames(N1725)
N1725 <- cbind(N1725,L1_data)
colnames(N1725) <- c(N1725_colnames,"L1_data")

#Use seasonal dummies and lag
fit4_n1725 <- lm(data ~ ., data=N1725)
summary(fit4_n1725)
## 
## Call:
## lm(formula = data ~ ., data = N1725)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1082.47  -283.94    -5.14   167.01  1210.72 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1724.73329  249.87276   6.902 3.89e-10 ***
## M1           -65.10744  196.55048  -0.331   0.7411    
## M2           381.49174  193.54216   1.971   0.0513 .  
## M3           342.04564  207.35786   1.650   0.1020    
## M4           241.81747  214.45380   1.128   0.2620    
## M5           237.86153  213.71700   1.113   0.2682    
## M6          -290.77576  213.23078  -1.364   0.1756    
## M7           -64.13532  196.06600  -0.327   0.7442    
## M8          -488.77262  195.78997  -2.496   0.0141 *  
## M9          -294.60897  189.47260  -1.555   0.1230    
## M10         -425.50275  189.48533  -2.246   0.0268 *  
## M11         -397.40677  188.99017  -2.103   0.0379 *  
## L1_data        0.44689    0.08711   5.130 1.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 422.6 on 106 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.5823, Adjusted R-squared:  0.5351 
## F-statistic: 12.32 on 12 and 106 DF,  p-value: 2.63e-15
#Multicollinearity

# Correlation matrix for N1725 data
cor(N1725,use="c")
##                 data          M1          M2          M3          M4
## data     1.000000000 -0.04315862  0.20168007  0.29199393  0.28315888
## M1      -0.043158622  1.00000000 -0.08663865 -0.08663865 -0.08663865
## M2       0.201680067 -0.08663865  1.00000000 -0.09174312 -0.09174312
## M3       0.291993932 -0.08663865 -0.09174312  1.00000000 -0.09174312
## M4       0.283158880 -0.08663865 -0.09174312 -0.09174312  1.00000000
## M5       0.277268846 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M6       0.015162302 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M7       0.009272267 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M8      -0.201787309 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M9      -0.200805636 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M10     -0.264614345 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## M11     -0.279339432 -0.08663865 -0.09174312 -0.09174312 -0.09174312
## L1_data  0.608672558 -0.10022847 -0.04308229  0.20235971  0.29268237
##                  M5          M6           M7           M8          M9
## data     0.27726885  0.01516230  0.009272267 -0.201787309 -0.20080564
## M1      -0.08663865 -0.08663865 -0.086638647 -0.086638647 -0.08663865
## M2      -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M3      -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M4      -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M5       1.00000000 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M6      -0.09174312  1.00000000 -0.091743119 -0.091743119 -0.09174312
## M7      -0.09174312 -0.09174312  1.000000000 -0.091743119 -0.09174312
## M8      -0.09174312 -0.09174312 -0.091743119  1.000000000 -0.09174312
## M9      -0.09174312 -0.09174312 -0.091743119 -0.091743119  1.00000000
## M10     -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## M11     -0.09174312 -0.09174312 -0.091743119 -0.091743119 -0.09174312
## L1_data  0.28384645  0.27795585  0.015823790  0.009933182 -0.20114694
##                 M10         M11      L1_data
## data    -0.26461435 -0.27933943  0.608672558
## M1      -0.08663865 -0.08663865 -0.100228471
## M2      -0.09174312 -0.09174312 -0.043082290
## M3      -0.09174312 -0.09174312  0.202359709
## M4      -0.09174312 -0.09174312  0.292682365
## M5      -0.09174312 -0.09174312  0.283846453
## M6      -0.09174312 -0.09174312  0.277955845
## M7      -0.09174312 -0.09174312  0.015823790
## M8      -0.09174312 -0.09174312  0.009933182
## M9      -0.09174312 -0.09174312 -0.201146937
## M10      1.00000000 -0.09174312 -0.200165169
## M11     -0.09174312  1.00000000 -0.263980089
## L1_data -0.20016517 -0.26398009  1.000000000
#VIF for fit4
vif(fit4_n1725)
##       M1       M2       M3       M4       M5       M6       M7       M8 
## 1.799994 1.921610 2.205744 2.359292 2.343108 2.332459 1.972054 1.966505 
##       M9      M10      M11  L1_data 
## 1.841649 1.841897 1.832283 1.925234
#Analyse residuals of multiple components
tsdisplay(residuals(fit4_n1725))

#Lag 12 of Sales
L12_data <- lag(N1725[,"data"],-12)

N1725_colnames <- colnames(N1725)
N1725 <- cbind(N1725,L12_data)
colnames(N1725) <- c(N1725_colnames,"L12_data")

#Use seasonal dummies and lag
fit5_n1725 <- lm(data ~ ., data=N1725)
summary(fit5_n1725)
## 
## Call:
## lm(formula = data ~ ., data = N1725)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -965.36 -243.63   -8.17  186.20 1213.60 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1699.53170  328.61405   5.172  1.3e-06 ***
## M1           -26.88211  197.48089  -0.136 0.892013    
## M2           317.19963  205.27656   1.545 0.125651    
## M3           380.89091  220.14659   1.730 0.086882 .  
## M4           300.78126  228.29686   1.318 0.190873    
## M5           156.96437  228.68033   0.686 0.494156    
## M6          -263.43219  217.29564  -1.212 0.228428    
## M7           -36.43339  200.08539  -0.182 0.855905    
## M8          -415.58046  202.65599  -2.051 0.043083 *  
## M9          -302.83900  195.44882  -1.549 0.124633    
## M10         -385.58356  197.07519  -1.957 0.053370 .  
## M11         -356.55724  197.21290  -1.808 0.073807 .  
## L1_data        0.34898    0.09792   3.564 0.000577 ***
## L12_data       0.08888    0.08848   1.005 0.317712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 411.7 on 94 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.5766, Adjusted R-squared:  0.5181 
## F-statistic: 9.849 on 13 and 94 DF,  p-value: 1.184e-12
#Fit Regression with Seasonally Dummies, and lag 12
fit6_n1725 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data, data=N1725)
summary(fit6_n1725)
## 
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + 
##     M10 + M11 + L12_data, data = N1725)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -974.61 -256.40  -23.02  210.99 1281.33 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2336.57547  292.22855   7.996 3.07e-12 ***
## M1            96.01280  206.07319   0.466  0.64234    
## M2           446.56788  214.12193   2.086  0.03970 *  
## M3           648.42116  219.32598   2.956  0.00393 ** 
## M4           651.19417  218.35651   2.982  0.00364 ** 
## M5           504.69032  219.18549   2.303  0.02349 *  
## M6            74.96144  207.13744   0.362  0.71824    
## M7           124.95192  206.54654   0.605  0.54665    
## M8          -213.95339  206.23379  -1.037  0.30217    
## M9          -239.54965  206.27841  -1.161  0.24843    
## M10         -319.81291  207.94105  -1.538  0.12737    
## M11         -323.86937  208.77864  -1.551  0.12417    
## L12_data       0.16056    0.09132   1.758  0.08192 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 436.4 on 95 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.5194, Adjusted R-squared:  0.4587 
## F-statistic: 8.558 on 12 and 95 DF,  p-value: 8.695e-11
#Lag 3 of Sales
L3_data <- lag(N1725[,"data"],-3)

N1725_colnames <- colnames(N1725)
N1725 <- cbind(N1725,L3_data)
colnames(N1725) <- c(N1725_colnames,"L3_data")

#Fit Regression with Seasonally Dummies, log (lag 3), lag 1, and lag 12
fit7_n1725 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data + L1_data + I(log(L3_data)), data=N1725)
summary(fit7_n1725)
## 
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + 
##     M10 + M11 + L12_data + L1_data + I(log(L3_data)), data = N1725)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -907.96 -261.99  -19.84  193.67 1226.40 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -1.708e+03  2.335e+03  -0.732  0.46626   
## M1               3.073e+00  1.973e+02   0.016  0.98761   
## M2               3.720e+02  2.074e+02   1.794  0.07607 . 
## M3               3.860e+02  2.188e+02   1.764  0.08097 . 
## M4               2.987e+02  2.269e+02   1.317  0.19122   
## M5               8.960e+01  2.318e+02   0.387  0.70001   
## M6              -3.744e+02  2.287e+02  -1.637  0.10498   
## M7              -1.687e+02  2.181e+02  -0.773  0.44135   
## M8              -5.346e+02  2.170e+02  -2.464  0.01559 * 
## M9              -3.620e+02  1.983e+02  -1.825  0.07120 . 
## M10             -4.573e+02  2.018e+02  -2.266  0.02576 * 
## M11             -3.642e+02  1.961e+02  -1.857  0.06641 . 
## L12_data         7.202e-02  8.868e-02   0.812  0.41874   
## L1_data          3.102e-01  1.008e-01   3.077  0.00275 **
## I(log(L3_data))  4.534e+02  3.076e+02   1.474  0.14384   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 409.2 on 93 degrees of freedom
##   (28 observations deleted due to missingness)
## Multiple R-squared:  0.5863, Adjusted R-squared:  0.524 
## F-statistic: 9.415 on 14 and 93 DF,  p-value: 1.441e-12
#FORECASTING
# Split the data into train and test sets
N1725_train <- window(N1725, start(N1725), (1993+8/12))
#we have to sustitude all NA with 0
N1725_train[is.na(N1725_train)] = 0

N1725_test <- window(N1725, (1993+8/12), end(N1725))

plot(N1725[,"data"])

plot(N1725_train[,"data"])

plot(N1725_test[,"data"])

# Regression with no variables
N1725_Model0 <- lm(data ~ 1, data=N1725_train)
# Regression with all variables
N1725_Model1 <- lm(data ~ ., data=N1725_train)

# Summary of Regressions
summary(N1725_Model0)
## 
## Call:
## lm(formula = data ~ 1, data = N1725_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1077.78  -462.78   -67.78   422.22  1802.22 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2997.78      61.42   48.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 638.3 on 107 degrees of freedom
summary(N1725_Model1)
## 
## Call:
## lm(formula = data ~ ., data = N1725_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1004.75  -234.18   -65.78   213.71  1079.03 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2133.53923  280.84131   7.597 2.33e-11 ***
## M1           184.76961  209.10399   0.884  0.37918    
## M2           622.13924  221.22987   2.812  0.00600 ** 
## M3           638.12662  232.03975   2.750  0.00716 ** 
## M4           518.97581  233.11257   2.226  0.02841 *  
## M5           495.56310  230.17843   2.153  0.03391 *  
## M6          -147.19417  230.67342  -0.638  0.52497    
## M7           -43.20402  218.33680  -0.198  0.84357    
## M8          -439.01776  218.66804  -2.008  0.04758 *  
## M9          -283.20869  208.02217  -1.361  0.17667    
## M10         -463.13098  207.86946  -2.228  0.02829 *  
## M11         -448.43284  206.00738  -2.177  0.03203 *  
## L1_data        0.28960    0.08759   3.306  0.00134 ** 
## L12_data      -0.11835    0.04122  -2.871  0.00507 ** 
## L3_data        0.09217    0.07690   1.199  0.23375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 436 on 93 degrees of freedom
## Multiple R-squared:  0.5945, Adjusted R-squared:  0.5334 
## F-statistic: 9.738 on 14 and 93 DF,  p-value: 6.178e-13
# AIC forward selection
N1725_Model2 <- step(N1725_Model0, formula(N1725_Model1), direction="forward")
## Start:  AIC=1396.09
## data ~ 1
## 
##            Df Sum of Sq      RSS    AIC
## + L1_data   1  13819646 29773421 1356.9
## + M11       1   3693382 39899685 1388.5
## + M3        1   3666667 39926400 1388.6
## + M5        1   3613527 39979539 1388.8
## + M4        1   3302836 40290230 1389.6
## + M10       1   3030303 40562764 1390.3
## + M2        1   1806012 41787055 1393.5
## + M9        1   1501867 42091200 1394.3
## + M8        1   1451103 42141964 1394.4
## <none>                  43593067 1396.1
## + L12_data  1    158039 43435027 1397.7
## + L3_data   1     89757 43503310 1397.9
## + M1        1     73745 43519321 1397.9
## + M6        1     19394 43573673 1398.0
## + M7        1      3103 43589964 1398.1
## 
## Step:  AIC=1356.92
## data ~ L1_data
## 
##            Df Sum of Sq      RSS    AIC
## + M2        1   2069919 27703501 1351.1
## + L3_data   1   1699908 28073513 1352.6
## + M8        1   1490851 28282570 1353.4
## + M3        1   1449876 28323545 1353.5
## + M10       1   1372365 28401056 1353.8
## + M11       1   1215930 28557490 1354.4
## + M5        1    916197 28857224 1355.5
## + M6        1    839455 28933965 1355.8
## + M4        1    675595 29097826 1356.4
## <none>                  29773421 1356.9
## + M9        1    435975 29337445 1357.3
## + M1        1    355314 29418107 1357.6
## + M7        1     29652 29743768 1358.8
## + L12_data  1      4513 29768907 1358.9
## 
## Step:  AIC=1351.13
## data ~ L1_data + M2
## 
##            Df Sum of Sq      RSS    AIC
## + M3        1   1783156 25920345 1346.0
## + M8        1   1198705 26504796 1348.4
## + M5        1   1181534 26521967 1348.4
## + M10       1   1077437 26626065 1348.8
## + M11       1    930303 26773198 1349.4
## + M4        1    904799 26798702 1349.5
## + L3_data   1    778328 26925174 1350.1
## + M6        1    629829 27073672 1350.7
## + M1        1    551711 27151791 1351.0
## <none>                  27703501 1351.1
## + M9        1    275193 27428309 1352.1
## + L12_data  1     52376 27651126 1352.9
## + M7        1      1816 27701685 1353.1
## 
## Step:  AIC=1345.95
## data ~ L1_data + M2 + M3
## 
##            Df Sum of Sq      RSS    AIC
## + M5        1   1735789 24184556 1340.5
## + M4        1   1404881 24515464 1341.9
## + M8        1    927705 24992640 1344.0
## + M10       1    894407 25025938 1344.2
## + M11       1    791320 25129025 1344.6
## + M1        1    672858 25247487 1345.1
## <none>                  25920345 1346.0
## + M6        1    340975 25579370 1346.5
## + L3_data   1    207476 25712869 1347.1
## + M9        1    185162 25735183 1347.2
## + L12_data  1    163599 25756746 1347.3
## + M7        1     10404 25909941 1347.9
## 
## Step:  AIC=1340.46
## data ~ L1_data + M2 + M3 + M5
## 
##            Df Sum of Sq      RSS    AIC
## + M4        1   2272613 21911942 1331.8
## + M1        1    791035 23393521 1338.9
## + M10       1    724660 23459896 1339.2
## + M11       1    676247 23508309 1339.4
## + M8        1    652095 23532461 1339.5
## <none>                  24184556 1340.5
## + L12_data  1    349792 23834763 1340.9
## + L3_data   1    228147 23956408 1341.4
## + M9        1    111101 24073455 1342.0
## + M6        1     93614 24090942 1342.0
## + M7        1     76912 24107643 1342.1
## 
## Step:  AIC=1331.8
## data ~ L1_data + M2 + M3 + M5 + M4
## 
##            Df Sum of Sq      RSS    AIC
## + M1        1    967581 20944361 1328.9
## + L12_data  1    835730 21076212 1329.6
## + M11       1    533992 21377950 1331.1
## + M10       1    516252 21395690 1331.2
## <none>                  21911942 1331.8
## + M8        1    337118 21574824 1332.1
## + M7        1    287047 21624895 1332.4
## + M9        1     38703 21873239 1333.6
## + M6        1     15566 21896377 1333.7
## + L3_data   1     12271 21899671 1333.7
## 
## Step:  AIC=1328.93
## data ~ L1_data + M2 + M3 + M5 + M4 + M1
## 
##            Df Sum of Sq      RSS    AIC
## + L12_data  1    953403 19990958 1325.9
## + M7        1    437715 20506646 1328.7
## <none>                  20944361 1328.9
## + M10       1    324743 20619617 1329.2
## + M11       1    322977 20621384 1329.2
## + M8        1    210580 20733781 1329.8
## + L3_data   1     47484 20896877 1330.7
## + M6        1     42692 20901669 1330.7
## + M9        1      1764 20942596 1330.9
## 
## Step:  AIC=1325.9
## data ~ L1_data + M2 + M3 + M5 + M4 + M1 + L12_data
## 
##           Df Sum of Sq      RSS    AIC
## + M7       1    567306 19423651 1324.8
## + M11      1    436543 19554415 1325.5
## + M10      1    406645 19584313 1325.7
## <none>                 19990958 1325.9
## + L3_data  1    246813 19744145 1326.5
## + M8       1    229463 19761495 1326.7
## + M6       1     92910 19898048 1327.4
## + M9       1      2736 19988222 1327.9
## 
## Step:  AIC=1324.79
## data ~ L1_data + M2 + M3 + M5 + M4 + M1 + L12_data + M7
## 
##           Df Sum of Sq      RSS    AIC
## <none>                 19423651 1324.8
## + M11      1    314075 19109576 1325.0
## + M10      1    281016 19142635 1325.2
## + M6       1    258713 19164938 1325.3
## + L3_data  1    156275 19267377 1325.9
## + M8       1    122888 19300764 1326.1
## + M9       1      4369 19419282 1326.8
#backward
N1725_Model3 <- step(N1725_Model1, formula(N1725_Model0), direction="backward")
## Start:  AIC=1326.62
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + 
##     L1_data + L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M7        1      7443 17685367 1324.7
## - M6        1     77399 17755323 1325.1
## - M1        1    148417 17826341 1325.5
## - L3_data   1    273057 17950981 1326.3
## <none>                  17677924 1326.6
## - M9        1    352324 18030248 1326.8
## - M8        1    766199 18444123 1329.2
## - M5        1    881083 18559007 1329.9
## - M11       1    900695 18578618 1330.0
## - M4        1    942131 18620054 1330.2
## - M10       1    943571 18621495 1330.2
## - M3        1   1437597 19115521 1333.1
## - M2        1   1503267 19181190 1333.4
## - L12_data  1   1566802 19244726 1333.8
## - L1_data   1   2077800 19755724 1336.6
## 
## Step:  AIC=1324.66
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M6        1     76928 17762295 1323.1
## - M1        1    219055 17904422 1324.0
## - L3_data   1    266653 17952020 1324.3
## <none>                  17685367 1324.7
## - M9        1    410167 18095534 1325.1
## - M8        1    988298 18673665 1328.5
## - M11       1   1062586 18747952 1329.0
## - M10       1   1160902 18846269 1329.5
## - M4        1   1375224 19060590 1330.8
## - M5        1   1383944 19069311 1330.8
## - L12_data  1   1568759 19254126 1331.8
## - M3        1   1945972 19631339 1333.9
## - M2        1   1951974 19637341 1334.0
## - L1_data   1   2083315 19768682 1334.7
## 
## Step:  AIC=1323.13
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - L3_data   1    261807 18024101 1322.7
## - M1        1    306502 18068796 1323.0
## <none>                  17762295 1323.1
## - M9        1    342214 18104508 1323.2
## - M8        1    916304 18678599 1326.6
## - M11       1    985892 18748187 1327.0
## - M10       1   1085102 18847396 1327.5
## - L12_data  1   1601481 19363775 1330.5
## - M4        1   1967694 19729989 1332.5
## - M5        1   2025065 19787359 1332.8
## - L1_data   1   2044312 19806607 1332.9
## - M2        1   2412251 20174546 1334.9
## - M3        1   2583984 20346279 1335.8
## 
## Step:  AIC=1322.71
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data
## 
##            Df Sum of Sq      RSS    AIC
## - M1        1    167130 18191232 1321.7
## <none>                  18024101 1322.7
## - M9        1    337437 18361539 1322.7
## - M8        1    777241 18801342 1325.3
## - M10       1   1090769 19114870 1327.1
## - M11       1   1128923 19153025 1327.3
## - L12_data  1   1373560 19397661 1328.6
## - M4        1   1713012 19737113 1330.5
## - M5        1   1911027 19935129 1331.6
## - M2        1   2205528 20229629 1333.2
## - M3        1   2369049 20393150 1334.0
## - L1_data   1   3579231 21603333 1340.3
## 
## Step:  AIC=1321.71
## data ~ M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + L12_data
## 
##            Df Sum of Sq      RSS    AIC
## <none>                  18191232 1321.7
## - M9        1    523941 18715173 1322.8
## - M8        1   1026031 19217262 1325.6
## - L12_data  1   1375593 19566825 1327.6
## - M10       1   1454838 19646070 1328.0
## - M11       1   1524210 19715441 1328.4
## - M4        1   1580397 19771628 1328.7
## - M5        1   1771797 19963029 1329.7
## - M2        1   2039781 20231012 1331.2
## - M3        1   2214465 20405696 1332.1
## - L1_data   1   3414108 21605339 1338.3
#both
N1725_Model4 <- step(N1725_Model1, direction="both")
## Start:  AIC=1326.62
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + 
##     L1_data + L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M7        1      7443 17685367 1324.7
## - M6        1     77399 17755323 1325.1
## - M1        1    148417 17826341 1325.5
## - L3_data   1    273057 17950981 1326.3
## <none>                  17677924 1326.6
## - M9        1    352324 18030248 1326.8
## - M8        1    766199 18444123 1329.2
## - M5        1    881083 18559007 1329.9
## - M11       1    900695 18578618 1330.0
## - M4        1    942131 18620054 1330.2
## - M10       1    943571 18621495 1330.2
## - M3        1   1437597 19115521 1333.1
## - M2        1   1503267 19181190 1333.4
## - L12_data  1   1566802 19244726 1333.8
## - L1_data   1   2077800 19755724 1336.6
## 
## Step:  AIC=1324.66
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M6        1     76928 17762295 1323.1
## - M1        1    219055 17904422 1324.0
## - L3_data   1    266653 17952020 1324.3
## <none>                  17685367 1324.7
## - M9        1    410167 18095534 1325.1
## + M7        1      7443 17677924 1326.6
## - M8        1    988298 18673665 1328.5
## - M11       1   1062586 18747952 1329.0
## - M10       1   1160902 18846269 1329.5
## - M4        1   1375224 19060590 1330.8
## - M5        1   1383944 19069311 1330.8
## - L12_data  1   1568759 19254126 1331.8
## - M3        1   1945972 19631339 1333.9
## - M2        1   1951974 19637341 1334.0
## - L1_data   1   2083315 19768682 1334.7
## 
## Step:  AIC=1323.13
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - L3_data   1    261807 18024101 1322.7
## - M1        1    306502 18068796 1323.0
## <none>                  17762295 1323.1
## - M9        1    342214 18104508 1323.2
## + M6        1     76928 17685367 1324.7
## + M7        1      6972 17755323 1325.1
## - M8        1    916304 18678599 1326.6
## - M11       1    985892 18748187 1327.0
## - M10       1   1085102 18847396 1327.5
## - L12_data  1   1601481 19363775 1330.5
## - M4        1   1967694 19729989 1332.5
## - M5        1   2025065 19787359 1332.8
## - L1_data   1   2044312 19806607 1332.9
## - M2        1   2412251 20174546 1334.9
## - M3        1   2583984 20346279 1335.8
## 
## Step:  AIC=1322.71
## data ~ M1 + M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data
## 
##            Df Sum of Sq      RSS    AIC
## - M1        1    167130 18191232 1321.7
## <none>                  18024101 1322.7
## - M9        1    337437 18361539 1322.7
## + L3_data   1    261807 17762295 1323.1
## + M6        1     72081 17952020 1324.3
## + M7        1     30201 17993900 1324.5
## - M8        1    777241 18801342 1325.3
## - M10       1   1090769 19114870 1327.1
## - M11       1   1128923 19153025 1327.3
## - L12_data  1   1373560 19397661 1328.6
## - M4        1   1713012 19737113 1330.5
## - M5        1   1911027 19935129 1331.6
## - M2        1   2205528 20229629 1333.2
## - M3        1   2369049 20393150 1334.0
## - L1_data   1   3579231 21603333 1340.3
## 
## Step:  AIC=1321.71
## data ~ M2 + M3 + M4 + M5 + M8 + M9 + M10 + M11 + L1_data + L12_data
## 
##            Df Sum of Sq      RSS    AIC
## <none>                  18191232 1321.7
## + M1        1    167130 18024101 1322.7
## - M9        1    523941 18715173 1322.8
## + M6        1    134989 18056243 1322.9
## + L3_data   1    122435 18068796 1323.0
## + M7        1      1116 18190115 1323.7
## - M8        1   1026031 19217262 1325.6
## - L12_data  1   1375593 19566825 1327.6
## - M10       1   1454838 19646070 1328.0
## - M11       1   1524210 19715441 1328.4
## - M4        1   1580397 19771628 1328.7
## - M5        1   1771797 19963029 1329.7
## - M2        1   2039781 20231012 1331.2
## - M3        1   2214465 20405696 1332.1
## - L1_data   1   3414108 21605339 1338.3
#fit 8
fit8_n1725 <- lm(data ~ L12_data + L1_data + I(log(L3_data)), data=N1725)
summary(fit8_n1725)
## 
## Call:
## lm(formula = data ~ L12_data + L1_data + I(log(L3_data)), data = N1725)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1006.81  -284.37   -14.82   234.09  1360.30 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3022.22182 1857.83620   1.627    0.107    
## L12_data           0.33584    0.07979   4.209 5.46e-05 ***
## L1_data            0.43258    0.09246   4.679 8.73e-06 ***
## I(log(L3_data)) -296.38379  241.08539  -1.229    0.222    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 446.2 on 104 degrees of freedom
##   (28 observations deleted due to missingness)
## Multiple R-squared:   0.45,  Adjusted R-squared:  0.4341 
## F-statistic: 28.36 on 3 and 104 DF,  p-value: 1.756e-13
#fit 9
fit9_n1725 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + L3_data, data=N1725)
summary(fit9_n1725)
## 
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + 
##     M10 + M11 + L1_data + L3_data, data = N1725)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -989.79 -229.65   -5.88  182.24 1179.88 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1379.56041  293.05541   4.708 7.84e-06 ***
## M1           -12.75880  194.33619  -0.066  0.94778    
## M2           384.99677  197.66961   1.948  0.05418 .  
## M3           374.79063  206.26992   1.817  0.07213 .  
## M4           258.63126  210.94132   1.226  0.22296    
## M5           154.30367  213.44558   0.723  0.47137    
## M6          -411.64107  216.70518  -1.900  0.06029 .  
## M7          -218.25102  205.04266  -1.064  0.28963    
## M8          -641.33658  204.53929  -3.136  0.00224 ** 
## M9          -370.94982  189.12440  -1.961  0.05253 .  
## M10         -499.32658  188.92412  -2.643  0.00950 ** 
## M11         -394.94176  185.34797  -2.131  0.03548 *  
## L1_data        0.37794    0.09212   4.103 8.19e-05 ***
## L3_data        0.19826    0.09154   2.166  0.03262 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 414.4 on 103 degrees of freedom
##   (19 observations deleted due to missingness)
## Multiple R-squared:  0.5937, Adjusted R-squared:  0.5425 
## F-statistic: 11.58 on 13 and 103 DF,  p-value: 5.506e-15
#Forecast from Model 1
N1725_Model1Forecast <- predict(N1725_Model1, N1725_test)

plot(N1725_Model1Forecast)

plot.ts(N1725_Model1Forecast)

# Errors
N1725_test_data <- (N1725_test[,"data"])

model1_errors_n1725 <- N1725_test_data - N1725_Model1Forecast

#eliminate NAs
model1_errors_n1725 <- ts(model1_errors_n1725[1:13], frequency=frequency(model1_errors_n1725),
start=start(model1_errors_n1725))

View(model1_errors_n1725)
#Error Metrics
model1_MAE_n1725 <- mean(abs(model1_errors_n1725))
model1_MAPE_n1725 <- 100 * mean(abs(model1_errors_n1725)/N1725_test_data)

#Forecast from fit 8 and 9
N1725_fit8Forecast <- predict(fit8_n1725, N1725_test)

plot(N1725_fit8Forecast)

plot.ts(N1725_fit8Forecast)

# Errors
N1725_test_data <- (N1725_test[,"data"])

fit8_errors_n1725 <- N1725_test_data - N1725_fit8Forecast

#eliminate NAs
fit8_errors_n1725 <- ts(fit8_errors_n1725[1:13], frequency=frequency(fit8_errors_n1725),
start=start(fit8_errors_n1725))

#Error Metrics
fit8_MAE_n1725 <- mean(abs(fit8_errors_n1725))
fit8_MAPE_n1725 <- 100 * mean(abs(fit8_errors_n1725)/N1725_test_data)

#fit 9
N1725_fit9Forecast <- predict(fit9_n1725, N1725_test)

plot(N1725_fit9Forecast)

plot.ts(N1725_fit9Forecast)

# Errors
N1725_test_data <- (N1725_test[,"data"])

fit9_errors_n1725 <- N1725_test_data - N1725_fit9Forecast

#eliminate NAs
fit9_errors_n1725 <- ts(fit9_errors_n1725[1:13], frequency=frequency(fit9_errors_n1725),
start=start(fit9_errors_n1725))

#Error Metrics
fit9_MAE_n1725 <- mean(abs(fit9_errors_n1725))
fit9_MAPE_n1725 <- 100 * mean(abs(fit9_errors_n1725)/N1725_test_data)

#FORECAST FOR REGRESSION

regression_forecast_n1725 <- predict(fit9_n1725, N1725_test)

plot(regression_forecast_n1725)

plot.ts(regression_forecast_n1725)

REGRESSION for N1726

#Convert to monthly Time Series starting from March in 1985
N1726 <- ts(n1726[7:(n1726_length)], frequency=frequency(n1726),
start=start(n1726))

#Create Seasonal Dummies
M1_n1726 <- rep(c(1,0,0,0,0,0,0,0,0,0,0,0),10)
M2_n1726 <- rep(c(0,1,0,0,0,0,0,0,0,0,0,0),10)
M3_n1726 <- rep(c(0,0,1,0,0,0,0,0,0,0,0,0),10)
M4_n1726 <- rep(c(0,0,0,1,0,0,0,0,0,0,0,0),10)
M5_n1726 <- rep(c(0,0,0,0,1,0,0,0,0,0,0,0),10)
M6_n1726 <- rep(c(0,0,0,0,0,1,0,0,0,0,0,0),10)
M7_n1726 <- rep(c(0,0,0,0,0,0,1,0,0,0,0,0),10)
M8_n1726 <- rep(c(0,0,0,0,0,0,0,1,0,0,0,0),10)
M9_n1726 <- rep(c(0,0,0,0,0,0,0,0,1,0,0,0),10)
M10_n1726 <- rep(c(0,0,0,0,0,0,0,0,0,1,0,0),10)
M11_n1726 <- rep(c(0,0,0,0,0,0,0,0,0,0,1,0),10)
#M12_n1726 <- rep(c(0,0,0,0,0,0,0,0,0,0,0,1),10)


N1726 <- cbind(N1726,M1_n1726,M2_n1726,M3_n1726,M4_n1726, M5_n1726, M6_n1726,M7_n1726, M8_n1726, M9_n1726, M10_n1726, M11_n1726) #M12_n1726

colnames(N1726) <- c("data","M1","M2","M3","M4",'M5','M6',"M7","M8","M9","M10",'M11' )#M12

#Use the Seasonal Dummies
fit3_n1726 <- lm(data ~ ., data=N1726)
summary(fit3_n1726)
## 
## Call:
## lm(formula = data ~ ., data = N1726)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1872.0  -476.5  -119.0   344.0  4088.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2300.0      304.7   7.549 1.48e-11 ***
## M1             -10.0      430.9  -0.023  0.98153    
## M2             296.0      430.9   0.687  0.49359    
## M3             802.0      430.9   1.861  0.06543 .  
## M4            1166.0      430.9   2.706  0.00792 ** 
## M5            1092.0      430.9   2.534  0.01270 *  
## M6             312.0      430.9   0.724  0.47059    
## M7             198.0      430.9   0.460  0.64680    
## M8            -262.0      430.9  -0.608  0.54444    
## M9            -198.0      430.9  -0.460  0.64680    
## M10           -406.0      430.9  -0.942  0.34818    
## M11           -610.0      430.9  -1.416  0.15976    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 963.5 on 108 degrees of freedom
## Multiple R-squared:  0.2638, Adjusted R-squared:  0.1888 
## F-statistic: 3.518 on 11 and 108 DF,  p-value: 0.0003109
#Lag 1 of Sales
L1_data <- lag(N1726[,"data"],-1)

N1726_colnames <- colnames(N1726)
N1726 <- cbind(N1726,L1_data)
colnames(N1726) <- c(N1726_colnames,"L1_data")

#Use seasonal dummies and lag
fit4_n1726 <- lm(data ~ ., data=N1726)
summary(fit4_n1726)
## 
## Call:
## lm(formula = data ~ ., data = N1726)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1437.5  -401.7   -90.0   410.5  2075.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1148.73909  250.36160   4.588 1.23e-05 ***
## M1          -572.51624  323.68639  -1.769   0.0798 .  
## M2          -112.73168  315.15050  -0.358   0.7213    
## M3           184.81516  318.68811   0.580   0.5632    
## M4           204.11811  327.51960   0.623   0.5345    
## M5          -117.84578  336.02882  -0.351   0.7265    
## M6          -847.43554  334.16026  -2.536   0.0127 *  
## M7          -430.08435  318.91131  -1.349   0.1803    
## M8          -812.42533  317.40324  -2.560   0.0119 *  
## M9          -435.06438  313.30089  -1.389   0.1679    
## M10         -686.66242  313.67836  -2.189   0.0308 *  
## M11         -748.96877  312.68305  -2.395   0.0184 *  
## L1_data        0.68122    0.06975   9.766  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 698.5 on 106 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.6166, Adjusted R-squared:  0.5732 
## F-statistic: 14.21 on 12 and 106 DF,  p-value: < 2.2e-16
#Multicollinearity

# Correlation matrix for N1726 data
cor(N1726,use="c")
##                data          M1          M2          M3          M4          M5
## data     1.00000000 -0.09369989  0.03051685  0.17447966  0.27804183  0.25698798
## M1      -0.09369989  1.00000000 -0.08663865 -0.08663865 -0.08663865 -0.08663865
## M2       0.03051685 -0.08663865  1.00000000 -0.09174312 -0.09174312 -0.09174312
## M3       0.17447966 -0.08663865 -0.09174312  1.00000000 -0.09174312 -0.09174312
## M4       0.27804183 -0.08663865 -0.09174312 -0.09174312  1.00000000 -0.09174312
## M5       0.25698798 -0.08663865 -0.09174312 -0.09174312 -0.09174312  1.00000000
## M6       0.03506903 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M7       0.00263472 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M8      -0.12824056 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M9      -0.11003183 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M10     -0.16921022 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## M11     -0.22725056 -0.08663865 -0.09174312 -0.09174312 -0.09174312 -0.09174312
## L1_data  0.70513338 -0.05457994 -0.05936873  0.02727868  0.17055840  0.27362919
##                  M6          M7            M8          M9         M10
## data     0.03506903  0.00263472 -0.1282405621 -0.11003183 -0.16921022
## M1      -0.08663865 -0.08663865 -0.0866386473 -0.08663865 -0.08663865
## M2      -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M3      -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M4      -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M5      -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M6       1.00000000 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## M7      -0.09174312  1.00000000 -0.0917431193 -0.09174312 -0.09174312
## M8      -0.09174312 -0.09174312  1.0000000000 -0.09174312 -0.09174312
## M9      -0.09174312 -0.09174312 -0.0917431193  1.00000000 -0.09174312
## M10     -0.09174312 -0.09174312 -0.0917431193 -0.09174312  1.00000000
## M11     -0.09174312 -0.09174312 -0.0917431193 -0.09174312 -0.09174312
## L1_data  0.25267524  0.03180927 -0.0004711427 -0.13072543 -0.11260310
##                 M11       L1_data
## data    -0.22725056  0.7051333790
## M1      -0.08663865 -0.0545799400
## M2      -0.09174312 -0.0593687349
## M3      -0.09174312  0.0272786844
## M4      -0.09174312  0.1705584040
## M5      -0.09174312  0.2736291904
## M6      -0.09174312  0.2526752393
## M7      -0.09174312  0.0318092685
## M8      -0.09174312 -0.0004711427
## M9      -0.09174312 -0.1307254332
## M10     -0.09174312 -0.1126030971
## M11      1.00000000 -0.1715006893
## L1_data -0.17150069  1.0000000000
#VIF for fit4
vif(fit4_n1726)
##       M1       M2       M3       M4       M5       M6       M7       M8 
## 1.786730 1.864821 1.906922 2.014076 2.120090 2.096577 1.909594 1.891577 
##       M9      M10      M11  L1_data 
## 1.842997 1.847440 1.835735 1.358051
#Analyse residuals of multiple components
tsdisplay(residuals(fit4_n1726))

#Lag 12 of Sales
L12_data <- lag(N1726[,"data"],-12)

N1726_colnames <- colnames(N1726)
N1726 <- cbind(N1726,L12_data)
colnames(N1726) <- c(N1726_colnames,"L12_data")

#Use seasonal dummies and lag
fit5_n1726 <- lm(data ~ ., data=N1726)
summary(fit5_n1726)
## 
## Call:
## lm(formula = data ~ ., data = N1726)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1170.71  -351.44   -37.35   328.49  1528.46 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1068.76716  236.19775   4.525 1.77e-05 ***
## M1           -71.74741  277.65344  -0.258    0.797    
## M2           124.51238  271.26577   0.459    0.647    
## M3           324.05966  275.09612   1.178    0.242    
## M4           428.31641  284.72091   1.504    0.136    
## M5           165.42520  294.01085   0.563    0.575    
## M6            -4.97982  296.01021  -0.017    0.987    
## M7            -9.52178  278.45551  -0.034    0.973    
## M8          -211.09958  282.13153  -0.748    0.456    
## M9           -31.79719  271.00590  -0.117    0.907    
## M10         -192.14422  278.53420  -0.690    0.492    
## M11         -318.57196  276.03397  -1.154    0.251    
## L1_data        0.13239    0.11090   1.194    0.236    
## L12_data       0.35960    0.08163   4.406 2.80e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 567.4 on 94 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.566,  Adjusted R-squared:  0.506 
## F-statistic:  9.43 on 13 and 94 DF,  p-value: 3.45e-12
#Fit Regression with Seasonally Dummies, and lag 12
fit6_n1726 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data, data=N1726)
summary(fit6_n1726)
## 
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + 
##     M10 + M11 + L12_data, data = N1726)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1228.54  -357.45   -47.29   357.87  1596.13 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1124.34889  232.08128   4.845 4.92e-06 ***
## M1            17.16741  268.07545   0.064   0.9491    
## M2           173.85585  268.69767   0.647   0.5192    
## M3           374.50979  272.43848   1.375   0.1725    
## M4           508.09584  277.38594   1.832   0.0701 .  
## M5           280.51867  278.37593   1.008   0.3162    
## M6           143.75633  269.11402   0.534   0.5945    
## M7            81.12278  268.50247   0.302   0.7632    
## M8          -104.89621  268.33735  -0.391   0.6967    
## M9            19.26833  268.20706   0.072   0.9429    
## M10         -103.75633  269.11402  -0.386   0.7007    
## M11         -248.32208  270.29163  -0.919   0.3606    
## L12_data       0.42792    0.05833   7.336 7.36e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 568.7 on 95 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.5594, Adjusted R-squared:  0.5038 
## F-statistic: 10.05 on 12 and 95 DF,  p-value: 2.008e-12
#Lag 3 of Sales
L3_data <- lag(N1726[,"data"],-3)

N1726_colnames <- colnames(N1726)
N1726 <- cbind(N1726,L3_data)
colnames(N1726) <- c(N1726_colnames,"L3_data")

#Fit Regression with Seasonally Dummies, log (lag 3), lag 1, and lag 12
fit7_n1726 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L12_data + L1_data + I(log(L3_data)), data=N1726)
summary(fit7_n1726)
## 
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + 
##     M10 + M11 + L12_data + L1_data + I(log(L3_data)), data = N1726)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1236.03  -410.89    -9.51   349.91  1371.60 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -3.823e+03  1.658e+03  -2.306 0.023342 *  
## M1               5.001e+01  2.698e+02   0.185 0.853361    
## M2               3.094e+02  2.679e+02   1.155 0.250999    
## M3               3.666e+02  2.646e+02   1.385 0.169291    
## M4               5.578e+02  2.769e+02   2.014 0.046889 *  
## M5               2.643e+02  2.844e+02   0.930 0.355006    
## M6              -8.126e+01  2.855e+02  -0.285 0.776565    
## M7              -2.089e+02  2.757e+02  -0.758 0.450647    
## M8              -3.780e+02  2.767e+02  -1.366 0.175264    
## M9              -1.164e+02  2.619e+02  -0.445 0.657687    
## M10             -2.602e+02  2.685e+02  -0.969 0.335117    
## M11             -2.857e+02  2.654e+02  -1.077 0.284455    
## L12_data         3.074e-01  8.034e-02   3.826 0.000236 ***
## L1_data          5.735e-02  1.095e-01   0.524 0.601554    
## I(log(L3_data))  6.747e+02  2.265e+02   2.978 0.003694 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 545 on 93 degrees of freedom
##   (28 observations deleted due to missingness)
## Multiple R-squared:  0.6038, Adjusted R-squared:  0.5441 
## F-statistic: 10.12 on 14 and 93 DF,  p-value: 2.295e-13
#FORECASTING
# Split the data into train and test sets
N1726_train <- window(N1726, start(N1726), (1993+8/12))
#we have to sustitude all NA with 0
N1726_train[is.na(N1726_train)] = 0

N1726_test <- window(N1726, (1993+8/12), end(N1726))

plot(N1726[,"data"])

plot(N1726_train[,"data"])

plot(N1726_test[,"data"])

# Regression with no variables
N1726_Model0 <- lm(data ~ 1, data=N1726_train)
# Regression with all variables
N1726_Model1 <- lm(data ~ ., data=N1726_train)

# Summary of Regressions
summary(N1726_Model0)
## 
## Call:
## lm(formula = data ~ 1, data = N1726_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1346.3  -776.3  -276.3   403.7  4933.7 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2546.3      106.5   23.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1107 on 107 degrees of freedom
summary(N1726_Model1)
## 
## Call:
## lm(formula = data ~ ., data = N1726_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1557.02  -460.44   -13.62   379.37  2391.28 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1381.06188  325.09783   4.248 5.11e-05 ***
## M1          -132.34165  364.01452  -0.364   0.7170    
## M2            75.19565  373.33702   0.201   0.8408    
## M3           437.14654  371.65434   1.176   0.2425    
## M4           549.69565  384.37345   1.430   0.1560    
## M5           396.11947  395.38345   1.002   0.3190    
## M6          -691.99477  386.44755  -1.791   0.0766 .  
## M7          -419.95478  372.94282  -1.126   0.2630    
## M8          -867.40302  373.96340  -2.319   0.0226 *  
## M9          -445.50738  360.70928  -1.235   0.2199    
## M10         -742.88182  360.60844  -2.060   0.0422 *  
## M11         -764.87717  360.36638  -2.122   0.0365 *  
## L1_data        0.54874    0.09302   5.899 5.87e-08 ***
## L12_data      -0.12673    0.05902  -2.147   0.0344 *  
## L3_data        0.11499    0.09061   1.269   0.2076    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 759.7 on 93 degrees of freedom
## Multiple R-squared:  0.5906, Adjusted R-squared:  0.5289 
## F-statistic: 9.582 on 14 and 93 DF,  p-value: 9.285e-13
# AIC forward selection
N1726_Model2 <- step(N1726_Model0, formula(N1726_Model1), direction="forward")
## Start:  AIC=1515
## data ~ 1
## 
##            Df Sum of Sq       RSS    AIC
## + L1_data   1  57894715  73193003 1454.1
## + L3_data   1  11621037 119466682 1507.0
## + M5        1  10536308 120551410 1508.0
## + M4        1   9266167 121821552 1509.1
## + M11       1   6995072 124092646 1511.1
## + M10       1   4243266 126844453 1513.4
## + M3        1   3332430 127755289 1514.2
## <none>                  131087719 1515.0
## + M8        1   2054175 129033543 1515.3
## + M9        1   1568001 129519717 1515.7
## + L12_data  1    856011 130231708 1516.3
## + M1        1    445286 130642432 1516.6
## + M6        1    238708 130849010 1516.8
## + M2        1     41246 131046473 1517.0
## + M7        1      1294 131086424 1517.0
## 
## Step:  AIC=1454.06
## data ~ L1_data
## 
##            Df Sum of Sq      RSS    AIC
## + M4        1   3414946 69778057 1450.9
## + M6        1   2975832 70217172 1451.6
## + M3        1   2758855 70434148 1451.9
## + M8        1   2218628 70974375 1452.7
## + M11       1   1844762 71348242 1453.3
## + M10       1   1651773 71541230 1453.6
## + M5        1   1636050 71556954 1453.6
## <none>                  73193003 1454.1
## + L3_data   1    519713 72673290 1455.3
## + M2        1    366098 72826905 1455.5
## + M9        1    126346 73066657 1455.9
## + M7        1     99175 73093828 1455.9
## + L12_data  1     67418 73125585 1456.0
## + M1        1     42489 73150515 1456.0
## 
## Step:  AIC=1450.9
## data ~ L1_data + M4
## 
##            Df Sum of Sq      RSS    AIC
## + M3        1   3408415 66369643 1447.5
## + M5        1   2412423 67365635 1449.1
## + M6        1   2175138 67602920 1449.5
## + M8        1   1749947 68028110 1450.2
## + M11       1   1538652 68239406 1450.5
## + M10       1   1318848 68459209 1450.8
## <none>                  69778057 1450.9
## + M2        1    581132 69196925 1452.0
## + L12_data  1    323619 69454438 1452.4
## + L3_data   1    196594 69581464 1452.6
## + M1        1    118174 69659883 1452.7
## + M9        1     48565 69729493 1452.8
## + M7        1     17253 69760804 1452.9
## 
## Step:  AIC=1447.49
## data ~ L1_data + M4 + M3
## 
##            Df Sum of Sq      RSS    AIC
## + M5        1   3160912 63208731 1444.2
## + M6        1   1608307 64761336 1446.8
## + M8        1   1305019 65064624 1447.3
## <none>                  66369643 1447.5
## + M11       1   1143152 65226491 1447.6
## + M10       1    948011 65421632 1447.9
## + M2        1    900970 65468673 1448.0
## + L12_data  1    697667 65671976 1448.3
## + M1        1    275594 66094049 1449.0
## + L3_data   1     27065 66342578 1449.5
## + M7        1      3395 66366248 1449.5
## + M9        1      1765 66367878 1449.5
## 
## Step:  AIC=1444.22
## data ~ L1_data + M4 + M3 + M5
## 
##            Df Sum of Sq      RSS    AIC
## + L12_data  1   1740411 61468319 1443.2
## + M2        1   1306630 61902100 1444.0
## <none>                  63208731 1444.2
## + M11       1    898636 62310095 1444.7
## + M8        1    863121 62345610 1444.7
## + M6        1    810144 62398587 1444.8
## + M10       1    666630 62542100 1445.1
## + M1        1    464665 62744065 1445.4
## + M7        1     91994 63116736 1446.1
## + L3_data   1     26187 63182544 1446.2
## + M9        1     12233 63196497 1446.2
## 
## Step:  AIC=1443.21
## data ~ L1_data + M4 + M3 + M5 + L12_data
## 
##           Df Sum of Sq      RSS    AIC
## + M2       1   1550302 59918017 1442.5
## + M11      1   1261631 60206688 1443.0
## <none>                 61468319 1443.2
## + M8       1    965789 60502530 1443.5
## + M10      1    830483 60637836 1443.7
## + M6       1    618319 60850000 1444.1
## + M1       1    507679 60960641 1444.3
## + M7       1    141201 61327118 1445.0
## + L3_data  1     34429 61433891 1445.1
## + M9       1      6564 61461756 1445.2
## 
## Step:  AIC=1442.45
## data ~ L1_data + M4 + M3 + M5 + L12_data + M2
## 
##           Df Sum of Sq      RSS    AIC
## <none>                 59918017 1442.5
## + M11      1    965747 58952270 1442.7
## + M1       1    772057 59145960 1443.0
## + M8       1    699813 59218204 1443.2
## + M10      1    587563 59330455 1443.4
## + L3_data  1    412964 59505053 1443.7
## + M6       1    381958 59536059 1443.8
## + M7       1    293944 59624073 1443.9
## + M9       1     56462 59861555 1444.3
#backward
N1726_Model3 <- step(N1726_Model1, formula(N1726_Model0), direction="backward")
## Start:  AIC=1446.55
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + 
##     L1_data + L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M2        1     23412 53693860 1444.6
## - M1        1     76280 53746728 1444.7
## - M5        1    579252 54249701 1445.7
## - M7        1    731767 54402216 1446.0
## - M3        1    798414 54468862 1446.2
## - M9        1    880334 54550782 1446.3
## - L3_data   1    929307 54599756 1446.4
## <none>                  53670449 1446.5
## - M4        1   1180294 54850742 1446.9
## - M6        1   1850448 55520896 1448.2
## - M10       1   2449176 56119624 1449.4
## - M11       1   2599843 56270292 1449.7
## - L12_data  1   2660572 56331021 1449.8
## - M8        1   3104816 56775265 1450.6
## - L1_data   1  20082318 73752767 1478.9
## 
## Step:  AIC=1444.6
## data ~ M1 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M1        1    174330 53868190 1443.0
## - M5        1    639114 54332975 1443.9
## - L3_data   1    913869 54607730 1444.4
## - M3        1    915716 54609577 1444.4
## <none>                  53693860 1444.6
## - M7        1   1057848 54751708 1444.7
## - M9        1   1299640 54993501 1445.2
## - M4        1   1405678 55099538 1445.4
## - L12_data  1   2641506 56335367 1447.8
## - M6        1   2763249 56457110 1448.0
## - M10       1   3468115 57161975 1449.4
## - M11       1   3730147 57424008 1449.8
## - M8        1   4068871 57762731 1450.5
## - L1_data   1  21764685 75458546 1479.3
## 
## Step:  AIC=1442.95
## data ~ M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M7        1    896666 54764856 1442.7
## - L3_data   1    943115 54811305 1442.8
## - M5        1    955511 54823702 1442.8
## <none>                  53868190 1443.0
## - M9        1   1128868 54997059 1443.2
## - M3        1   1341875 55210065 1443.6
## - M4        1   1939830 55808020 1444.8
## - M6        1   2600148 56468338 1446.0
## - L12_data  1   2618511 56486702 1446.1
## - M10       1   3330379 57198569 1447.4
## - M11       1   3606714 57474905 1448.0
## - M8        1   3929270 57797461 1448.6
## - L1_data   1  21651929 75520120 1477.4
## 
## Step:  AIC=1442.73
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data + 
##     L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - L3_data   1    446298 55211154 1441.6
## - M9        1    646328 55411185 1442.0
## <none>                  54764856 1442.7
## - M5        1   1612225 56377081 1443.9
## - M6        1   1916952 56681808 1444.5
## - M3        1   2058115 56822971 1444.7
## - M10       1   2605242 57370098 1445.8
## - L12_data  1   2624302 57389158 1445.8
## - M4        1   2834277 57599133 1446.2
## - M11       1   2930582 57695438 1446.4
## - M8        1   3080521 57845377 1446.6
## - L1_data   1  22414552 77179409 1477.8
## 
## Step:  AIC=1441.61
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data
## 
##            Df Sum of Sq      RSS    AIC
## - M9        1    500149 55711303 1440.6
## <none>                  55211154 1441.6
## - M5        1   1432643 56643797 1442.4
## - M3        1   1872870 57084023 1443.2
## - M6        1   1895529 57106683 1443.3
## - M10       1   2403610 57614764 1444.2
## - L12_data  1   2530766 57741919 1444.5
## - M4        1   2586171 57797324 1444.5
## - M8        1   2650203 57861357 1444.7
## - M11       1   2878934 58090088 1445.1
## - L1_data   1  38087741 93298895 1496.3
## 
## Step:  AIC=1440.58
## data ~ M3 + M4 + M5 + M6 + M8 + M10 + M11 + L1_data + L12_data
## 
##            Df Sum of Sq      RSS    AIC
## <none>                  55711303 1440.6
## - M6        1   1620700 57332003 1441.7
## - M5        1   1765598 57476901 1442.0
## - M10       1   2062015 57773317 1442.5
## - M8        1   2302602 58013904 1443.0
## - M3        1   2330008 58041310 1443.0
## - L12_data  1   2410273 58121575 1443.2
## - M11       1   2510199 58221502 1443.3
## - M4        1   3086751 58798054 1444.4
## - L1_data   1  38399687 94110990 1495.2
#both
N1726_Model4 <- step(N1726_Model1, direction="both")
## Start:  AIC=1446.55
## data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + 
##     L1_data + L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M2        1     23412 53693860 1444.6
## - M1        1     76280 53746728 1444.7
## - M5        1    579252 54249701 1445.7
## - M7        1    731767 54402216 1446.0
## - M3        1    798414 54468862 1446.2
## - M9        1    880334 54550782 1446.3
## - L3_data   1    929307 54599756 1446.4
## <none>                  53670449 1446.5
## - M4        1   1180294 54850742 1446.9
## - M6        1   1850448 55520896 1448.2
## - M10       1   2449176 56119624 1449.4
## - M11       1   2599843 56270292 1449.7
## - L12_data  1   2660572 56331021 1449.8
## - M8        1   3104816 56775265 1450.6
## - L1_data   1  20082318 73752767 1478.9
## 
## Step:  AIC=1444.6
## data ~ M1 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M1        1    174330 53868190 1443.0
## - M5        1    639114 54332975 1443.9
## - L3_data   1    913869 54607730 1444.4
## - M3        1    915716 54609577 1444.4
## <none>                  53693860 1444.6
## - M7        1   1057848 54751708 1444.7
## - M9        1   1299640 54993501 1445.2
## - M4        1   1405678 55099538 1445.4
## + M2        1     23412 53670449 1446.5
## - L12_data  1   2641506 56335367 1447.8
## - M6        1   2763249 56457110 1448.0
## - M10       1   3468115 57161975 1449.4
## - M11       1   3730147 57424008 1449.8
## - M8        1   4068871 57762731 1450.5
## - L1_data   1  21764685 75458546 1479.3
## 
## Step:  AIC=1442.95
## data ~ M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + 
##     L12_data + L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - M7        1    896666 54764856 1442.7
## - L3_data   1    943115 54811305 1442.8
## - M5        1    955511 54823702 1442.8
## <none>                  53868190 1443.0
## - M9        1   1128868 54997059 1443.2
## - M3        1   1341875 55210065 1443.6
## + M1        1    174330 53693860 1444.6
## + M2        1    121462 53746728 1444.7
## - M4        1   1939830 55808020 1444.8
## - M6        1   2600148 56468338 1446.0
## - L12_data  1   2618511 56486702 1446.1
## - M10       1   3330379 57198569 1447.4
## - M11       1   3606714 57474905 1448.0
## - M8        1   3929270 57797461 1448.6
## - L1_data   1  21651929 75520120 1477.4
## 
## Step:  AIC=1442.73
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data + 
##     L3_data
## 
##            Df Sum of Sq      RSS    AIC
## - L3_data   1    446298 55211154 1441.6
## - M9        1    646328 55411185 1442.0
## <none>                  54764856 1442.7
## + M7        1    896666 53868190 1443.0
## - M5        1   1612225 56377081 1443.9
## + M2        1    345266 54419591 1444.0
## - M6        1   1916952 56681808 1444.5
## + M1        1     13148 54751708 1444.7
## - M3        1   2058115 56822971 1444.7
## - M10       1   2605242 57370098 1445.8
## - L12_data  1   2624302 57389158 1445.8
## - M4        1   2834277 57599133 1446.2
## - M11       1   2930582 57695438 1446.4
## - M8        1   3080521 57845377 1446.6
## - L1_data   1  22414552 77179409 1477.8
## 
## Step:  AIC=1441.61
## data ~ M3 + M4 + M5 + M6 + M8 + M9 + M10 + M11 + L1_data + L12_data
## 
##            Df Sum of Sq      RSS    AIC
## - M9        1    500149 55711303 1440.6
## <none>                  55211154 1441.6
## - M5        1   1432643 56643797 1442.4
## + L3_data   1    446298 54764856 1442.7
## + M7        1    399848 54811305 1442.8
## - M3        1   1872870 57084023 1443.2
## - M6        1   1895529 57106683 1443.3
## + M2        1    129887 55081267 1443.4
## + M1        1     46576 55164577 1443.5
## - M10       1   2403610 57614764 1444.2
## - L12_data  1   2530766 57741919 1444.5
## - M4        1   2586171 57797324 1444.5
## - M8        1   2650203 57861357 1444.7
## - M11       1   2878934 58090088 1445.1
## - L1_data   1  38087741 93298895 1496.3
## 
## Step:  AIC=1440.58
## data ~ M3 + M4 + M5 + M6 + M8 + M10 + M11 + L1_data + L12_data
## 
##            Df Sum of Sq      RSS    AIC
## <none>                  55711303 1440.6
## + M9        1    500149 55211154 1441.6
## - M6        1   1620700 57332003 1441.7
## - M5        1   1765598 57476901 1442.0
## + L3_data   1    300118 55411185 1442.0
## + M2        1    274314 55436989 1442.0
## + M7        1    191909 55519394 1442.2
## - M10       1   2062015 57773317 1442.5
## + M1        1       937 55710366 1442.6
## - M8        1   2302602 58013904 1443.0
## - M3        1   2330008 58041310 1443.0
## - L12_data  1   2410273 58121575 1443.2
## - M11       1   2510199 58221502 1443.3
## - M4        1   3086751 58798054 1444.4
## - L1_data   1  38399687 94110990 1495.2
#fit 8
fit8_n1726 <- lm(data ~ L12_data + L1_data + I(log(L3_data)), data=N1726)
summary(fit8_n1726)
## 
## Call:
## lm(formula = data ~ L12_data + L1_data + I(log(L3_data)), data = N1726)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -991.91 -390.34  -55.01  358.80 1505.32 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.161e+03  1.399e+03  -0.830    0.408    
## L12_data         4.605e-01  7.185e-02   6.409 4.36e-09 ***
## L1_data          5.850e-02  1.006e-01   0.582    0.562    
## I(log(L3_data))  2.806e+02  1.903e+02   1.475    0.143    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 564.5 on 104 degrees of freedom
##   (28 observations deleted due to missingness)
## Multiple R-squared:  0.5247, Adjusted R-squared:  0.511 
## F-statistic: 38.27 on 3 and 104 DF,  p-value: < 2.2e-16
#fit 9
fit9_n1726 <- lm(data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + M10 + M11 + L1_data + I(log(L3_data)), data=N1726)
summary(fit9_n1726)
## 
## Call:
## lm(formula = data ~ M1 + M2 + M3 + M4 + M5 + M6 + M7 + M8 + M9 + 
##     M10 + M11 + L1_data + I(log(L3_data)), data = N1726)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1507.65  -454.53    -0.42   341.16  2233.58 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -4.947e+03  1.800e+03  -2.748 0.007076 ** 
## M1              -3.413e+02  3.096e+02  -1.103 0.272751    
## M2               8.275e+01  3.114e+02   0.266 0.790984    
## M3               1.374e+02  3.054e+02   0.450 0.653616    
## M4               4.411e+02  3.125e+02   1.412 0.161082    
## M5               1.204e+02  3.202e+02   0.376 0.707714    
## M6              -7.761e+02  3.143e+02  -2.470 0.015171 *  
## M7              -6.251e+02  3.064e+02  -2.040 0.043908 *  
## M8              -9.698e+02  3.026e+02  -3.205 0.001798 ** 
## M9              -5.212e+02  2.951e+02  -1.766 0.080312 .  
## M10             -7.375e+02  2.947e+02  -2.503 0.013901 *  
## M11             -6.737e+02  2.938e+02  -2.293 0.023863 *  
## L1_data          4.684e-01  8.456e-02   5.540 2.34e-07 ***
## I(log(L3_data))  8.465e+02  2.454e+02   3.449 0.000816 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 654.7 on 103 degrees of freedom
##   (19 observations deleted due to missingness)
## Multiple R-squared:  0.6403, Adjusted R-squared:  0.5949 
## F-statistic:  14.1 on 13 and 103 DF,  p-value: < 2.2e-16
#Forecast from Model 1
N1726_Model1Forecast <- predict(N1726_Model1, N1726_test)

plot(N1726_Model1Forecast)

plot.ts(N1726_Model1Forecast)

# Errors
N1726_test_data <- (N1726_test[,"data"])

model1_errors_n1726 <- N1726_test_data - N1726_Model1Forecast

#eliminate NAs
model1_errors_n1726 <- ts(model1_errors_n1726[1:13], frequency=frequency(model1_errors_n1726),
start=start(model1_errors_n1726))

View(model1_errors_n1726)
#Error Metrics
model1_MAE_n1726 <- mean(abs(model1_errors_n1726))
model1_MAPE_n1726 <- 100 * mean(abs(model1_errors_n1726)/N1726_test_data)

#Forecast from fit 8 and 9
N1726_fit8Forecast <- predict(fit8_n1726, N1726_test)

plot(N1726_fit8Forecast)

plot.ts(N1726_fit8Forecast)

# Errors
N1726_test_data <- (N1726_test[,"data"])

fit8_errors_n1726 <- N1726_test_data - N1726_fit8Forecast

#eliminate NAs
fit8_errors_n1726 <- ts(fit8_errors_n1726[1:13], frequency=frequency(fit8_errors_n1726),
start=start(fit8_errors_n1726))

#Error Metrics
fit8_MAE_n1726 <- mean(abs(fit8_errors_n1726))
fit8_MAPE_n1726 <- 100 * mean(abs(fit8_errors_n1726)/N1726_test_data)

#fit 9
N1726_fit9Forecast <- predict(fit9_n1726, N1726_test)

plot(N1726_fit9Forecast)

plot.ts(N1726_fit9Forecast)

# Errors
N1726_test_data <- (N1726_test[,"data"])

fit9_errors_n1726 <- N1726_test_data - N1726_fit9Forecast

#eliminate NAs
fit9_errors_n1726 <- ts(fit9_errors_n1726[1:13], frequency=frequency(fit9_errors_n1726),
start=start(fit9_errors_n1726))

#Error Metrics
fit9_MAE_n1726 <- mean(abs(fit9_errors_n1726))
fit9_MAPE_n1726 <- 100 * mean(abs(fit9_errors_n1726)/N1726_test_data)

#FORECAST FOR REGRESSION
regression_forecast_n1726 <- predict(N1726_Model1, N1726_test)

plot(regression_forecast_n1726)

plot.ts(regression_forecast_n1726)