#install.packages("locfit")
#install.packages("C:/Users/pattasx2/Documents/Personal/Personal -2020/Subha/R", repos = NULL, type="source")
#packageurl <- "https://cran.r-project.org/src/contrib/Archive/TSA/TSA_1.2.tar.gz"
#install.packages(packageurl, repos=NULL, type="source")
usgdp
##         Qtr1    Qtr2    Qtr3    Qtr4
## 1947  1570.5  1568.7  1568.0  1590.9
## 1948  1616.1  1644.6  1654.1  1658.0
## 1949  1633.2  1628.4  1646.7  1629.9
## 1950  1696.8  1747.3  1815.8  1848.9
## 1951  1871.3  1903.1  1941.1  1944.4
## 1952  1964.7  1966.0  1978.8  2043.8
## 1953  2082.3  2098.1  2085.4  2052.5
## 1954  2042.4  2044.3  2066.9  2107.8
## 1955  2168.5  2204.0  2233.4  2245.3
## 1956  2234.8  2252.5  2249.8  2286.5
## 1957  2300.3  2294.6  2317.0  2292.5
## 1958  2230.2  2243.4  2295.2  2348.0
## 1959  2392.9  2455.8  2453.9  2462.6
## 1960  2517.4  2504.8  2508.7  2476.2
## 1961  2491.2  2538.0  2579.1  2631.8
## 1962  2679.1  2708.4  2733.3  2740.0
## 1963  2775.9  2810.6  2863.5  2885.8
## 1964  2950.5  2984.8  3025.5  3033.6
## 1965  3108.2  3150.2  3214.1  3291.8
## 1966  3372.3  3384.0  3406.3  3433.7
## 1967  3464.1  3464.3  3491.8  3518.2
## 1968  3590.7  3651.6  3676.5  3692.0
## 1969  3750.2  3760.9  3784.2  3766.3
## 1970  3760.0  3767.1  3800.5  3759.8
## 1971  3864.1  3885.9  3916.7  3927.9
## 1972  3997.7  4092.1  4131.1  4198.7
## 1973  4305.3  4355.1  4331.9  4373.3
## 1974  4335.4  4347.9  4305.8  4288.9
## 1975  4237.6  4268.6  4340.9  4397.8
## 1976  4496.8  4530.3  4552.0  4584.6
## 1977  4640.0  4731.1  4815.8  4815.3
## 1978  4830.8  5021.2  5070.7  5137.4
## 1979  5147.4  5152.3  5189.4  5204.7
## 1980  5221.3  5115.9  5107.4  5202.1
## 1981  5307.5  5266.1  5329.8  5263.4
## 1982  5177.1  5204.9  5185.2  5189.8
## 1983  5253.8  5372.3  5478.4  5590.5
## 1984  5699.8  5797.9  5854.3  5902.4
## 1985  5956.9  6007.8  6101.7  6148.6
## 1986  6207.4  6232.0  6291.7  6323.4
## 1987  6365.0  6435.0  6493.4  6606.8
## 1988  6639.1  6723.5  6759.4  6848.6
## 1989  6918.1  6963.5  7013.1  7030.9
## 1990  7112.1  7130.3  7130.8  7076.9
## 1991  7040.8  7086.5  7120.7  7154.1
## 1992  7228.2  7297.9  7369.5  7450.7
## 1993  7459.7  7497.5  7536.0  7637.4
## 1994  7715.1  7815.7  7859.5  7951.6
## 1995  7973.7  7988.0  8053.1  8112.0
## 1996  8169.2  8303.1  8372.7  8470.6
## 1997  8536.1  8665.8  8773.7  8838.4
## 1998  8936.2  8995.3  9098.9  9237.1
## 1999  9315.5  9392.6  9502.2  9671.1
## 2000  9695.6  9847.9  9836.6  9887.7
## 2001  9875.6  9905.9  9871.1  9910.0
## 2002  9977.3 10031.6 10090.7 10095.8
## 2003 10138.6 10230.4 10410.9 10502.6
## 2004 10612.5 10704.1 10808.9 10897.1
## 2005 10999.3 11089.2 11202.3 11248.3
## 2006 11403.6

Question 1 Load the usgdp.rda dataset and split it into a training dataset (1947Q1 - 2005Q1) and a test dataset (2005Q2 - 2006Q1)

train <- window(usgdp, start = 1947, end = c(2005, 1))  # training set: 1947Q1 - 2005Q1
train
##         Qtr1    Qtr2    Qtr3    Qtr4
## 1947  1570.5  1568.7  1568.0  1590.9
## 1948  1616.1  1644.6  1654.1  1658.0
## 1949  1633.2  1628.4  1646.7  1629.9
## 1950  1696.8  1747.3  1815.8  1848.9
## 1951  1871.3  1903.1  1941.1  1944.4
## 1952  1964.7  1966.0  1978.8  2043.8
## 1953  2082.3  2098.1  2085.4  2052.5
## 1954  2042.4  2044.3  2066.9  2107.8
## 1955  2168.5  2204.0  2233.4  2245.3
## 1956  2234.8  2252.5  2249.8  2286.5
## 1957  2300.3  2294.6  2317.0  2292.5
## 1958  2230.2  2243.4  2295.2  2348.0
## 1959  2392.9  2455.8  2453.9  2462.6
## 1960  2517.4  2504.8  2508.7  2476.2
## 1961  2491.2  2538.0  2579.1  2631.8
## 1962  2679.1  2708.4  2733.3  2740.0
## 1963  2775.9  2810.6  2863.5  2885.8
## 1964  2950.5  2984.8  3025.5  3033.6
## 1965  3108.2  3150.2  3214.1  3291.8
## 1966  3372.3  3384.0  3406.3  3433.7
## 1967  3464.1  3464.3  3491.8  3518.2
## 1968  3590.7  3651.6  3676.5  3692.0
## 1969  3750.2  3760.9  3784.2  3766.3
## 1970  3760.0  3767.1  3800.5  3759.8
## 1971  3864.1  3885.9  3916.7  3927.9
## 1972  3997.7  4092.1  4131.1  4198.7
## 1973  4305.3  4355.1  4331.9  4373.3
## 1974  4335.4  4347.9  4305.8  4288.9
## 1975  4237.6  4268.6  4340.9  4397.8
## 1976  4496.8  4530.3  4552.0  4584.6
## 1977  4640.0  4731.1  4815.8  4815.3
## 1978  4830.8  5021.2  5070.7  5137.4
## 1979  5147.4  5152.3  5189.4  5204.7
## 1980  5221.3  5115.9  5107.4  5202.1
## 1981  5307.5  5266.1  5329.8  5263.4
## 1982  5177.1  5204.9  5185.2  5189.8
## 1983  5253.8  5372.3  5478.4  5590.5
## 1984  5699.8  5797.9  5854.3  5902.4
## 1985  5956.9  6007.8  6101.7  6148.6
## 1986  6207.4  6232.0  6291.7  6323.4
## 1987  6365.0  6435.0  6493.4  6606.8
## 1988  6639.1  6723.5  6759.4  6848.6
## 1989  6918.1  6963.5  7013.1  7030.9
## 1990  7112.1  7130.3  7130.8  7076.9
## 1991  7040.8  7086.5  7120.7  7154.1
## 1992  7228.2  7297.9  7369.5  7450.7
## 1993  7459.7  7497.5  7536.0  7637.4
## 1994  7715.1  7815.7  7859.5  7951.6
## 1995  7973.7  7988.0  8053.1  8112.0
## 1996  8169.2  8303.1  8372.7  8470.6
## 1997  8536.1  8665.8  8773.7  8838.4
## 1998  8936.2  8995.3  9098.9  9237.1
## 1999  9315.5  9392.6  9502.2  9671.1
## 2000  9695.6  9847.9  9836.6  9887.7
## 2001  9875.6  9905.9  9871.1  9910.0
## 2002  9977.3 10031.6 10090.7 10095.8
## 2003 10138.6 10230.4 10410.9 10502.6
## 2004 10612.5 10704.1 10808.9 10897.1
## 2005 10999.3
test <- window(usgdp, start = c(2005, 2), end = c(2006, 1))  # test set: 2005Q2 - 2006Q1
test
##         Qtr1    Qtr2    Qtr3    Qtr4
## 2005         11089.2 11202.3 11248.3
## 2006 11403.6

Question 2 Plot the training dataset. Is the Box-Cox transformation necessary for this data?

#ts.plot(train, type = 'l')
autoplot(train)

train %>% BoxCox(lambda = 0) %>% autoplot()

train %>% BoxCox(lambda = .1) %>% autoplot()

train %>% BoxCox(lambda = .2) %>% autoplot()

train %>% BoxCox(lambda = .3) %>% autoplot()

train %>% BoxCox(lambda = .5) %>% autoplot()

train %>% BoxCox(lambda = .7) %>% autoplot()

train %>% BoxCox(lambda = .9) %>% autoplot()

train %>% BoxCox(lambda = 1) %>% autoplot()

lambda <- BoxCox.lambda(train)
lambda
## [1] 0.3689656

The data does not show a strong need for a Box-Cox transformation: the variation looks fairly similar regardless of the level of the series.
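One way to sanity-check this visually is to put the original series next to the BoxCox.lambda() transformation and compare the spread at low and high levels; a minimal sketch, assuming the gridExtra package is installed:

library(gridExtra)
# Raw series vs. the lambda ~ 0.37 transformation, side by side;
# similar spread across the whole range supports skipping the transformation
grid.arrange(
  autoplot(train) + ggtitle("Original"),
  autoplot(BoxCox(train, lambda = lambda)) + ggtitle("Box-Cox, lambda = 0.37"),
  ncol = 2
)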

### Question 3: Plot the first and 2nd order difference of the data. Apply the KPSS Test for Stationarity to determine which difference order results in a stationary dataset

cbind("1st order difference" =
        diff(train),
      "2nd order difference" =
        diff(diff(train))) %>%
  autoplot(facets=TRUE) +
  ggtitle("1st and 2nd Order Difference of USGDP data")

kpss.test(diff(train, differences = 1))
## Warning in kpss.test(diff(train, differences = 1)): p-value smaller than printed
## p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff(train, differences = 1)
## KPSS Level = 1.6348, Truncation lag parameter = 4, p-value = 0.01
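Since the first difference fails the KPSS test (p = 0.01 rejects level stationarity), the second difference should be tested as well; a minimal sketch (output not shown, but for this series the test should fail to reject stationarity, and ndiffs() reports the required number of differences directly):

# KPSS test on the second-order difference; a p-value above 0.05
# indicates level stationarity
kpss.test(diff(train, differences = 2))

# ndiffs() from the forecast package automates the same check
ndiffs(train, test = "kpss")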

The KPSS test rejects level stationarity for the first difference (p = 0.01), so a second-order difference is needed; differencing twice yields a stationary series.

### Question 4: Fit a suitable ARIMA model to the training dataset using the auto.arima() function. Transform data first if needed. Report the resulting p, d, q and coefficient values

fit <- auto.arima(train)
summary(fit)
## Series: train 
## ARIMA(2,2,2) 
## 
## Coefficients:
##           ar1     ar2      ma1      ma2
##       -0.1138  0.3059  -0.5829  -0.3710
## s.e.   0.2849  0.0895   0.2971   0.2844
## 
## sigma^2 estimated as 1591:  log likelihood=-1178.16
## AIC=2366.32   AICc=2366.59   BIC=2383.53
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE     MASE
## Training set 4.304891 39.36568 29.47422 0.09278553 0.695896 0.170048
##                     ACF1
## Training set -0.01735678

p = 2, d = 2, q = 2, with coefficients ar1 = -0.1138, ar2 = 0.3059, ma1 = -0.5829, ma2 = -0.3710.

# Refit the selected ARIMA(2,2,2) model for use in forecasting
fit.train <- Arima(train, order = c(2,2,2))

Question 5: Compute the sample Extended ACF (EACF) and use the Arima() function to try some other plausible models by experimenting with the orders chosen. Limit your models to 𝑞, 𝑝 ≤ 2 and 𝑑 ≤ 2. Use the model summary() function to compare the Corrected Akaike Information Criterion (i.e., AICc) values (Note: smaller values indicate better models).

eacf(train)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x x x x x x x  x  x  x 
## 1 x x o o o o o x o o o  x  x  x 
## 2 x o x o o o o x x o o  x  o  o 
## 3 x x o o o o o x x o o  x  o  o 
## 4 x x x o o o o o o o o  x  o  o 
## 5 x x o o o o o o o o o  x  o  o 
## 6 x x o o o o o o o o o  x  o  o 
## 7 x x x o o o o o o o o  x  o  o
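Since the EACF is intended for a stationary series, it can be more informative to compute it on the twice-differenced data as well; a sketch (output not shown):

# EACF of the stationarized (twice-differenced) series
eacf(diff(train, differences = 2))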
fit.1 <- train %>% Arima(order = c(1,1,1))
summary(fit.1)
## Series: . 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.9538  -0.6169
## s.e.  0.0289   0.0897
## 
## sigma^2 estimated as 1720:  log likelihood=-1192.99
## AIC=2391.97   AICc=2392.08   BIC=2402.31
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE      MAPE      MASE
## Training set 5.848178 41.21099 31.30102 0.1270063 0.7438238 0.1805875
##                     ACF1
## Training set 0.006590455
fit.2 <- train %>% Arima(order = c(1,2,1))
summary(fit.2)
## Series: . 
## ARIMA(1,2,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.3026  -0.9597
## s.e.  0.0662   0.0180
## 
## sigma^2 estimated as 1645:  log likelihood=-1183.09
## AIC=2372.18   AICc=2372.29   BIC=2382.51
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 4.401013 40.21124 29.88758 0.09440549 0.6983004 0.1724328
##                     ACF1
## Training set -0.07232273
fit.3 <- train %>% Arima(order = c(2,2,2))
summary(fit.3)
## Series: . 
## ARIMA(2,2,2) 
## 
## Coefficients:
##           ar1     ar2      ma1      ma2
##       -0.1138  0.3059  -0.5829  -0.3710
## s.e.   0.2849  0.0895   0.2971   0.2844
## 
## sigma^2 estimated as 1591:  log likelihood=-1178.16
## AIC=2366.32   AICc=2366.59   BIC=2383.53
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE     MASE
## Training set 4.304891 39.36568 29.47422 0.09278553 0.695896 0.170048
##                     ACF1
## Training set -0.01735678

The smallest AICc (2366.59) is obtained with p = 2, d = 2, q = 2, matching the model auto.arima() selected in Question 4.
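For a more systematic comparison, every candidate order within the limits can be looped over and the AICc values collected; a minimal sketch (the AICc is stored in the aicc element of forecast's Arima fit):

# Compare AICc across all (p, 2, q) candidates with p, q <= 2
results <- expand.grid(p = 0:2, q = 0:2)
results$AICc <- apply(results, 1, function(r) {
  tryCatch(Arima(train, order = c(r["p"], 2, r["q"]))$aicc,
           error = function(e) NA)   # skip any model that fails to converge
})
results[order(results$AICc), ]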

Question 6: Use the model chosen in Question 4 to forecast and plot the GDP forecasts with 80% and 95% confidence levels for 2005Q2 - 2006Q1 (Test Period)

# forecast() returns the point forecasts plus 80% and 95% intervals;
# the name "fc" avoids masking base R's predict()
fc <- forecast(fit.train, h = 4)
fc
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       11079.04 11027.93 11130.15 11000.87 11157.21
## 2005 Q3       11157.71 11073.75 11241.68 11029.31 11286.12
## 2005 Q4       11229.64 11111.69 11347.59 11049.26 11410.03
## 2006 Q1       11302.01 11154.80 11449.23 11076.87 11527.15
fc %>% autoplot()
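To see how well the intervals cover the held-out observations, the test series can be overlaid on the forecast plot; a sketch using forecast's autolayer():

# Overlay the actual 2005Q2 - 2006Q1 values on the forecast fan chart
autoplot(fc) + autolayer(test, series = "Actual")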

Question 7: Compare your forecasts with the actual values using error = actual - estimate and plot the errors. (Note: Use the forecast $mean element for the forecast estimate)

# The ARIMA model was fit on the untransformed series, so the forecast is
# already on the original scale; no InvBoxCox() back-transformation is needed
estimate <- fc$mean
estimate
##         Qtr1     Qtr2     Qtr3     Qtr4
## 2005          11079.04 11157.71 11229.64
## 2006 11302.01
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,2,2)
## Q* = 9.8604, df = 4, p-value = 0.04285
## 
## Model df: 4.   Total lags used: 8

The Ljung-Box p-value (0.043) is borderline: some autocorrelation remains in the ARIMA(2,2,2) residuals at the 5% level, which is worth keeping in mind when interpreting the forecast intervals.
# Errors below are recomputed from the rounded point forecasts shown above,
# so the exact values may differ slightly in a fresh run
error <- test - estimate
error
##        Qtr1   Qtr2   Qtr3   Qtr4
## 2005          10.16  44.59  18.66
## 2006 101.59
# Use autoplot function to plot the error 
autoplot(error)

### Calculate the sum of squared errors.

# Sum of squared forecast errors over the four test quarters
# (recomputed from the rounded forecasts; the exact figure may differ slightly)
SSE <- sum(error^2)
SSE
## [1] 12760.22
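As a cross-check on the hand-computed errors, the forecast package's accuracy() function reports training- and test-set error measures (ME, RMSE, MAE, and others) directly:

# Test-set RMSE here should be close to sqrt(SSE / 4)
accuracy(fc, test)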