#install.packages("locfit")
#install.packages("C:/Users/pattasx2/Documents/Personal/Personal -2020/Subha/R", repos = NULL, type="source")
#packageurl <- "https://cran.r-project.org/src/contrib/Archive/TSA/TSA_1.2.tar.gz"
#install.packages(packageurl, repos=NULL, type="source")
usgdp
## Qtr1 Qtr2 Qtr3 Qtr4
## 1947 1570.5 1568.7 1568.0 1590.9
## 1948 1616.1 1644.6 1654.1 1658.0
## 1949 1633.2 1628.4 1646.7 1629.9
## 1950 1696.8 1747.3 1815.8 1848.9
## 1951 1871.3 1903.1 1941.1 1944.4
## 1952 1964.7 1966.0 1978.8 2043.8
## 1953 2082.3 2098.1 2085.4 2052.5
## 1954 2042.4 2044.3 2066.9 2107.8
## 1955 2168.5 2204.0 2233.4 2245.3
## 1956 2234.8 2252.5 2249.8 2286.5
## 1957 2300.3 2294.6 2317.0 2292.5
## 1958 2230.2 2243.4 2295.2 2348.0
## 1959 2392.9 2455.8 2453.9 2462.6
## 1960 2517.4 2504.8 2508.7 2476.2
## 1961 2491.2 2538.0 2579.1 2631.8
## 1962 2679.1 2708.4 2733.3 2740.0
## 1963 2775.9 2810.6 2863.5 2885.8
## 1964 2950.5 2984.8 3025.5 3033.6
## 1965 3108.2 3150.2 3214.1 3291.8
## 1966 3372.3 3384.0 3406.3 3433.7
## 1967 3464.1 3464.3 3491.8 3518.2
## 1968 3590.7 3651.6 3676.5 3692.0
## 1969 3750.2 3760.9 3784.2 3766.3
## 1970 3760.0 3767.1 3800.5 3759.8
## 1971 3864.1 3885.9 3916.7 3927.9
## 1972 3997.7 4092.1 4131.1 4198.7
## 1973 4305.3 4355.1 4331.9 4373.3
## 1974 4335.4 4347.9 4305.8 4288.9
## 1975 4237.6 4268.6 4340.9 4397.8
## 1976 4496.8 4530.3 4552.0 4584.6
## 1977 4640.0 4731.1 4815.8 4815.3
## 1978 4830.8 5021.2 5070.7 5137.4
## 1979 5147.4 5152.3 5189.4 5204.7
## 1980 5221.3 5115.9 5107.4 5202.1
## 1981 5307.5 5266.1 5329.8 5263.4
## 1982 5177.1 5204.9 5185.2 5189.8
## 1983 5253.8 5372.3 5478.4 5590.5
## 1984 5699.8 5797.9 5854.3 5902.4
## 1985 5956.9 6007.8 6101.7 6148.6
## 1986 6207.4 6232.0 6291.7 6323.4
## 1987 6365.0 6435.0 6493.4 6606.8
## 1988 6639.1 6723.5 6759.4 6848.6
## 1989 6918.1 6963.5 7013.1 7030.9
## 1990 7112.1 7130.3 7130.8 7076.9
## 1991 7040.8 7086.5 7120.7 7154.1
## 1992 7228.2 7297.9 7369.5 7450.7
## 1993 7459.7 7497.5 7536.0 7637.4
## 1994 7715.1 7815.7 7859.5 7951.6
## 1995 7973.7 7988.0 8053.1 8112.0
## 1996 8169.2 8303.1 8372.7 8470.6
## 1997 8536.1 8665.8 8773.7 8838.4
## 1998 8936.2 8995.3 9098.9 9237.1
## 1999 9315.5 9392.6 9502.2 9671.1
## 2000 9695.6 9847.9 9836.6 9887.7
## 2001 9875.6 9905.9 9871.1 9910.0
## 2002 9977.3 10031.6 10090.7 10095.8
## 2003 10138.6 10230.4 10410.9 10502.6
## 2004 10612.5 10704.1 10808.9 10897.1
## 2005 10999.3 11089.2 11202.3 11248.3
## 2006 11403.6
train <- window(usgdp, start=1947, end = 2005)
train
## Qtr1 Qtr2 Qtr3 Qtr4
## 1947 1570.5 1568.7 1568.0 1590.9
## 1948 1616.1 1644.6 1654.1 1658.0
## 1949 1633.2 1628.4 1646.7 1629.9
## 1950 1696.8 1747.3 1815.8 1848.9
## 1951 1871.3 1903.1 1941.1 1944.4
## 1952 1964.7 1966.0 1978.8 2043.8
## 1953 2082.3 2098.1 2085.4 2052.5
## 1954 2042.4 2044.3 2066.9 2107.8
## 1955 2168.5 2204.0 2233.4 2245.3
## 1956 2234.8 2252.5 2249.8 2286.5
## 1957 2300.3 2294.6 2317.0 2292.5
## 1958 2230.2 2243.4 2295.2 2348.0
## 1959 2392.9 2455.8 2453.9 2462.6
## 1960 2517.4 2504.8 2508.7 2476.2
## 1961 2491.2 2538.0 2579.1 2631.8
## 1962 2679.1 2708.4 2733.3 2740.0
## 1963 2775.9 2810.6 2863.5 2885.8
## 1964 2950.5 2984.8 3025.5 3033.6
## 1965 3108.2 3150.2 3214.1 3291.8
## 1966 3372.3 3384.0 3406.3 3433.7
## 1967 3464.1 3464.3 3491.8 3518.2
## 1968 3590.7 3651.6 3676.5 3692.0
## 1969 3750.2 3760.9 3784.2 3766.3
## 1970 3760.0 3767.1 3800.5 3759.8
## 1971 3864.1 3885.9 3916.7 3927.9
## 1972 3997.7 4092.1 4131.1 4198.7
## 1973 4305.3 4355.1 4331.9 4373.3
## 1974 4335.4 4347.9 4305.8 4288.9
## 1975 4237.6 4268.6 4340.9 4397.8
## 1976 4496.8 4530.3 4552.0 4584.6
## 1977 4640.0 4731.1 4815.8 4815.3
## 1978 4830.8 5021.2 5070.7 5137.4
## 1979 5147.4 5152.3 5189.4 5204.7
## 1980 5221.3 5115.9 5107.4 5202.1
## 1981 5307.5 5266.1 5329.8 5263.4
## 1982 5177.1 5204.9 5185.2 5189.8
## 1983 5253.8 5372.3 5478.4 5590.5
## 1984 5699.8 5797.9 5854.3 5902.4
## 1985 5956.9 6007.8 6101.7 6148.6
## 1986 6207.4 6232.0 6291.7 6323.4
## 1987 6365.0 6435.0 6493.4 6606.8
## 1988 6639.1 6723.5 6759.4 6848.6
## 1989 6918.1 6963.5 7013.1 7030.9
## 1990 7112.1 7130.3 7130.8 7076.9
## 1991 7040.8 7086.5 7120.7 7154.1
## 1992 7228.2 7297.9 7369.5 7450.7
## 1993 7459.7 7497.5 7536.0 7637.4
## 1994 7715.1 7815.7 7859.5 7951.6
## 1995 7973.7 7988.0 8053.1 8112.0
## 1996 8169.2 8303.1 8372.7 8470.6
## 1997 8536.1 8665.8 8773.7 8838.4
## 1998 8936.2 8995.3 9098.9 9237.1
## 1999 9315.5 9392.6 9502.2 9671.1
## 2000 9695.6 9847.9 9836.6 9887.7
## 2001 9875.6 9905.9 9871.1 9910.0
## 2002 9977.3 10031.6 10090.7 10095.8
## 2003 10138.6 10230.4 10410.9 10502.6
## 2004 10612.5 10704.1 10808.9 10897.1
## 2005 10999.3
test <- window(usgdp, start = c(2005,2), end = 2006)
test
## Qtr1 Qtr2 Qtr3 Qtr4
## 2005 11089.2 11202.3 11248.3
## 2006 11403.6
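As a quick sanity check (a small sketch using base ts accessors), the split can be confirmed: the training window should end at 2005 Q1 and the test window should hold the final four quarters.
end(train)    # expected: 2005 1
start(test)   # expected: 2005 2
length(test)  # expected: 4 held-out quarters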
#ts.plot(train, type = 'l')
autoplot(train)
train %>% BoxCox(lambda = 0) %>% autoplot()
train %>% BoxCox(lambda = .1) %>% autoplot()
train %>% BoxCox(lambda = .2) %>% autoplot()
train %>% BoxCox(lambda = .3) %>% autoplot()
train %>% BoxCox(lambda = .5) %>% autoplot()
train %>% BoxCox(lambda = .7) %>% autoplot()
train %>% BoxCox(lambda = .9) %>% autoplot()
train %>% BoxCox(lambda = 1) %>% autoplot()
lambda <- BoxCox.lambda(train)
lambda
## [1] 0.3689656
The data does not show a strong need for a Box-Cox transformation: the variation looks fairly similar regardless of the level of the series.
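As a visual double-check (a sketch; `lambda` is the value estimated above), the series can be plotted on the Box-Cox scale and compared with the untransformed plot:
# if the variation were strongly level-dependent, this transformed plot would look
# noticeably more uniform than autoplot(train) above
train %>% BoxCox(lambda = lambda) %>% autoplot() +
  ggtitle("Training data, Box-Cox transformed with the estimated lambda")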
### Question 3: Plot the first- and second-order differences of the data. Apply the KPSS test for stationarity to determine which difference order results in a stationary dataset.
cbind("1st order difference" =
diff(train),
"2nd order difference" =
diff(diff(train))) %>%
autoplot(facets=TRUE) +
ggtitle("1st and 2nd Order Difference of USGDP data")
kpss.test(diff(train, differences = 1))
## Warning in kpss.test(diff(train, differences = 1)): p-value smaller than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(train, differences = 1)
## KPSS Level = 1.6348, Truncation lag parameter = 4, p-value = 0.01
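The printed KPSS test covers only the first difference. To support the conclusion below, the same test can be run on the second difference (a sketch; output not reproduced here):
kpss.test(diff(train, differences = 2))  # a p-value above 0.05 here would support level stationarity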
The KPSS test rejects level stationarity for the first difference (p-value ≈ 0.01), so a second-order difference is needed to obtain a stationary series.

### Question 4: Fit a suitable ARIMA model to the training dataset using the auto.arima() function. Transform the data first if needed. Report the resulting p, d, q and coefficient values.
fit <- auto.arima(train)
summary(fit)
## Series: train
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.1138 0.3059 -0.5829 -0.3710
## s.e. 0.2849 0.0895 0.2971 0.2844
##
## sigma^2 estimated as 1591: log likelihood=-1178.16
## AIC=2366.32 AICc=2366.59 BIC=2383.53
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.304891 39.36568 29.47422 0.09278553 0.695896 0.170048
## ACF1
## Training set -0.01735678
auto.arima() selects p = 2, d = 2, q = 2, with coefficients ar1 = -0.1138, ar2 = 0.3059, ma1 = -0.5829 and ma2 = -0.3710.
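For reference, the fitted ARIMA(2,2,2) can be written in backshift notation (using R's sign convention, in which the MA coefficients enter with a plus sign):

$$(1 - \phi_1 B - \phi_2 B^2)(1 - B)^2 y_t = (1 + \theta_1 B + \theta_2 B^2)\varepsilon_t,$$

with $\phi_1 = -0.1138$, $\phi_2 = 0.3059$, $\theta_1 = -0.5829$ and $\theta_2 = -0.3710$.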
fit.train <- Arima(train,order = c(2,2,2))
eacf(train)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x x x x x x x x x x
## 1 x x o o o o o x o o o x x x
## 2 x o x o o o o x x o o x o o
## 3 x x o o o o o x x o o x o o
## 4 x x x o o o o o o o o x o o
## 5 x x o o o o o o o o o x o o
## 6 x x o o o o o o o o o x o o
## 7 x x x o o o o o o o o x o o
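The EACF above is computed on the undifferenced series, so the nonstationary trend dominates it; running it on the twice-differenced (stationary) series is usually more informative for picking candidate p and q (a sketch, output not reproduced here):
eacf(diff(train, differences = 2))  # the upper-left corner of "o"s suggests candidate (p, q) orders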
fit.1 <- train %>% Arima(order = c(1,1,1))
summary(fit.1)
## Series: .
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.9538 -0.6169
## s.e. 0.0289 0.0897
##
## sigma^2 estimated as 1720: log likelihood=-1192.99
## AIC=2391.97 AICc=2392.08 BIC=2402.31
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.848178 41.21099 31.30102 0.1270063 0.7438238 0.1805875
## ACF1
## Training set 0.006590455
fit.2 <- train %>% Arima(order = c(1,2,1))
summary(fit.2)
## Series: .
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## 0.3026 -0.9597
## s.e. 0.0662 0.0180
##
## sigma^2 estimated as 1645: log likelihood=-1183.09
## AIC=2372.18 AICc=2372.29 BIC=2382.51
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.401013 40.21124 29.88758 0.09440549 0.6983004 0.1724328
## ACF1
## Training set -0.07232273
fit.3 <- train %>% Arima(order = c(2,2,2))
summary(fit.3)
## Series: .
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.1138 0.3059 -0.5829 -0.3710
## s.e. 0.2849 0.0895 0.2971 0.2844
##
## sigma^2 estimated as 1591: log likelihood=-1178.16
## AIC=2366.32 AICc=2366.59 BIC=2383.53
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.304891 39.36568 29.47422 0.09278553 0.695896 0.170048
## ACF1
## Training set -0.01735678
The smallest AICc (2366.59) is obtained with p = 2, d = 2, q = 2, matching the auto.arima() selection.
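The comparison can also be made programmatically, since forecast::Arima() stores the AICc on each fitted object (a small sketch using the fits above):
sapply(list(ARIMA_1_1_1 = fit.1, ARIMA_1_2_1 = fit.2, ARIMA_2_2_2 = fit.3),
       function(m) m$aicc)  # the ARIMA(2,2,2) fit should show the lowest AICc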
predict <- forecast(fit.train, h = 4)
predict
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 Q2 11079.04 11027.93 11130.15 11000.87 11157.21
## 2005 Q3 11157.71 11073.75 11241.68 11029.31 11286.12
## 2005 Q4 11229.64 11111.69 11347.59 11049.26 11410.03
## 2006 Q1 11302.01 11154.80 11449.23 11076.87 11527.15
#win.graph(width=12, height=6,pointsize=12)
predict %>% autoplot()
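Overlaying the held-out observations makes the comparison with the test set explicit (a sketch using forecast::autolayer()):
autoplot(predict) +
  autolayer(test, series = "Test data") +  # the four held-out quarters, 2005 Q2 to 2006 Q1
  ggtitle("ARIMA(2,2,2) forecasts vs. held-out usgdp observations")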
# Back-transform the point forecasts with InvBoxCox(). Note that fit.train was fitted to the
# untransformed series, so its forecasts are already on the original scale; applying
# InvBoxCox() to them inflates the values enormously, as the output below shows.
estimate <- InvBoxCox(predict$mean, lambda = lambda)  # lambda estimated earlier as 0.3689656
estimate
## Qtr1 Qtr2 Qtr3 Qtr4
## 2005 6143707098 6262644880 6372639482
## 2006 6484535521
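If a Box-Cox transformation were actually wanted, the cleaner route (a sketch, keeping the same order) is to pass `lambda` to Arima(), so that forecast() handles the back-transformation automatically:
fit.bc <- Arima(train, order = c(2, 2, 2), lambda = lambda)  # model fitted on the Box-Cox scale
forecast(fit.bc, h = 4)$mean                                 # point forecasts returned on the original scale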
checkresiduals(fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,2)
## Q* = 9.8604, df = 4, p-value = 0.04285
##
## Model df: 4. Total lags used: 8
error <- test - estimate
error
## Qtr1 Qtr2 Qtr3 Qtr4
## 2005 -6143696009 -6262633678 -6372628233
## 2006 -6484524117
# Use autoplot function to plot the error
autoplot(error)
### Calculate the sum of squared error.
SSE <- sum(error^2)
SSE
## [1] 1.59625e+20
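Because the forecasts from fit.train are already on the original scale, the test-set error can also be computed without the back-transform (a sketch; this would give a far smaller, more meaningful SSE than the value above):
error.direct <- test - predict$mean  # forecast errors on the original, untransformed scale
sum(error.direct^2)                  # sum of squared errors over the four held-out quarters
accuracy(predict, test)              # standard training- and test-set accuracy measures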