#1 Check you working directory

getwd()
## [1] "/Users/zosiajiang/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565"

#2 Set your working directory to “ANLY 580/RScript”. Upload “nlme” library

setwd("~/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565")
# install.packages('nlme')
library(nlme)

#3 Download “trade.xls” data file and set the “date” variable to the date format and the “trade” variable to the numeric format. The “trade” variable represents the Ratio of Exports to Imports for China expressed in percentages.

library(readxl)
trade1 <- read_xls('trade.xls')
date <- as.Date(trade1$date)
trade <- as.numeric(trade1$trade)

#4 Create two stand alone varibales: “datev” and “tradev”. “datev” variable should represent values of the “date” variable from the “trade” data set, while, “tradev” variable should represent values of the “trade” variable from the “trade” data set.

datev <- trade1$date
tradev <- trade1$trade

#5 Use the “datev” variable and the range() function to check the time sample covered by the “trade” data set. What time period is covered? What is the frequency of the data?

Answer: it covers period from 1992-01-01 to 2019-04-01.

range(datev)
## [1] "1992-01-01 UTC" "2019-04-01 UTC"
head(datev) # The frequency is monthly.
## [1] "1992-01-01 UTC" "1992-02-01 UTC" "1992-03-01 UTC" "1992-04-01 UTC"
## [5] "1992-05-01 UTC" "1992-06-01 UTC"

#6 Transform “tradev” variable from numeric format to the time series format by using ts() function. Lable the new varibale as “tradets”.

tradets <- ts(tradev, start = c(1992,1), end = c(2019,4), freq=12)

#7 Plot the time series graph of the “tradets” variable. Please lable all axis correcly, and make sure to lable the graph. Based on this graph does the Ratio of Exports to Imports for China exhibit a trend? What about a regular seasonal fluctuation?

Answer: Yes, the graph exhibits a trend and a regular seasonal fluctuation, which can be observed using decompse function.

plot(tradets, xlab = "Year", ylab = "Trade / ratio (%)", main = "Trade Trend")

plot(decompose(tradets))

#8 Use “tradets” variable and window() function to create 2 new variables called “tradepre”, “tradepost”. The “tradepre” should include all observations for the period up until December 2018.(Last observation should be December 2018). The “tradepost” should include all observations starting from January 2019 and up until the last month in the dataset.

tradepre <- window(tradets,start = c(1992,1), end = c(2018,12), freq=12)
tradepost <- window(tradets, start = c(2019,1), end = c(2019,4), freq=12)

#9 Estimate autocorrelation function and partial autocorrelation function for the “tradepre” variable. Does the trade ratio for China exhibit autocorrelation? What process can explain this time series (white noise, random walk, AR, etc..)?

Answer: Based on the graph, the trade ratio for China exhibits a significant autocorrelation. The underlying process is AR.

acf(tradepre)

pacf(tradepre)

acf(diff(tradepre))

#10 Estimate AR(q) model for the “tradepre” time series. Use ar() function (set aic=FALSE) and rely on the corellologram to determine q, the order of the model. Moreover, use maximum liklehood method. After that, set aic=TRUE and estimate ar() again to see if you have identified the order correctly. Save the estimates as “trade.ar”.

Answer: If we set aic differently in the ar() function, the order is different. If we set aic equals to FALSE, the order number is 12. If TRUE, 3. I will choose the model with 3 orders.

tradepre.ar1 <- ar(tradepre, method = "mle", aic=FALSE)
tradepre.ar1$order # 12
## [1] 12
summary(tradepre.ar1)
##              Length Class  Mode     
## order          1    -none- numeric  
## ar            12    -none- numeric  
## var.pred       1    -none- numeric  
## x.mean         1    -none- numeric  
## aic            1    -none- numeric  
## n.used         1    -none- numeric  
## n.obs          1    -none- numeric  
## order.max      1    -none- numeric  
## partialacf     0    -none- NULL     
## resid        324    ts     numeric  
## method         1    -none- character
## series         1    -none- character
## frequency      1    -none- numeric  
## call           4    -none- call     
## asy.var.coef 144    -none- numeric
trade.ar <-ar(tradepre, method = "mle", aic=TRUE)
trade.ar$order # 3
## [1] 3
summary(trade.ar)
##              Length Class  Mode     
## order          1    -none- numeric  
## ar             3    -none- numeric  
## var.pred       1    -none- numeric  
## x.mean         1    -none- numeric  
## aic           13    -none- numeric  
## n.used         1    -none- numeric  
## n.obs          1    -none- numeric  
## order.max      1    -none- numeric  
## partialacf     0    -none- NULL     
## resid        324    ts     numeric  
## method         1    -none- character
## series         1    -none- character
## frequency      1    -none- numeric  
## call           4    -none- call     
## asy.var.coef   9    -none- numeric
acf(trade.ar$res[-(1:trade.ar$order)])

#11 For each of the AR coeficients estimate 95% confidence interval. To find 95% confidence intervals you need to add and subtract 2 standard deviations of the coefficient estimates. Hint you can obtain these standard deviations by applying sqrt() function to the diagonal elements of the asymptotic-theory variance matrix of the coefficient estimates.

trade.ar$ar
## [1] 0.3159134 0.3970605 0.1479268
trade.ar$ar[1] + c(-2, 2) * sqrt(trade.ar$asy.var[1,1])
## [1] 0.2061866 0.4256402
trade.ar$ar[2] + c(-2, 2) * sqrt(trade.ar$asy.var[2,2])
## [1] 0.2904095 0.5037114
trade.ar$ar[3] + c(-2, 2) * sqrt(trade.ar$asy.var[3,3])
## [1] 0.0382000 0.2576536

#12 Extract the residuals from the trade.ar model and estimate the autocorrelation function. Based on this correlogram would you say trade.ar model does a good job of explaining the trade ratio in China?

Answer: Yes, the model does a good job of explaining the trade ratio in China.

acf(trade.ar$res[-(1:trade.ar$order)])

#13 Use trade.ar model and predict() function to creat a 4 period ahead forecast of the trade ratio in China. Save these predicted values as “trade.ar.forc”.

trade.ar.forc <- predict(trade.ar, n.ahead=4)
trade.ar.forc
## $pred
##           Jan      Feb      Mar      Apr
## 2019 119.7743 120.6302 120.0276 119.6924
## 
## $se
##            Jan       Feb       Mar       Apr
## 2019  8.138049  8.534487  9.443900 10.072314

#14 Use ts.plot() function to plot side-by-side actual values of the trade ratio from January 2019-April 2019 period and their forecasted counterparts. (tradepost and trade.ar.forc) Please designate red color to represent the actual observed values, and blue doted lines to represent forecasted values. How does the ability to predict future trade ratio depends on the time horizon of the forecast?

ts.plot(tradepost, trade.ar.forc$pred, lty = c(1,10), col=c("red","blue"))

#15 Please calculate forecast’s mean absolute percentage error for the trade.ar.forc forecasting model. Why is it important to calculate mean absolute percentage error rather than mean percentage error?

Answer: MAPE is the ratio of the residual over the actual. It’s more robust to the effect of outliers and easier for people to conceptualize. MPE indicates to us that it actually systematically underestimates the sales. If the positive and negative errors cancel out, it will be less accuracy.

mean(abs((tradepost - trade.ar.forc$pred)/tradepost * 100))
## [1] 5.596669

#16 Use time() function and tradepre variable to create a variable called “Time”.

Time <- time(tradepre)

#17 Estimate linear regression model by regressing “Time” on “tradepre” variable. Save this regression model as “trade.lmt”. By using confint() function calculate 95% confidence intervals for the estimated model coeficients. What can you conclude based on the estimates of the model coeficients? What is the direction of the time trend?

Answer: Zero is not contained in the intervals, which implies that the estimates are likely significant. However, The residual series is autocorrelated at shorter lags, leading to an too narrow a confident interval for the slope. The direction of the time trend is positive based on the positive coefficient of Time.

trade.lmt <- lm(tradepre ~ Time)
coef(trade.lmt)
##   (Intercept)          Time 
## -1113.2056232     0.6130132
confint(trade.lmt)
##                     2.5 %      97.5 %
## (Intercept) -1457.3906131 -769.020633
## Time            0.4413904    0.784636
summary(trade.lmt)
## 
## Call:
## lm(formula = tradepre ~ Time)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.906  -7.849  -2.364   6.662  57.431 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.113e+03  1.749e+02  -6.363 6.79e-10 ***
## Time         6.130e-01  8.724e-02   7.027 1.26e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.24 on 322 degrees of freedom
## Multiple R-squared:  0.133,  Adjusted R-squared:  0.1303 
## F-statistic: 49.38 on 1 and 322 DF,  p-value: 1.263e-11
acf(resid(lm(tradepre ~ Time))) #The residual series is autocorrelated at shorter lags, leading to an too narrow a confident interval for the slope.

#18 By visually inspecting a time series plot of the “tradepre” variable, and given the seasonal nature of the trade relationships it is reasonable to assume that there are regular seasonal fluctuations in the trade ratio for China. Use “tradepre” variable and cycle() function to create a factor variable titled “Seas”.

plot.ts(tradepre)

Seas <- factor(cycle(tradepre))
str(Seas)
##  Factor w/ 12 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...

#19 Use lm() function to estimate linear regression model by regressing “Time” and “Seas” on “tradepre”. Save this regression model as “trade.lmts”. Set the value of the intercept to 0, in order to interpret the coeficients of the seasonal dummy variables as seasonal intercepts. (Setting intercept to 0 ensures that for each season there is a unique intercept) # What can you conclude based on the estimates of the model coeficients? # What is the direction of the time trend? Is there a seasonal component? # During which month should you expect the trade ratio to be the largest?

Answer: Zero is not contained in the intervals, which implies that the estimates are likely significant. Time is positively correlated and all 12 seasons are negative. December expects to have the largest trade ratio.

trade.lmts <- lm(tradepre ~ 0 + Time + Seas)
coef(trade.lmts)
##          Time         Seas1         Seas2         Seas3         Seas4 
##     0.6148695 -1114.7628735 -1114.3352172 -1117.9669538 -1117.7854925 
##         Seas5         Seas6         Seas7         Seas8         Seas9 
## -1117.6851004 -1117.6614236 -1117.6201180 -1117.4878374 -1117.5408019 
##        Seas10        Seas11        Seas12 
## -1117.5753960 -1115.7155582 -1117.0039878
summary(trade.lmts)
## 
## Call:
## lm(formula = tradepre ~ 0 + Time + Seas)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.050  -7.488  -2.213   6.182  55.259 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## Time    6.149e-01  8.839e-02   6.956 2.07e-11 ***
## Seas1  -1.115e+03  1.772e+02  -6.290 1.08e-09 ***
## Seas2  -1.114e+03  1.772e+02  -6.287 1.09e-09 ***
## Seas3  -1.118e+03  1.773e+02  -6.307 9.75e-10 ***
## Seas4  -1.118e+03  1.773e+02  -6.306 9.82e-10 ***
## Seas5  -1.118e+03  1.773e+02  -6.305 9.87e-10 ***
## Seas6  -1.118e+03  1.773e+02  -6.305 9.89e-10 ***
## Seas7  -1.118e+03  1.773e+02  -6.304 9.92e-10 ***
## Seas8  -1.117e+03  1.773e+02  -6.303 9.98e-10 ***
## Seas9  -1.118e+03  1.773e+02  -6.303 9.98e-10 ***
## Seas10 -1.118e+03  1.773e+02  -6.303 9.98e-10 ***
## Seas11 -1.116e+03  1.773e+02  -6.292 1.06e-09 ***
## Seas12 -1.117e+03  1.773e+02  -6.299 1.02e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.39 on 311 degrees of freedom
## Multiple R-squared:  0.9892, Adjusted R-squared:  0.9888 
## F-statistic:  2194 on 13 and 311 DF,  p-value: < 2.2e-16
confint(trade.lmts)
##                2.5 %       97.5 %
## Time       0.4409486    0.7887903
## Seas1  -1463.5057235 -766.0200235
## Seas2  -1463.0925593 -765.5778751
## Seas3  -1466.7387880 -769.1951196
## Seas4  -1466.5718188 -768.9991663
## Seas5  -1466.4859188 -768.8842821
## Seas6  -1466.4767341 -768.8461132
## Seas7  -1466.4499205 -768.7903155
## Seas8  -1466.3321321 -768.6435428
## Seas9  -1466.3995886 -768.6820152
## Seas10 -1466.4486748 -768.7021172
## Seas11 -1464.6033291 -766.8277873
## Seas12 -1465.9062508 -768.1017248

#20 Extract the residual series from the “trade.lmts” model and save them as “trade.lmts.resid”. Then, estimate autocorrelation function to check the goodness of the fit. What is the value of autocorrelation at lag 1? What can you conclude based on the correlogram of the residual series?

Answer: The residual series are positively correlated following AR(3) process.

trade.lmts.resid <- trade.lmts$residuals
acf(trade.lmts.resid)

pacf(trade.lmts.resid)

acf(trade.lmts.resid)[1]

## 
## Autocorrelations of series 'trade.lmts.resid', by lag
## 
##     1 
## 0.664

#21 Fit linear model by regressing “Time” and “Seas” on “tradepre” by uzing generalized least squares (gls() fucntion). Set the value of the intercept to 0, in order to interpret the coeficients of the seasonal dummy variables as seasonal intercepts. Save this model’s estimates as “trade.gls”.

trade.gls <- gls(tradepre ~ 0 + Time + Seas)
coef(trade.gls)
##          Time         Seas1         Seas2         Seas3         Seas4 
##     0.6148695 -1114.7628735 -1114.3352172 -1117.9669538 -1117.7854925 
##         Seas5         Seas6         Seas7         Seas8         Seas9 
## -1117.6851004 -1117.6614236 -1117.6201180 -1117.4878374 -1117.5408019 
##        Seas10        Seas11        Seas12 
## -1117.5753960 -1115.7155582 -1117.0039878

#22 Compute Akaike’s An Information Criterion for “trade.lmts” and “trade.gls”. # Which model performs better?

Answer: GLS model performs better with smaller AIC.

AIC(trade.lmts)
## [1] 2565.278
AIC(trade.gls) # smaller AIC
## [1] 2525.645

#23 Create the following new variables: # “new.Time”- sequence of 4 values starting from 2019 and each number going up by 1/12 # “alpha” - assumes value of the Time coeficient from the trade.gls model # “beta” - takes on values of the first, second, third, and fourth seasonal coeficients from the trade.gls model.

new.Time <- seq(2019, len = 4, by = 1/12)
alpha <- coef(trade.gls)[1]
beta <- coef(trade.gls)[2:5]

#24 By using the forecasting equation of x_(t+1)<-0+alpha*Time_(t+1)+beta create a 4 period ahead forecast of the trade ratio for China. Lable this forecast as “trade.gls.forc”

trade.gls.forc <- (0 + alpha * new.Time + beta)[1:4]
trade.gls.forc
##    Seas1    Seas2    Seas3    Seas4 
## 126.6586 127.1375 123.5570 123.7897

#25 Use ts.plot() function to plot side-by-side actual values of the trade ratio from January 2019-April 2019 period and their forecasted counterparts. (tradepost and trade.gls.forecast) Please designate red color to represent the actual observed values and blue doted lines to represent forecasted values.

ts.plot(tradepost, trade.gls.forc, lty = c(1,10), col=c("red","blue"))

#26 Please calculate forecast mean absolute percentage errorfor the “trade.gls.forc” forecasting model. Based on the forecast’s mean absolute percentage error, which of the two models, “trade.ar.forc” and trade.gls.forc" performs better?

Answer: trade.ar.forc model has smaller MAPE so it performs better based on this criteria.

mean(abs((tradepost - trade.gls.forc)/tradepost * 100))
## [1] 7.342898

#27 Create a variable called tradepreL, that represents the first lagged value of the “tradepre” variable. For example tradepreL_t=tradepre_(t-1). Moreover, transform “tradepreL” variable into a time series object by using ts(). It should cover the same time period as “tradepre”.

tradepreL <- tradepre
for (t in c(2:length(tradepre))) {
  tradepreL[t] <- tradepre[t-1]
}
tradepreL <- ts(tradepreL)

#28 Use lm() function to estimate linear regression model by regressing “tradepreL”, “Time” and “Seas” on “tradepre”. Set the value of the intercept to 0, in order to interpret the coeficients of the seasonal dummy variables as seasonal intercepts. Save this regression model as “trade.ar.lmts”.

trade.ar.lmts <- lm(tradepre ~ 0 + tradepreL + Time + Seas)
coef(trade.ar.lmts)
##    tradepreL         Time        Seas1        Seas2        Seas3        Seas4 
##    0.6674259    0.2139914 -388.8619941 -389.3312437 -393.2492009 -390.6446165 
##        Seas5        Seas6        Seas7        Seas8        Seas9       Seas10 
## -390.6661282 -390.7102475 -390.6855362 -390.5816159 -390.7236596 -390.7236957 
##       Seas11       Seas12 
## -388.8415607 -391.3720860

#29 By using new.Time variable, and the following forecasting equation x_(t+1)<-0+alpha1x_t+alpha2Time_(t+1)+beta create the following new variables: # “alpha1” - assumes value of the tradepreL coefiencient from the trade.ar.lmts model # “alpha2” - assumes value of the Time coeficient from the trade.ar.lmts model # “beta1” - takes on values of the first seasonal coeficient from the trade.ar.lmts. # “beta2” - takes on values of the second seasonal coeficient from the trade.ar.lmts. # “beta3” - takes on values of the third seasonal coeficient from the trade.ar.lmts. # “beta4” - takes on values of the fourth seasonal coeficient from the trade.ar.lmts. # “forc20191” - takes on the forecasted value of the trade ratio for January 2019 # “forc20192” - takes on the forecasted value of the trade ratio for February 2019 # “forc20193” - takes on the forecasted value of the trade ratio for March 2019 # “forc20194” - takes on the forecasted value of the trade ratio for April 2019 # “trade.ar.lmts.forc” a vector of four predicted trade ratios.

alpha1 <- coef(trade.ar.lmts)[1]
alpha2 <- coef(trade.ar.lmts)[2]
beta1 <- coef(trade.ar.lmts)[3]
beta2 <- coef(trade.ar.lmts)[4]
beta3 <- coef(trade.ar.lmts)[5]
beta4 <- coef(trade.ar.lmts)[5]
forc20191 <- 0 + alpha1*tradepre[length(tradepre)] + alpha2*new.Time[1] + beta1
forc20192 <- 0 +alpha1*forc20191 + alpha2*new.Time[2] +beta2
forc20193 <- 0 +alpha1*forc20192 + alpha2*new.Time[3] +beta3
forc20194 <- 0 +alpha1*forc20193 + alpha2*new.Time[4] +beta4
trade.ar.lmts.forc <- c(forc20191,forc20192,forc20193,forc20194)

#30 Please calculate forecast mean absolute percentage error for the trade.ar.lmts.forc forecasting model. # Which of the following models would you chose to based on this criteria? # Models: trade.ar.forc, trade.gls.forc, and trade.ar.lmts.forc)

Answer: I will choose trade.ar.forc since it has the smallest MAPE score.

mean(abs((tradepost - trade.ar.lmts.forc)/tradepost * 100))
## [1] 6.343938