1: False. Residuals do not have to be normally distributed. 2: False. Here are the properties of a good forecasting method: 1, residuals are uncorrelated; 2, residuals do not need to be normally distributed; 3, residuals do not need to have constant variance. 3: False. The best accuracy measure varies between datasets. 4: False. Complexity is not the key to a good model. 5: False. Residuals should be considered too.

library(fma)

## Warning: package 'fma' was built under R version 3.5.2

## Loading required package: forecast

## Warning: package 'forecast' was built under R version 3.5.2

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.2

library(fpp)

## Warning: package 'fpp' was built under R version 3.5.2

## Loading required package: expsmooth

## Warning: package 'expsmooth' was built under R version 3.5.2

## Loading required package: lmtest

## Warning: package 'lmtest' was built under R version 3.5.2

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 3.5.2

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: tseries

## Warning: package 'tseries' was built under R version 3.5.2

# Plot the raw dowjones series before fitting any benchmark.
autoplot(dowjones)

# Drift method. rwf() defaults to drift = FALSE, which reproduces the naive
# forecast, so the drift term must be requested explicitly.
drift_dd <- rwf(dowjones, drift = TRUE)
autoplot(drift_dd)

# Naive method: every forecast equals the last observation.
naive_dd <- naive(dowjones)
autoplot(naive_dd)

# Mean method: every forecast equals the historical average.
meanf_dd <- meanf(dowjones)
autoplot(meanf_dd)

# Seasonal naive method.
# NOTE(review): if dowjones has frequency 1 this coincides with naive() --
# confirm whether a seasonal benchmark is meaningful here.
snaive_dd <- snaive(dowjones)
autoplot(snaive_dd)

I think the drift method is the best, because when predicting stock prices it is better to start from the last observation.

# Plot the full ibmclose series.
autoplot(ibmclose)

# Train on the first 300 observations; hold out the remainder for testing.
i_training <- subset(ibmclose, end = 300)
i_test <- subset(ibmclose, start = 301)

# Fit each benchmark once, forecasting exactly as far as the test set runs.
i_avg <- meanf(i_training, h = length(i_test))
i_naive <- naive(i_training, h = length(i_test))
i_drift <- rwf(i_training, drift = TRUE, h = length(i_test))

# Point forecasts, reused for the base-graphics overlay below.
ibm_avg <- i_avg$mean
ibm_naive <- i_naive$mean
ibm_drift <- i_drift$mean

# plot() sizes the axes from its first argument only, so widen xlim/ylim to
# the whole series and all forecasts; otherwise everything after the training
# period is clipped out of view.
plot(
  i_training,
  main = "IBM Close Prices", ylab = "Price", xlab = "Day",
  xlim = range(time(ibmclose)),
  ylim = range(ibmclose, ibm_avg, ibm_naive, ibm_drift)
)

lines(ibm_naive, col = 2)
lines(ibm_drift, col = 3)
lines(ibm_avg, col = 4)
lines(i_test, col = 8)

# The legend lists every overlaid line, including the held-out test data.
legend("topleft", lty = 1, col = c(4, 2, 3, 8),
  legend = c("Mean Method", "Naive Method", "Drift Method", "Test Data"))

# Training- and test-set accuracy measures for the naive forecast.
accuracy(i_naive, i_test)

##                      ME      RMSE      MAE         MPE     MAPE     MASE
## Training set -0.2809365  7.302815  5.09699 -0.08262872 1.115844 1.000000
## Test set     -3.7246377 20.248099 17.02899 -1.29391743 4.668186 3.340989
##                   ACF1 Theil's U
## Training set 0.1351052        NA
## Test set     0.9314689  2.973486

# Training- and test-set accuracy measures for the mean forecast.
accuracy(i_avg, i_test)

##                         ME      RMSE       MAE        MPE     MAPE
## Training set  1.660438e-14  73.61532  58.72231  -2.642058 13.03019
## Test set     -1.306180e+02 132.12557 130.61797 -35.478819 35.47882
##                  MASE      ACF1 Theil's U
## Training set 11.52098 0.9895779        NA
## Test set     25.62649 0.9314689  19.05515

# Training- and test-set accuracy measures for the drift forecast.
accuracy(i_drift, i_test)

##                        ME      RMSE       MAE         MPE     MAPE
## Training set 2.870480e-14  7.297409  5.127996 -0.02530123 1.121650
## Test set     6.108138e+00 17.066963 13.974747  1.41920066 3.707888
##                  MASE      ACF1 Theil's U
## Training set 1.006083 0.1351052        NA
## Test set     2.741765 0.9045875  2.361092

The drift and naive methods are both better than the mean (average) method: their test-set errors are far smaller.

# Residual diagnostics for the mean forecast; the Ljung-Box result follows.
checkresiduals(i_avg)

## 
##  Ljung-Box test
## 
## data:  Residuals from Mean
## Q* = 2697.2, df = 9, p-value < 2.2e-16
## 
## Model df: 1.   Total lags used: 10

# Residual diagnostics for the naive forecast; the Ljung-Box result follows.
checkresiduals(i_naive)

## 
##  Ljung-Box test
## 
## data:  Residuals from Naive method
## Q* = 22.555, df = 10, p-value = 0.01251
## 
## Model df: 0.   Total lags used: 10

# Residual diagnostics for the drift forecast; the Ljung-Box result follows.
checkresiduals(i_drift)

## 
##  Ljung-Box test
## 
## data:  Residuals from Random walk with drift
## Q* = 22.555, df = 9, p-value = 0.007278
## 
## Model df: 1.   Total lags used: 10

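The best method is the naive method because of its small errors (although the drift method has a slightly lower test-set RMSE, 17.07 versus 20.25). The residuals are not white noise: every Ljung-Box p-value is below 0.05.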

# Plot the hsales series.
autoplot(hsales)

# Reserve the final 24 observations for testing and train on everything else.
h_test <- subset(hsales, start = length(hsales) - 23)
h_train <- subset(hsales, end = length(hsales) - 24)

# Four benchmark forecasts over the 24-step test horizon.
snaive_hsales <- snaive(h_train, h = 24)
drift_hsales <- rwf(h_train, drift = TRUE, h = 24)
naive_hsales <- naive(h_train, h = 24)
avg_hsales <- meanf(h_train, h = 24)

# Overlay the held-out observations on each forecast plot.
autoplot(avg_hsales) + autolayer(h_test)

autoplot(naive_hsales) + autolayer(h_test)

autoplot(snaive_hsales) + autolayer(h_test)

autoplot(drift_hsales) + autolayer(h_test)

# Training- and test-set accuracy measures for the naive forecast.
accuracy(naive_hsales, h_test)

##                     ME     RMSE      MAE       MPE      MAPE      MASE
## Training set -0.008000 6.301111 5.000000 -0.767457  9.903991 0.5892505
## Test set      2.791667 8.628924 7.208333  2.858639 12.849194 0.8495028
##                   ACF1 Theil's U
## Training set 0.1824472        NA
## Test set     0.5377994  1.098358

# Training- and test-set accuracy measures for the seasonal naive forecast.
accuracy(snaive_hsales, h_test)

##                     ME      RMSE      MAE       MPE      MAPE      MASE
## Training set 0.1004184 10.582214 8.485356 -2.184269 17.633696 1.0000000
## Test set     1.0416667  5.905506 4.791667  0.972025  8.545729 0.5646984
##                   ACF1 Theil's U
## Training set 0.8369786        NA
## Test set     0.1687797 0.7091534

# Training- and test-set accuracy measures for the mean forecast.
accuracy(avg_hsales, h_test)

##                        ME      RMSE      MAE       MPE     MAPE      MASE
## Training set 3.510503e-15 12.162811 9.532738 -6.144876 20.38306 1.1234341
## Test set     3.839475e+00  9.022555 7.561587  4.779122 13.26183 0.8911338
##                   ACF1 Theil's U
## Training set 0.8661998        NA
## Test set     0.5377994  1.131713

# Training- and test-set accuracy measures for the drift forecast.
accuracy(drift_hsales, h_test)

##                        ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 1.506410e-15 6.301106 4.999872 -0.7511048  9.903063 0.5892354
## Test set     2.891667e+00 8.658795 7.249000  3.0426108 12.901697 0.8542954
##                   ACF1 Theil's U
## Training set 0.1824472        NA
## Test set     0.5378711  1.100276

The seasonal naive method is the best method: it has the lowest test-set RMSE, MAE, MAPE and MASE of the four.

# Residual diagnostics for the mean forecast; the Ljung-Box result follows.
checkresiduals(avg_hsales)

## 
##  Ljung-Box test
## 
## data:  Residuals from Mean
## Q* = 887.75, df = 23, p-value < 2.2e-16
## 
## Model df: 1.   Total lags used: 24

# Residual diagnostics for the seasonal naive forecast; the Ljung-Box result
# follows.
checkresiduals(snaive_hsales)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 682.2, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

# Residual diagnostics for the naive forecast; the Ljung-Box result follows.
checkresiduals(naive_hsales)

## 
##  Ljung-Box test
## 
## data:  Residuals from Naive method
## Q* = 322.61, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

# Residual diagnostics for the drift forecast; the Ljung-Box result follows.
checkresiduals(drift_hsales)

## 
##  Ljung-Box test
## 
## data:  Residuals from Random walk with drift
## Q* = 322.61, df = 23, p-value < 2.2e-16
## 
## Model df: 1.   Total lags used: 24

None of the residual series is white noise: every Ljung-Box p-value is below 2.2e-16.

# Seasonal naive forecast of WWWusage, its plot, and residual diagnostics.
# NOTE(review): WWWusage looks non-seasonal (the test below uses 10 lags), in
# which case this matches naive() -- confirm the intended benchmark.
snaive_w <- snaive(WWWusage)
autoplot(snaive_w)

checkresiduals(snaive_w)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 145.58, df = 10, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 10

# Seasonal naive forecast of bricksq, its plot, and residual diagnostics.
snaive_bricksq <- snaive(bricksq)
autoplot(snaive_bricksq)

checkresiduals(snaive_bricksq)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 233.2, df = 8, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 8

Neither residual series is white noise, and both of them appear to be approximately normally distributed.

# Choose a Box-Cox parameter for each series up front.
lambda_dole <- BoxCox.lambda(dole)
lambda_bricksq <- BoxCox.lambda(bricksq)
lambda_usgdp <- BoxCox.lambda(usgdp)
lambda_enplanements <- BoxCox.lambda(enplanements)

# For each series, plot the raw data and then the transformed data.
autoplot(dole)
autoplot(BoxCox(dole, lambda_dole))

autoplot(bricksq)
autoplot(BoxCox(bricksq, lambda_bricksq))

autoplot(usgdp)
autoplot(BoxCox(usgdp, lambda_usgdp))

autoplot(enplanements)
autoplot(BoxCox(enplanements, lambda_enplanements))

DSCI-FXP-SP19- Assignment #2 Forecasting Basics “Yangni”