Assignment

P.1 $\textit{Analysis of Canadian Manufacturing Workers Work-Hours:}$

The time series plot in the given figure 7.9 describes the average annual number of weekly hours spent by Canadian manufacturing workers. The data is available in CandianWorkHours.xls

library(forecast)

## Warning: package 'forecast' was built under R version 3.2.5

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 3.2.5

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: timeDate

## This is forecast 7.3

setwd("/Users/yusufsultan/rWork")
WorkHours <- read.csv("CanadianWorkHours.csv",stringsAsFactors = FALSE)
str(WorkHours)

## 'data.frame':    35 obs. of  2 variables:
##  $ Year        : int  1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 ...
##  $ HoursPerWeek: num  37.2 37 37.4 37.5 37.7 37.7 37.4 37.2 37.3 37.2 ...

head(WorkHours)

##   Year HoursPerWeek
## 1 1966         37.2
## 2 1967         37.0
## 3 1968         37.4
## 4 1969         37.5
## 5 1970         37.7
## 6 1971         37.7

tail(WorkHours)

##    Year HoursPerWeek
## 30 1995         35.7
## 31 1996         35.7
## 32 1997         35.5
## 33 1998         35.6
## 34 1999         36.3
## 35 2000         36.5

# Create a time series object out of it
WorkHoursTS <- ts(WorkHours$HoursPerWeek,start=c(1966,1),frequency=1)
yrange <- range(WorkHoursTS)
# Set up the plot
plot(c(1966, 2000), yrange, type="n", xlab="Year",  ylab="Hours Per Week", bty="l", xaxt="n", yaxt="n")
# Add the time series
lines(WorkHoursTS,bty="l",lwd=2)
# Add the x-axis
axis(1, at=seq(1965,2000,5), labels=format(seq(1965,2000,5)))
# Add the y-axis
axis(2, at=seq(34.5,38.0,0.5), labels=format(seq(34.5,38.0,0.5)), las=2)

1.(a)

if we compute the autocorrelation of this series, would the lag_1 autocorrelation exhibit negative, positive, or no autocorrelation? how can you see this from the plot?

# Let's just simply call the Acf function with a lag.max=1
# This will compute and plot the lag-1 autocorrelation
Acf(WorkHoursTS, lag.max=1,lwd=2)

The autocorrelation at lag zero is always one.The plot starts with a high autocorrelation at lag 1 (only slightly less than 1) that slowly declines. It continues decreasing until it becomes negative and starts showing an incresing negative autocorrelation. The decreasing autocorrelation is generally linear with little noise. Such a pattern is the autocorrelation plot signature of “strong autocorrelation”, which in turn provides high predictability if modeled properly.The model with strong positive autocorrelation

by lag from 1 to 15 shows

Autocorrelations of series 'Work.TS', by lag
## 
##      0      1      2      3      4      5      6      7      8      9 
##  1.000  0.928  0.839  0.752  0.665  0.571  0.473  0.369  0.265  0.164 
##     10     11     12     13     14     15 
##  0.047 -0.082 -0.185 -0.261 -0.310 -0.346

1.(b)
Compute the autocorrelation and produce an ACF plot.verify your answer to the previous question.

WorkHours.ACF <- Acf(WorkHoursTS,lwd=2)

WorkHours.ACF

## 
## Autocorrelations of series 'WorkHoursTS', by lag
## 
##      0      1      2      3      4      5      6      7      8      9 
##  1.000  0.928  0.839  0.752  0.665  0.571  0.473  0.369  0.265  0.164 
##     10     11     12     13     14     15 
##  0.047 -0.082 -0.185 -0.261 -0.310 -0.346

# as we see in the output of ACF ,there is Positive lag-1 autocorreletion of 0.928

Acf(diff(WorkHoursTS, lag=1), lag.max=12, main="ACF Plot for Differenced Series",lwd=2)

plot(diff(WorkHoursTS, lag=1), bty="l",lwd=2)

P.2 $\textit{forecasting Wal-Mart Stock:}$

Figure 7.10 shows at time plot of Wal-Mart daily closing prices between February 2001 and February 2002.The data is available at finance.yahoo.com and in WalMartStock.cls. The ACF plot of these daily closing prices and its lag-1 differenced series are in figure 7.11 Table 7.4 shows the output from fitting an AR(1) model to the series of closing prices and to the series of differences.Use all the information to answer the following questions.

a. Create a time plot of differenced series.

library(forecast)
setwd("/Users/yusufsultan/rWork")
WalMart <- read.csv("WalMartStock.csv",stringsAsFactors = FALSE)
str(WalMart)

## 'data.frame':    248 obs. of  2 variables:
##  $ Date : chr  "5-Feb-01" "6-Feb-01" "7-Feb-01" "8-Feb-01" ...
##  $ Close: num  53.8 53.2 54.7 52.3 50.4 ...

head(WalMart)

##        Date Close
## 1  5-Feb-01 53.84
## 2  6-Feb-01 53.20
## 3  7-Feb-01 54.66
## 4  8-Feb-01 52.30
## 5  9-Feb-01 50.40
## 6 12-Feb-01 53.45

tail(WalMart)

##          Date Close
## 243 28-Jan-02 58.63
## 244 29-Jan-02 57.91
## 245 30-Jan-02 59.75
## 246 31-Jan-02 59.98
## 247  1-Feb-02 59.26
## 248  4-Feb-02 58.90

# Create a time series object out of it
WalMartTS <- ts(WalMart$Close,start=c(2001,2),frequency=250)
yrange <- range(WalMartTS)
# Set up the plot
plot(WalMartTS, xlab="Time", ylab="Daily Closing Prices ($)", bty="l", xaxt="n", yaxt="n")
lines(WalMartTS,bty="l",lwd=2)
# Add the x-axis
axis(1, at=c(2001.0, 2001.2, 2001.4, 2001.6, 2001.8, 2002.0), labels=c("Mar-01", "May-01", "Jul-01","Sep-01", "Nov-01", "Jan-02"))
# Add the y-axis
axis(2, at=seq(40.5,60.5,5), labels=format(seq(40.5,60.5,5)), las=2)

# Let's just simply call the Acf function with a lag.max=1
# This will compute and plot the lag-1 autocorrelation
Acf(WalMartTS, lag.max=1,lwd=2)

Acf(WalMartTS, lag.max=12,lwd=2)

WalMartACF <- Acf(WalMartTS)

WalMartACF

## 
## Autocorrelations of series 'WalMartTS', by lag
## 
##      0      1      2      3      4      5      6      7      8      9 
##  1.000  0.943  0.892  0.846  0.811  0.780  0.747  0.714  0.669  0.618 
##     10     11     12     13     14     15     16     17     18     19 
##  0.569  0.531  0.499  0.470  0.441  0.418  0.390  0.346  0.309  0.274 
##     20     21     22     23     24     25     26     27     28     29 
##  0.252  0.232  0.206  0.175  0.143  0.115  0.090  0.079  0.079  0.069 
##     30     31     32     33     34     35     36     37     38     39 
##  0.051  0.034  0.027  0.016  0.017  0.029  0.039  0.048  0.050  0.046 
##     40     41     42     43     44     45     46     47     48     49 
##  0.045  0.050  0.063  0.090  0.110  0.123  0.127  0.135  0.137  0.143 
##     50     51     52     53     54     55     56     57     58     59 
##  0.158  0.164  0.161  0.155  0.140  0.121  0.115  0.115  0.118  0.116 
##     60     61     62     63     64     65     66     67     68     69 
##  0.105  0.086  0.064  0.045  0.033  0.031  0.028  0.017  0.000 -0.020 
##     70     71     72     73     74     75     76     77     78     79 
## -0.038 -0.045 -0.051 -0.053 -0.056 -0.063 -0.078 -0.096 -0.115 -0.126 
##     80     81     82     83     84     85     86     87     88     89 
## -0.146 -0.163 -0.181 -0.201 -0.220 -0.239 -0.249 -0.253 -0.253 -0.256 
##     90     91     92     93     94     95     96     97     98     99 
## -0.258 -0.260 -0.256 -0.239 -0.222 -0.197 -0.174 -0.145 -0.133 -0.113 
##    100    101    102    103    104    105    106    107    108    109 
## -0.095 -0.079 -0.059 -0.034 -0.008  0.007  0.020  0.029  0.038  0.052 
##    110    111    112    113    114    115    116    117    118    119 
##  0.063  0.081  0.092  0.098  0.095  0.100  0.104  0.117  0.127  0.132 
##    120    121    122    123    124    125    126    127    128    129 
##  0.128  0.118  0.112  0.104  0.095  0.091  0.079  0.063  0.040  0.017 
##    130    131    132    133    134    135    136    137    138    139 
## -0.003 -0.019 -0.033 -0.045 -0.058 -0.078 -0.093 -0.113 -0.129 -0.142 
##    140    141    142    143    144    145    146    147    148    149 
## -0.138 -0.138 -0.142 -0.144 -0.146 -0.150 -0.152 -0.148 -0.146 -0.143 
##    150    151    152    153    154    155    156    157    158    159 
## -0.140 -0.140 -0.139 -0.136 -0.132 -0.122 -0.113 -0.106 -0.107 -0.110 
##    160    161    162    163    164    165    166    167    168    169 
## -0.111 -0.109 -0.104 -0.103 -0.101 -0.099 -0.101 -0.112 -0.114 -0.112 
##    170    171    172    173    174    175    176    177    178    179 
## -0.111 -0.111 -0.116 -0.123 -0.133 -0.142 -0.142 -0.136 -0.132 -0.134 
##    180    181    182    183    184    185    186    187    188    189 
## -0.142 -0.148 -0.155 -0.153 -0.149 -0.150 -0.156 -0.164 -0.170 -0.181 
##    190    191    192    193    194    195    196    197    198    199 
## -0.191 -0.197 -0.197 -0.195 -0.189 -0.196 -0.199 -0.200 -0.205 -0.198 
##    200    201    202    203    204    205    206    207    208    209 
## -0.191 -0.186 -0.184 -0.188 -0.186 -0.185 -0.185 -0.183 -0.181 -0.177 
##    210    211    212    213    214    215    216    217    218    219 
## -0.179 -0.178 -0.175 -0.168 -0.162 -0.149 -0.135 -0.124 -0.115 -0.104 
##    220    221    222    223    224    225    226    227    228    229 
## -0.094 -0.085 -0.074 -0.066 -0.056 -0.056 -0.057 -0.057 -0.050 -0.036 
##    230    231    232    233    234    235    236    237    238    239 
## -0.029 -0.019 -0.013 -0.009 -0.004  0.000  0.008  0.016  0.016  0.017 
##    240    241    242    243    244    245    246    247 
##  0.016  0.012  0.009  0.007  0.013  0.013  0.006  0.004

Acf(diff(WalMartTS, lag=1), lag.max=12, main="ACF Plot for Differenced Series",lwd=2)

plot(diff(WalMartTS, lag=1), bty="l" ,lwd=2)

Time plot of the differenced series

b. Which of the following is/are relevant for testing whether this stock is a random walk?

The following are a randow stock:

$\bullet$ The autocorrelations of the closing price series.

$\bullet$ The AR(1) slope coefficient for the closing price series.

$\bullet$ The autocorrelations of the differenced series.

c. Recreate the AR(1) model output for the Close price series shown in the left of table 7.4. Does the AR model indicate that this is a random walk? Explain how you reached your conclusion.

fitWalMart <- Arima(WalMartTS, order=c(1,0,0))
fitWalMart

## Series: WalMartTS 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1  intercept
##       0.9558    52.9497
## s.e.  0.0187     1.3280
## 
## sigma^2 estimated as 0.9815:  log likelihood=-349.8
## AIC=705.59   AICc=705.69   BIC=716.13

#Calculated two-tailed p-value using S.E. value above
2*pt(-abs((1 - fitWalMart$coef["ar1"]) / 0.0187), df=length(WalMartTS)-1)

##        ar1 
## 0.01896261

# will get arl = 0.01896261 

#P-value, normal distribution
2*pnorm(-abs((1-fitWalMart$coef["ar1"])/0.0187))

##        ar1 
## 0.01818593

both arl we get the values(0.01896261 and 0.01818593) $ < $ 0.05 and that will not indicate a random walk

d. What are the implications of finding that a time series is a random walk? Choose the correct statements below.

$\bullet$ Yes true .It is impossible to obtain useful forecasts of the series if it random walk.

$\bullet$ Not True .The series is random.

$\bullet$ Yes true for changes in the series from one period to the other in random walk.

P.3 $\textit{Souvenir Sales:}$

The file SouvenirSales.xls contains monthly sales for a souvenir shop at a beach resort town in Queensland, Australia, between 1995 and 2001.Back in 2001, the store wanted to use the data to forecast sales for the next 12 months (the year 2002). They hired an analyst to generate forecasts. The analyst first partitioned the data into training and validation periods, with the validation set containing the last 12 months of data (the year 2001). She then fit a regression model to sales, using the training period.

3(a) Run a regression model with log(Sales) as the output variable and with a linear trend and monthly predictors. Remember to fit only the training period. Use this model to forecast the sales in February 2002

library(forecast)
library(zoo)
setwd("/Users/yusufsultan/rWork")
SouvenirSales <- read.csv("SouvenirSales.csv",stringsAsFactors = FALSE)
str(SouvenirSales)

## 'data.frame':    84 obs. of  2 variables:
##  $ Date : chr  "Jan-95" "Feb-95" "Mar-95" "Apr-95" ...
##  $ Sales: num  1665 2398 2841 3547 3753 ...

head(SouvenirSales)

##     Date   Sales
## 1 Jan-95 1664.81
## 2 Feb-95 2397.53
## 3 Mar-95 2840.71
## 4 Apr-95 3547.29
## 5 May-95 3752.96
## 6 Jun-95 3714.74

tail(SouvenirSales)

##     Date     Sales
## 79 1-Jul  26155.15
## 80 1-Aug  28586.52
## 81 1-Sep  30505.41
## 82 1-Oct  30821.33
## 83 1-Nov  46634.38
## 84 1-Dec 104660.67

SouvenirSalesTS <- ts(SouvenirSales$Sales,start=c(1995,1),frequency=12)
yrange = range(SouvenirSalesTS)
plot(c(1995,2001),yrange,type="n",xlab="Year",ylab="Sales $",bty="l",xaxt="n",yaxt="n",lwd=2)
lines(SouvenirSalesTS,bty="l",lwd=2)
axis(1,at=seq(1995,2002,1),labels=format(seq(1995,2002,1)))
axis(2,at=seq(0,110000,10000),labels=format(seq(0,110,10)),las=2)

validLength <- 12
trainLength <- length(SouvenirSalesTS) - validLength
souvsalesTrain <- window(SouvenirSalesTS,end=c(1995,trainLength))
souvsalesValid <- window(SouvenirSalesTS,start=c(1995,trainLength+1))

SouvenirSalesLogLinearSeason <- tslm(log(souvsalesTrain) ~ trend + season)
summary(SouvenirSalesLogLinearSeason)

## 
## Call:
## tslm(formula = log(souvsalesTrain) ~ trend + season)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4529 -0.1163  0.0001  0.1005  0.3438 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 7.646363   0.084120  90.898  < 2e-16 ***
## trend       0.021120   0.001086  19.449  < 2e-16 ***
## season2     0.282015   0.109028   2.587 0.012178 *  
## season3     0.694998   0.109044   6.374 3.08e-08 ***
## season4     0.373873   0.109071   3.428 0.001115 ** 
## season5     0.421710   0.109109   3.865 0.000279 ***
## season6     0.447046   0.109158   4.095 0.000130 ***
## season7     0.583380   0.109217   5.341 1.55e-06 ***
## season8     0.546897   0.109287   5.004 5.37e-06 ***
## season9     0.635565   0.109368   5.811 2.65e-07 ***
## season10    0.729490   0.109460   6.664 9.98e-09 ***
## season11    1.200954   0.109562  10.961 7.38e-16 ***
## season12    1.952202   0.109675  17.800  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1888 on 59 degrees of freedom
## Multiple R-squared:  0.9424, Adjusted R-squared:  0.9306 
## F-statistic:  80.4 on 12 and 59 DF,  p-value: < 2.2e-16

SouvenirSalesLogLinearSeasonForecast <- forecast(SouvenirSalesLogLinearSeason,h=validLength)
feb2002Forecast <- SouvenirSalesLogLinearSeason$coefficients["(Intercept)"] + SouvenirSalesLogLinearSeason$coefficients["trend"]*86 + SouvenirSalesLogLinearSeason$coefficients["season2"]
exp(feb2002Forecast)

## (Intercept) 
##    17062.99

The forecast for February 2002 is $17,063.

3(b) Create an ACF plot until lag-15 for the forecast errors.Now fit an AR model with lag-2 ARIMA(2,0,0)] to the forecast errors.

ACF Plot of lag-15 SouvenirSalesTS

fit an AR model with lag-2

ARIMA(2,0,0)] to forecast errors.

resACF <- Acf(SouvenirSalesLogLinearSeason$residuals,lag.max=15,lwd=2)

3(b)(i)

Examining the ACF plot and the coefficients of the AR(2)model (and their statistical significance), what can we learn about the regression model forecasts?

resACF

## 
## Autocorrelations of series 'SouvenirSalesLogLinearSeason$residuals', by lag
## 
##      0      1      2      3      4      5      6      7      8      9 
##  1.000  0.459  0.485  0.194  0.088  0.154  0.016  0.030  0.106  0.034 
##     10     11     12     13     14     15 
##  0.152 -0.055 -0.012 -0.047 -0.077 -0.023

ARModel2 <- Arima(SouvenirSalesLogLinearSeason$residuals,order=c(2,0,0))
ARModel2

## Series: SouvenirSalesLogLinearSeason$residuals 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.3072  0.3687    -0.0025
## s.e.  0.1090  0.1102     0.0489
## 
## sigma^2 estimated as 0.0205:  log likelihood=39.03
## AIC=-70.05   AICc=-69.46   BIC=-60.95

# Estimate p-value based on the normal distribution
2*pnorm(-abs(ARModel2$coef["ar1"]/0.1089629))

##         ar1 
## 0.004810732

2*pnorm(-abs(ARModel2$coef["ar2"]/0.1101939))

##          ar2 
## 0.0008187679

Acf(ARModel2$residuals,lwd=2)

For the AR1 And AR2 are statisticlly significant at 2.621683 and 3.064909 by run t-stats test the value of AR1 and AR2 very small and that make both statistically signicifcant as a result those outcomes make the model is a good fit.

(ii)

Use the autocorrelation information to compute an improved forecast for January 2001, using the regression model and the AR(2) model above

linearRegForecast <- forecast(SouvenirSalesLogLinearSeason,h=validLength)
linearRegForecast

##          Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## Jan 2001       9.188097  8.917220  9.458974  8.769890  9.606304
## Feb 2001       9.491232  9.220354  9.762109  9.073024  9.909439
## Mar 2001       9.925335  9.654457 10.196212  9.507127 10.343542
## Apr 2001       9.625329  9.354452  9.896207  9.207122 10.043537
## May 2001       9.694286  9.423408  9.965163  9.276078 10.112493
## Jun 2001       9.740741  9.469864 10.011619  9.322534 10.158949
## Jul 2001       9.898195  9.627318 10.169072  9.479988 10.316402
## Aug 2001       9.882831  9.611954 10.153708  9.464624 10.301038
## Sep 2001       9.992619  9.721742 10.263496  9.574412 10.410826
## Oct 2001      10.107664  9.836787 10.378542  9.689457 10.525872
## Nov 2001      10.600248 10.329370 10.871125 10.182040 11.018455
## Dec 2001      11.372615 11.101738 11.643493 10.954408 11.790823

# Generate a forecast for the error terms using residuals AR(2) model
errorForecast <- forecast(ARModel2,h=validLength)
errorForecast

##          Point Forecast       Lo 80     Hi 80      Lo 95     Hi 95
## Jan 2001    0.107882119 -0.07561892 0.2913832 -0.1727585 0.3885227
## Feb 2001    0.098551352 -0.09341395 0.2905167 -0.1950342 0.3921370
## Mar 2001    0.069245043 -0.14069093 0.2791810 -0.2518243 0.3903144
## Apr 2001    0.056801003 -0.15830920 0.2719112 -0.2721817 0.3857837
## May 2001    0.042171322 -0.17774923 0.2620919 -0.2941681 0.3785108
## Jun 2001    0.033088136 -0.18905521 0.2552315 -0.3066508 0.3728271
## Jul 2001    0.024902959 -0.19881529 0.2486212 -0.3172446 0.3670505
## Aug 2001    0.019038932 -0.20554500 0.2436229 -0.3244325 0.3625104
## Sep 2001    0.014219137 -0.21092154 0.2393598 -0.3301038 0.3585421
## Oct 2001    0.010576068 -0.21489090 0.2360430 -0.3342459 0.3553980
## Nov 2001    0.007679567 -0.21798999 0.2333491 -0.3374522 0.3528114
## Dec 2001    0.005446339 -0.22034478 0.2312375 -0.3398714 0.3507641

#adjust forecast
adjustedForecast <- linearRegForecast$mean + errorForecast$mean
adjustedForecast

##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2001  9.295979  9.589783  9.994580  9.682130  9.736457  9.773830  9.923098
##            Aug       Sep       Oct       Nov       Dec
## 2001  9.901870 10.006838 10.118240 10.607927 11.378062

# exponential adjust forecast
exp(adjustedForecast)

##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 2001 10894.13 14614.70 21907.40 16028.61 16923.47 17567.92 20396.07
##           Aug      Sep      Oct      Nov      Dec
## 2001 19967.68 22177.61 24791.11 40454.26 87383.49

Improved Sales forecast of Souvenirs in month of jan 2001 $10894.13

plot(souvsalesValid,xlab="Year of 2001",ylab="Sales $",bty="l",xaxt="n",yaxt="n",lwd=2)
axis(1, at=seq(2001,2001+11/12,1/12), labels=c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))
axis(2,at=seq(0,110000,10000),labels=format(seq(0,110,10)),las=2)
lines(exp(adjustedForecast),col="Red",lwd=2)
legend(2001,110000, c("Actuals", "Adjusted Forecast"), lty=c(1,1), col=c("black", "Red"), bty="n")

P.4 $\textit{Shipment of Household Appliances:}$

a. If we compute the autocorrelation of the Appliance series, which lag (> 0) is most likely to have the largest coefficient (in absolute value)?

Data is quarterly so lag -4

b. Create an ACF plot and compare it with your answer.

#Imported, made time series
Shipment <- read.csv("ApplianceShipments.csv")
ShipmentTS <- ts(Shipment$Shipments, start = c(1985,1), end = c(1989,4), freq=4)

#Created ACF plot
ShipmentACF <- Acf(ShipmentTS,lwd=2)

ShipmentACF

## 
## Autocorrelations of series 'ShipmentTS', by lag
## 
##      0      1      2      3      4      5      6      7      8      9 
##  1.000  0.279  0.044  0.021 -0.070  0.150 -0.088 -0.471 -0.367 -0.274 
##     10     11     12     13 
##  0.011  0.002 -0.022 -0.090

Assignment_6

Yusuf Sultan

4/7/2017

Time plot of the differenced series