CARSALES<-read.table("/Users/shuaibismail/Documents copy/809/set3/CARSALES.txt", header=FALSE)
attach(CARSALES)
head(CARSALES)
## V1
## 1 0.6702
## 2 0.6079
## 3 0.6541
## 4 0.6892
## 5 0.7118
## 6 0.6875
CARSALES["Month"]<-rep(1:12, times=24)
train<- CARSALES[1:276,]
test<-CARSALES[277:288,]
head(test)
## V1 Month
## 277 0.7126 1
## 278 0.6751 2
## 279 0.6552 3
## 280 0.7135 4
## 281 0.6850 5
## 282 0.7061 6
test$V1
## [1] 0.7126 0.6751 0.6552 0.7135 0.6850 0.7061 0.5048 0.4257 0.6372 0.6842
## [11] 0.5560 0.5608
test$Month
## [1] 1 2 3 4 5 6 7 8 9 10 11 12
Time Series Plot: The data looks stationary, with no obvious trend.
#Plots
plot.ts(V1, col="steelblue")
grid()
ACF Plot: We can see that this is not a white noise data. Also this plot proves that the data is stationary.
acf(V1,lag=20)
par(mfrow=c(1,1))
Box-plot:
boxplot(CARSALES$V1~CARSALES$Month, col="darkblue")
2a
A monthly seasonal model: From the p-value (0.66711) of the second month, February, we can see that there’s no difference in the car sales between January and Febuary.
M1<-lm(V1~as.factor(Month), data=train)
summary(M1)
##
## Call:
## lm(formula = V1 ~ as.factor(Month), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37752 -0.08219 0.00847 0.09470 0.25672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.65062 0.02764 23.541 < 2e-16 ***
## as.factor(Month)2 -0.01514 0.03908 -0.387 0.69882
## as.factor(Month)3 0.09482 0.03908 2.426 0.01594 *
## as.factor(Month)4 0.06606 0.03908 1.690 0.09217 .
## as.factor(Month)5 0.10189 0.03908 2.607 0.00966 **
## as.factor(Month)6 0.10853 0.03908 2.777 0.00589 **
## as.factor(Month)7 -0.10827 0.03908 -2.770 0.00600 **
## as.factor(Month)8 -0.27053 0.03908 -6.922 3.38e-11 ***
## as.factor(Month)9 -0.02717 0.03908 -0.695 0.48765
## as.factor(Month)10 0.08552 0.03908 2.188 0.02954 *
## as.factor(Month)11 0.03785 0.03908 0.968 0.33376
## as.factor(Month)12 -0.02304 0.03908 -0.590 0.55598
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1325 on 264 degrees of freedom
## Multiple R-squared: 0.3928, Adjusted R-squared: 0.3675
## F-statistic: 15.53 on 11 and 264 DF, p-value: < 2.2e-16
Seasonal MLR Plot
plot.ts(train$V1, col="grey")
lines(predict(M1),col="blue")
time<-seq(1, length(train$V1))
M2 = lm(formula = V1~poly(time, 7, raw=TRUE), data = train)
summary(M2)
##
## Call:
## lm(formula = V1 ~ poly(time, 7, raw = TRUE), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57444 -0.08616 0.02260 0.10731 0.28420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.499e-01 7.983e-02 8.141 1.49e-14 ***
## poly(time, 7, raw = TRUE)1 -2.800e-03 1.035e-02 -0.271 0.7870
## poly(time, 7, raw = TRUE)2 3.670e-04 4.300e-04 0.853 0.3942
## poly(time, 7, raw = TRUE)3 -1.013e-05 7.977e-06 -1.270 0.2050
## poly(time, 7, raw = TRUE)4 1.179e-07 7.599e-08 1.551 0.1220
## poly(time, 7, raw = TRUE)5 -6.665e-10 3.866e-10 -1.724 0.0858 .
## poly(time, 7, raw = TRUE)6 1.808e-12 9.979e-13 1.812 0.0712 .
## poly(time, 7, raw = TRUE)7 -1.884e-15 1.027e-15 -1.835 0.0676 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1564 on 268 degrees of freedom
## Multiple R-squared: 0.1416, Adjusted R-squared: 0.1192
## F-statistic: 6.316 on 7 and 268 DF, p-value: 7.091e-07
plot.ts(train$V1, col="grey")
lines(predict(M2),col="blue")
2c Sine/ Cosine Model The highest amplitudes are at harmonic pair 4, 24, 48, and 72
#Sine/cosine Model
detrend<-lm(V1~time, data = train)
library(TSA)
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
periodogram(detrend$residuals)
periodogram(detrend$residuals)$spec
## [1] 1.703555e-01 2.312587e-01 2.519508e-01 8.770960e-01 5.904886e-02
## [6] 3.386375e-01 7.849398e-02 2.072980e-02 1.329196e-01 5.440722e-02
## [11] 8.808130e-03 5.372124e-02 9.141739e-03 8.613809e-03 3.432540e-02
## [16] 5.470449e-02 2.991793e-02 9.558215e-02 5.172149e-03 2.177372e-02
## [21] 9.529948e-03 5.666406e-02 1.418320e-01 6.302088e-01 1.501522e-01
## [26] 2.657747e-02 1.396774e-02 1.719792e-01 7.606126e-04 4.891187e-02
## [31] 1.717143e-02 2.567737e-02 5.143376e-02 4.996388e-02 3.186046e-03
## [36] 6.508615e-03 2.316507e-02 5.577669e-03 5.030968e-03 6.326763e-02
## [41] 9.990330e-03 5.163040e-03 1.658274e-02 2.820184e-02 7.673678e-03
## [46] 4.809640e-02 8.428820e-03 1.411449e+00 1.888962e-01 7.234546e-02
## [51] 1.101069e-02 2.306781e-02 1.035200e-02 4.726531e-03 4.921338e-03
## [56] 1.338301e-02 4.102599e-02 8.791486e-03 9.752199e-03 1.200168e-02
## [61] 5.263800e-03 7.776127e-03 4.813600e-03 2.804591e-04 1.108947e-02
## [66] 1.192507e-02 7.937137e-03 6.435570e-03 1.109707e-02 5.337676e-03
## [71] 3.998181e-02 6.338395e-01 3.344098e-02 4.476999e-02 4.583622e-03
## [76] 1.588496e-02 3.131753e-03 2.255086e-03 7.073736e-03 2.284598e-02
## [81] 3.185289e-02 1.296507e-02 3.968388e-03 9.556662e-03 9.090289e-04
## [86] 1.827275e-02 5.462417e-03 2.339912e-03 2.083213e-03 3.116932e-03
## [91] 1.254509e-02 6.873838e-03 1.448886e-03 9.724950e-03 5.798647e-03
## [96] 2.654159e-01 3.155626e-02 3.797480e-02 1.208714e-03 5.003437e-02
## [101] 3.589425e-03 3.968364e-04 4.936286e-03 5.274201e-03 1.047305e-02
## [106] 1.272671e-02 5.430866e-03 6.016316e-04 9.899312e-04 6.130800e-03
## [111] 5.109193e-03 4.108296e-03 3.455484e-03 4.344982e-04 1.861794e-03
## [116] 4.779966e-03 9.065506e-03 4.164756e-03 4.885951e-03 3.451651e-03
## [121] 2.284642e-02 6.462603e-03 5.201565e-03 2.049005e-02 9.994523e-06
## [126] 3.135920e-03 2.046527e-03 8.498329e-04 3.000765e-03 4.027777e-03
## [131] 4.522290e-03 8.669189e-03 1.790652e-03 1.879790e-03 3.798115e-03
## [136] 1.722870e-03 5.108081e-04 5.723722e-03 1.554305e-02 7.552721e-03
## [141] 1.122718e-03 1.727446e-03 5.266139e-03 8.074048e-02
order(periodogram(detrend$residuals)$spec)
## [1] 125 64 102 114 137 108 29 128 85 109 141 99 93 136 142 133 115 134
## [19] 127 89 78 88 129 90 77 126 35 120 113 101 135 83 130 112 118 131
## [37] 75 54 116 63 119 55 103 39 111 42 19 123 61 143 104 70 107 87
## [55] 38 138 95 110 68 122 36 92 79 140 45 62 67 47 14 132 58 11
## [73] 117 13 21 84 94 59 41 53 105 51 65 69 66 60 91 106 82 56
## [91] 27 139 76 43 31 86 124 8 20 80 121 52 37 32 26 44 17 97
## [109] 81 73 15 98 71 57 74 46 30 34 100 33 12 10 16 22 5 40
## [127] 50 7 144 18 9 23 25 1 28 49 2 3 96 6 24 72 4 48
n<-276
sin1<-sin(2*pi*time*4/n)
cos1<-cos(2*pi*time*4/n)
sin2<-sin(2*pi*time*24/n)
cos2<-cos(2*pi*time*24/n)
sin3<-sin(2*pi*time*48/n)
cos3<-cos(2*pi*time*48/n)
sin4<-sin(2*pi*time*72/n)
cos4<-cos(2*pi*time*72/n)
M3<-lm(V1~time+sin1+cos1+sin2+cos2+sin3+cos3+sin4+cos4, data=train)
summary(M3)
##
## Call:
## lm(formula = V1 ~ time + sin1 + cos1 + sin2 + cos2 + sin3 + cos3 +
## sin4 + cos4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56942 -0.09758 0.01556 0.10354 0.36860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7309675 0.0188956 38.685 < 2e-16 ***
## time -0.0005497 0.0001188 -4.625 5.84e-06 ***
## sin1 -0.0745228 0.0133802 -5.570 6.24e-08 ***
## cos1 -0.0126685 0.0131240 -0.965 0.3353
## sin2 0.0325734 0.0131303 2.481 0.0137 *
## cos2 -0.0106103 0.0131240 -0.808 0.4195
## sin3 0.0189477 0.0131249 1.444 0.1500
## cos3 0.0071638 0.0131240 0.546 0.5856
## sin4 0.0005992 0.0131239 0.046 0.9636
## cos4 0.0052193 0.0131240 0.398 0.6912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1542 on 266 degrees of freedom
## Multiple R-squared: 0.1723, Adjusted R-squared: 0.1443
## F-statistic: 6.154 on 9 and 266 DF, p-value: 7.102e-08
3
shapiro.test(rstandard(M1))
##
## Shapiro-Wilk normality test
##
## data: rstandard(M1)
## W = 0.9825, p-value = 0.001841
hist(rstandard(M1),breaks=15, col="steelblue")
shapiro.test(rstandard(M2))
##
## Shapiro-Wilk normality test
##
## data: rstandard(M2)
## W = 0.96506, p-value = 3.088e-06
hist(rstandard(M2),breaks=15, col="steelblue")
shapiro.test(rstandard(M3))
##
## Shapiro-Wilk normality test
##
## data: rstandard(M3)
## W = 0.97683, p-value = 0.0001853
hist(rstandard(M3),breaks=15, col="steelblue")
4 The residuals are scattered around with no obvious pattern.
plot((rstandard(M1)^2), col="steelblue")
plot((rstandard(M2)^2), col="steelblue")
plot((rstandard(M3)^2), col="steelblue")
The residuals are scattered around with no obvious pattern.
5.ACF of Residuals: Residuals is still a white-noise, but now becoming stationary, due to the slow decay of the ACF.
acf(rstandard(M1))
acf(rstandard(M2))
acf(rstandard(M3))
6.
plot.ts(train$V1, col="gray")
lines(predict(M1),col="red")
plot.ts(train$V1, col="gray")
lines(predict(M2),col="blue")
plot.ts(train$V1, col="gray")
lines(predict(M3),col="green")
7.
summary(M1)
##
## Call:
## lm(formula = V1 ~ as.factor(Month), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37752 -0.08219 0.00847 0.09470 0.25672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.65062 0.02764 23.541 < 2e-16 ***
## as.factor(Month)2 -0.01514 0.03908 -0.387 0.69882
## as.factor(Month)3 0.09482 0.03908 2.426 0.01594 *
## as.factor(Month)4 0.06606 0.03908 1.690 0.09217 .
## as.factor(Month)5 0.10189 0.03908 2.607 0.00966 **
## as.factor(Month)6 0.10853 0.03908 2.777 0.00589 **
## as.factor(Month)7 -0.10827 0.03908 -2.770 0.00600 **
## as.factor(Month)8 -0.27053 0.03908 -6.922 3.38e-11 ***
## as.factor(Month)9 -0.02717 0.03908 -0.695 0.48765
## as.factor(Month)10 0.08552 0.03908 2.188 0.02954 *
## as.factor(Month)11 0.03785 0.03908 0.968 0.33376
## as.factor(Month)12 -0.02304 0.03908 -0.590 0.55598
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1325 on 264 degrees of freedom
## Multiple R-squared: 0.3928, Adjusted R-squared: 0.3675
## F-statistic: 15.53 on 11 and 264 DF, p-value: < 2.2e-16
summary(M2)
##
## Call:
## lm(formula = V1 ~ poly(time, 7, raw = TRUE), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57444 -0.08616 0.02260 0.10731 0.28420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.499e-01 7.983e-02 8.141 1.49e-14 ***
## poly(time, 7, raw = TRUE)1 -2.800e-03 1.035e-02 -0.271 0.7870
## poly(time, 7, raw = TRUE)2 3.670e-04 4.300e-04 0.853 0.3942
## poly(time, 7, raw = TRUE)3 -1.013e-05 7.977e-06 -1.270 0.2050
## poly(time, 7, raw = TRUE)4 1.179e-07 7.599e-08 1.551 0.1220
## poly(time, 7, raw = TRUE)5 -6.665e-10 3.866e-10 -1.724 0.0858 .
## poly(time, 7, raw = TRUE)6 1.808e-12 9.979e-13 1.812 0.0712 .
## poly(time, 7, raw = TRUE)7 -1.884e-15 1.027e-15 -1.835 0.0676 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1564 on 268 degrees of freedom
## Multiple R-squared: 0.1416, Adjusted R-squared: 0.1192
## F-statistic: 6.316 on 7 and 268 DF, p-value: 7.091e-07
summary(M3)
##
## Call:
## lm(formula = V1 ~ time + sin1 + cos1 + sin2 + cos2 + sin3 + cos3 +
## sin4 + cos4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56942 -0.09758 0.01556 0.10354 0.36860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7309675 0.0188956 38.685 < 2e-16 ***
## time -0.0005497 0.0001188 -4.625 5.84e-06 ***
## sin1 -0.0745228 0.0133802 -5.570 6.24e-08 ***
## cos1 -0.0126685 0.0131240 -0.965 0.3353
## sin2 0.0325734 0.0131303 2.481 0.0137 *
## cos2 -0.0106103 0.0131240 -0.808 0.4195
## sin3 0.0189477 0.0131249 1.444 0.1500
## cos3 0.0071638 0.0131240 0.546 0.5856
## sin4 0.0005992 0.0131239 0.046 0.9636
## cos4 0.0052193 0.0131240 0.398 0.6912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1542 on 266 degrees of freedom
## Multiple R-squared: 0.1723, Adjusted R-squared: 0.1443
## F-statistic: 6.154 on 9 and 266 DF, p-value: 7.102e-08
AIC(M1)
## [1] -318.5202
AIC(M2)
## [1] -230.9624
AIC(M3)
## [1] -237.02
BIC(M1)
## [1] -271.455
BIC(M2)
## [1] -198.3788
BIC(M3)
## [1] -197.1956
plot.ts(test$V1, ylim=c(0,2.0), xlim=c(0.,12), col='darkgrey')
lines(predict(M1),col="blue")
plot.ts(test$V1, ylim=c(0,2.0), xlim=c(0.,12), col='darkgrey')
lines(predict(M2),col="red")
plot.ts(test$V1, ylim=c(0,2.0), xlim=c(0.,12), col='darkgrey')
lines(predict(M3),col="green")
pred<-array(0,c(3,12))
pred[1,1:12] = predict(M1, test)
pred[2,1:12] = predict(M2, data.frame(time=c(277:288)))
pred[3,1:12] = predict(M3, data.frame(time=c(277:288),
sin1<-sin(2*pi*(4/n)*c(277:288)),
cos1<-cos(2*pi*(4/n)*c(277:288)),
sin2<-sin(2*pi*(24/n)*c(277:288)),
cos2<-cos(2*pi*(24/n)*c(277:288)),
sin3<-sin(2*pi*(48/n)*c(277:288)),
cos3<-cos(2*pi*(48/n)*c(277:288)),
sin4<-sin(2*pi*(72/n)*c(277:288)),
cos4<-cos(2*pi*(72/n)*c(277:288))
))
mae1<- mean(abs(test$V1 - pred[1,1:12]))
mae1
## [1] 0.05530652
mae2<- mean(abs(test$V1 - pred[2,1:12]))
mae2
## [1] 0.07602622
mae3<- mean(abs(test$V1 - pred[3,1:12]))
mae3
## [1] 0.1158031
mse1<- mean((test$V1 - pred[1,1:12])^2)
mse1
## [1] 0.004110149
mse2<- mean((test$V1 - pred[2,1:12])^2)
mse2
## [1] 0.01285999
mse3<- mean((test$V1 - pred[3,1:12])^2)
mse3
## [1] 0.01681781
mape1<- mean(abs(test$V1 - pred[1,1:12])/test$V1)
mape1
## [1] 0.09149061
mape2<- mean(abs(test$V1 - pred[2,1:12])/test$V1)
mape2
## [1] 0.1477129
mape3<- mean(abs(test$V1 - pred[3,1:12])/test$V1)
mape3
## [1] 0.1791411