CARSALES<-read.table("/Users/shuaibismail/Documents copy/809/set3/CARSALES.txt", header=FALSE)
attach(CARSALES)

head(CARSALES)
##       V1
## 1 0.6702
## 2 0.6079
## 3 0.6541
## 4 0.6892
## 5 0.7118
## 6 0.6875
  1. Train-Test Split
CARSALES["Month"]<-rep(1:12, times=24)
train<- CARSALES[1:276,]
test<-CARSALES[277:288,]

head(test)
##         V1 Month
## 277 0.7126     1
## 278 0.6751     2
## 279 0.6552     3
## 280 0.7135     4
## 281 0.6850     5
## 282 0.7061     6
test$V1
##  [1] 0.7126 0.6751 0.6552 0.7135 0.6850 0.7061 0.5048 0.4257 0.6372 0.6842
## [11] 0.5560 0.5608
test$Month
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12

Time Series Plot: The data looks stationary, with no obvious trend.

#Plots
plot.ts(V1, col="steelblue")
grid()

ACF Plot: We can see that this is not a white noise data. Also this plot proves that the data is stationary.

acf(V1,lag=20)

par(mfrow=c(1,1))

Box-plot:

boxplot(CARSALES$V1~CARSALES$Month, col="darkblue")

2a

A monthly seasonal model: From the p-value (0.66711) of the second month, February, we can see that there’s no difference in the car sales between January and Febuary.

M1<-lm(V1~as.factor(Month), data=train)
summary(M1)
## 
## Call:
## lm(formula = V1 ~ as.factor(Month), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37752 -0.08219  0.00847  0.09470  0.25672 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.65062    0.02764  23.541  < 2e-16 ***
## as.factor(Month)2  -0.01514    0.03908  -0.387  0.69882    
## as.factor(Month)3   0.09482    0.03908   2.426  0.01594 *  
## as.factor(Month)4   0.06606    0.03908   1.690  0.09217 .  
## as.factor(Month)5   0.10189    0.03908   2.607  0.00966 ** 
## as.factor(Month)6   0.10853    0.03908   2.777  0.00589 ** 
## as.factor(Month)7  -0.10827    0.03908  -2.770  0.00600 ** 
## as.factor(Month)8  -0.27053    0.03908  -6.922 3.38e-11 ***
## as.factor(Month)9  -0.02717    0.03908  -0.695  0.48765    
## as.factor(Month)10  0.08552    0.03908   2.188  0.02954 *  
## as.factor(Month)11  0.03785    0.03908   0.968  0.33376    
## as.factor(Month)12 -0.02304    0.03908  -0.590  0.55598    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1325 on 264 degrees of freedom
## Multiple R-squared:  0.3928, Adjusted R-squared:  0.3675 
## F-statistic: 15.53 on 11 and 264 DF,  p-value: < 2.2e-16

Seasonal MLR Plot

plot.ts(train$V1, col="grey")
lines(predict(M1),col="blue")

time<-seq(1, length(train$V1))
M2 = lm(formula = V1~poly(time, 7, raw=TRUE), data = train)
summary(M2)
## 
## Call:
## lm(formula = V1 ~ poly(time, 7, raw = TRUE), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57444 -0.08616  0.02260  0.10731  0.28420 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 6.499e-01  7.983e-02   8.141 1.49e-14 ***
## poly(time, 7, raw = TRUE)1 -2.800e-03  1.035e-02  -0.271   0.7870    
## poly(time, 7, raw = TRUE)2  3.670e-04  4.300e-04   0.853   0.3942    
## poly(time, 7, raw = TRUE)3 -1.013e-05  7.977e-06  -1.270   0.2050    
## poly(time, 7, raw = TRUE)4  1.179e-07  7.599e-08   1.551   0.1220    
## poly(time, 7, raw = TRUE)5 -6.665e-10  3.866e-10  -1.724   0.0858 .  
## poly(time, 7, raw = TRUE)6  1.808e-12  9.979e-13   1.812   0.0712 .  
## poly(time, 7, raw = TRUE)7 -1.884e-15  1.027e-15  -1.835   0.0676 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1564 on 268 degrees of freedom
## Multiple R-squared:  0.1416, Adjusted R-squared:  0.1192 
## F-statistic: 6.316 on 7 and 268 DF,  p-value: 7.091e-07
plot.ts(train$V1, col="grey")
lines(predict(M2),col="blue")

2c Sine/ Cosine Model The highest amplitudes are at harmonic pair 4, 24, 48, and 72

#Sine/cosine Model
detrend<-lm(V1~time, data = train)

library(TSA)
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
periodogram(detrend$residuals)

periodogram(detrend$residuals)$spec

##   [1] 1.703555e-01 2.312587e-01 2.519508e-01 8.770960e-01 5.904886e-02
##   [6] 3.386375e-01 7.849398e-02 2.072980e-02 1.329196e-01 5.440722e-02
##  [11] 8.808130e-03 5.372124e-02 9.141739e-03 8.613809e-03 3.432540e-02
##  [16] 5.470449e-02 2.991793e-02 9.558215e-02 5.172149e-03 2.177372e-02
##  [21] 9.529948e-03 5.666406e-02 1.418320e-01 6.302088e-01 1.501522e-01
##  [26] 2.657747e-02 1.396774e-02 1.719792e-01 7.606126e-04 4.891187e-02
##  [31] 1.717143e-02 2.567737e-02 5.143376e-02 4.996388e-02 3.186046e-03
##  [36] 6.508615e-03 2.316507e-02 5.577669e-03 5.030968e-03 6.326763e-02
##  [41] 9.990330e-03 5.163040e-03 1.658274e-02 2.820184e-02 7.673678e-03
##  [46] 4.809640e-02 8.428820e-03 1.411449e+00 1.888962e-01 7.234546e-02
##  [51] 1.101069e-02 2.306781e-02 1.035200e-02 4.726531e-03 4.921338e-03
##  [56] 1.338301e-02 4.102599e-02 8.791486e-03 9.752199e-03 1.200168e-02
##  [61] 5.263800e-03 7.776127e-03 4.813600e-03 2.804591e-04 1.108947e-02
##  [66] 1.192507e-02 7.937137e-03 6.435570e-03 1.109707e-02 5.337676e-03
##  [71] 3.998181e-02 6.338395e-01 3.344098e-02 4.476999e-02 4.583622e-03
##  [76] 1.588496e-02 3.131753e-03 2.255086e-03 7.073736e-03 2.284598e-02
##  [81] 3.185289e-02 1.296507e-02 3.968388e-03 9.556662e-03 9.090289e-04
##  [86] 1.827275e-02 5.462417e-03 2.339912e-03 2.083213e-03 3.116932e-03
##  [91] 1.254509e-02 6.873838e-03 1.448886e-03 9.724950e-03 5.798647e-03
##  [96] 2.654159e-01 3.155626e-02 3.797480e-02 1.208714e-03 5.003437e-02
## [101] 3.589425e-03 3.968364e-04 4.936286e-03 5.274201e-03 1.047305e-02
## [106] 1.272671e-02 5.430866e-03 6.016316e-04 9.899312e-04 6.130800e-03
## [111] 5.109193e-03 4.108296e-03 3.455484e-03 4.344982e-04 1.861794e-03
## [116] 4.779966e-03 9.065506e-03 4.164756e-03 4.885951e-03 3.451651e-03
## [121] 2.284642e-02 6.462603e-03 5.201565e-03 2.049005e-02 9.994523e-06
## [126] 3.135920e-03 2.046527e-03 8.498329e-04 3.000765e-03 4.027777e-03
## [131] 4.522290e-03 8.669189e-03 1.790652e-03 1.879790e-03 3.798115e-03
## [136] 1.722870e-03 5.108081e-04 5.723722e-03 1.554305e-02 7.552721e-03
## [141] 1.122718e-03 1.727446e-03 5.266139e-03 8.074048e-02
order(periodogram(detrend$residuals)$spec)

##   [1] 125  64 102 114 137 108  29 128  85 109 141  99  93 136 142 133 115 134
##  [19] 127  89  78  88 129  90  77 126  35 120 113 101 135  83 130 112 118 131
##  [37]  75  54 116  63 119  55 103  39 111  42  19 123  61 143 104  70 107  87
##  [55]  38 138  95 110  68 122  36  92  79 140  45  62  67  47  14 132  58  11
##  [73] 117  13  21  84  94  59  41  53 105  51  65  69  66  60  91 106  82  56
##  [91]  27 139  76  43  31  86 124   8  20  80 121  52  37  32  26  44  17  97
## [109]  81  73  15  98  71  57  74  46  30  34 100  33  12  10  16  22   5  40
## [127]  50   7 144  18   9  23  25   1  28  49   2   3  96   6  24  72   4  48
n<-276
sin1<-sin(2*pi*time*4/n)
cos1<-cos(2*pi*time*4/n)
sin2<-sin(2*pi*time*24/n)
cos2<-cos(2*pi*time*24/n)  
sin3<-sin(2*pi*time*48/n)
cos3<-cos(2*pi*time*48/n)
sin4<-sin(2*pi*time*72/n)
cos4<-cos(2*pi*time*72/n)


M3<-lm(V1~time+sin1+cos1+sin2+cos2+sin3+cos3+sin4+cos4, data=train)
summary(M3)
## 
## Call:
## lm(formula = V1 ~ time + sin1 + cos1 + sin2 + cos2 + sin3 + cos3 + 
##     sin4 + cos4, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56942 -0.09758  0.01556  0.10354  0.36860 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.7309675  0.0188956  38.685  < 2e-16 ***
## time        -0.0005497  0.0001188  -4.625 5.84e-06 ***
## sin1        -0.0745228  0.0133802  -5.570 6.24e-08 ***
## cos1        -0.0126685  0.0131240  -0.965   0.3353    
## sin2         0.0325734  0.0131303   2.481   0.0137 *  
## cos2        -0.0106103  0.0131240  -0.808   0.4195    
## sin3         0.0189477  0.0131249   1.444   0.1500    
## cos3         0.0071638  0.0131240   0.546   0.5856    
## sin4         0.0005992  0.0131239   0.046   0.9636    
## cos4         0.0052193  0.0131240   0.398   0.6912    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1542 on 266 degrees of freedom
## Multiple R-squared:  0.1723, Adjusted R-squared:  0.1443 
## F-statistic: 6.154 on 9 and 266 DF,  p-value: 7.102e-08

3

shapiro.test(rstandard(M1))
## 
##  Shapiro-Wilk normality test
## 
## data:  rstandard(M1)
## W = 0.9825, p-value = 0.001841
hist(rstandard(M1),breaks=15, col="steelblue")

shapiro.test(rstandard(M2))
## 
##  Shapiro-Wilk normality test
## 
## data:  rstandard(M2)
## W = 0.96506, p-value = 3.088e-06
hist(rstandard(M2),breaks=15, col="steelblue")

shapiro.test(rstandard(M3))
## 
##  Shapiro-Wilk normality test
## 
## data:  rstandard(M3)
## W = 0.97683, p-value = 0.0001853
hist(rstandard(M3),breaks=15, col="steelblue")

4 The residuals are scattered around with no obvious pattern.

plot((rstandard(M1)^2), col="steelblue")

plot((rstandard(M2)^2), col="steelblue")

plot((rstandard(M3)^2), col="steelblue")

The residuals are scattered around with no obvious pattern.

5.ACF of Residuals: Residuals is still a white-noise, but now becoming stationary, due to the slow decay of the ACF.

acf(rstandard(M1))

acf(rstandard(M2))

acf(rstandard(M3))

6.

plot.ts(train$V1, col="gray")
lines(predict(M1),col="red")

plot.ts(train$V1, col="gray")
lines(predict(M2),col="blue")

plot.ts(train$V1, col="gray")
lines(predict(M3),col="green")

7.

summary(M1)
## 
## Call:
## lm(formula = V1 ~ as.factor(Month), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37752 -0.08219  0.00847  0.09470  0.25672 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.65062    0.02764  23.541  < 2e-16 ***
## as.factor(Month)2  -0.01514    0.03908  -0.387  0.69882    
## as.factor(Month)3   0.09482    0.03908   2.426  0.01594 *  
## as.factor(Month)4   0.06606    0.03908   1.690  0.09217 .  
## as.factor(Month)5   0.10189    0.03908   2.607  0.00966 ** 
## as.factor(Month)6   0.10853    0.03908   2.777  0.00589 ** 
## as.factor(Month)7  -0.10827    0.03908  -2.770  0.00600 ** 
## as.factor(Month)8  -0.27053    0.03908  -6.922 3.38e-11 ***
## as.factor(Month)9  -0.02717    0.03908  -0.695  0.48765    
## as.factor(Month)10  0.08552    0.03908   2.188  0.02954 *  
## as.factor(Month)11  0.03785    0.03908   0.968  0.33376    
## as.factor(Month)12 -0.02304    0.03908  -0.590  0.55598    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1325 on 264 degrees of freedom
## Multiple R-squared:  0.3928, Adjusted R-squared:  0.3675 
## F-statistic: 15.53 on 11 and 264 DF,  p-value: < 2.2e-16
summary(M2)
## 
## Call:
## lm(formula = V1 ~ poly(time, 7, raw = TRUE), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57444 -0.08616  0.02260  0.10731  0.28420 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 6.499e-01  7.983e-02   8.141 1.49e-14 ***
## poly(time, 7, raw = TRUE)1 -2.800e-03  1.035e-02  -0.271   0.7870    
## poly(time, 7, raw = TRUE)2  3.670e-04  4.300e-04   0.853   0.3942    
## poly(time, 7, raw = TRUE)3 -1.013e-05  7.977e-06  -1.270   0.2050    
## poly(time, 7, raw = TRUE)4  1.179e-07  7.599e-08   1.551   0.1220    
## poly(time, 7, raw = TRUE)5 -6.665e-10  3.866e-10  -1.724   0.0858 .  
## poly(time, 7, raw = TRUE)6  1.808e-12  9.979e-13   1.812   0.0712 .  
## poly(time, 7, raw = TRUE)7 -1.884e-15  1.027e-15  -1.835   0.0676 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1564 on 268 degrees of freedom
## Multiple R-squared:  0.1416, Adjusted R-squared:  0.1192 
## F-statistic: 6.316 on 7 and 268 DF,  p-value: 7.091e-07
summary(M3)
## 
## Call:
## lm(formula = V1 ~ time + sin1 + cos1 + sin2 + cos2 + sin3 + cos3 + 
##     sin4 + cos4, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56942 -0.09758  0.01556  0.10354  0.36860 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.7309675  0.0188956  38.685  < 2e-16 ***
## time        -0.0005497  0.0001188  -4.625 5.84e-06 ***
## sin1        -0.0745228  0.0133802  -5.570 6.24e-08 ***
## cos1        -0.0126685  0.0131240  -0.965   0.3353    
## sin2         0.0325734  0.0131303   2.481   0.0137 *  
## cos2        -0.0106103  0.0131240  -0.808   0.4195    
## sin3         0.0189477  0.0131249   1.444   0.1500    
## cos3         0.0071638  0.0131240   0.546   0.5856    
## sin4         0.0005992  0.0131239   0.046   0.9636    
## cos4         0.0052193  0.0131240   0.398   0.6912    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1542 on 266 degrees of freedom
## Multiple R-squared:  0.1723, Adjusted R-squared:  0.1443 
## F-statistic: 6.154 on 9 and 266 DF,  p-value: 7.102e-08
AIC(M1)
## [1] -318.5202
AIC(M2)
## [1] -230.9624
AIC(M3)
## [1] -237.02
BIC(M1)
## [1] -271.455
BIC(M2)
## [1] -198.3788
BIC(M3)
## [1] -197.1956
plot.ts(test$V1, ylim=c(0,2.0), xlim=c(0.,12), col='darkgrey')
lines(predict(M1),col="blue")

plot.ts(test$V1, ylim=c(0,2.0), xlim=c(0.,12), col='darkgrey')
lines(predict(M2),col="red")

plot.ts(test$V1, ylim=c(0,2.0), xlim=c(0.,12), col='darkgrey')
lines(predict(M3),col="green")

pred<-array(0,c(3,12))

pred[1,1:12] = predict(M1,  test)

pred[2,1:12] = predict(M2, data.frame(time=c(277:288)))

pred[3,1:12] = predict(M3, data.frame(time=c(277:288),
                                      sin1<-sin(2*pi*(4/n)*c(277:288)),
                                      cos1<-cos(2*pi*(4/n)*c(277:288)), 
                                      sin2<-sin(2*pi*(24/n)*c(277:288)),
                                      cos2<-cos(2*pi*(24/n)*c(277:288)),
                                      sin3<-sin(2*pi*(48/n)*c(277:288)),
                                      cos3<-cos(2*pi*(48/n)*c(277:288)),
                                      sin4<-sin(2*pi*(72/n)*c(277:288)),
                                      cos4<-cos(2*pi*(72/n)*c(277:288))
                                       ))
mae1<- mean(abs(test$V1 - pred[1,1:12]))
mae1
## [1] 0.05530652
mae2<- mean(abs(test$V1 - pred[2,1:12]))
mae2
## [1] 0.07602622
mae3<- mean(abs(test$V1 - pred[3,1:12]))
mae3
## [1] 0.1158031
mse1<- mean((test$V1 - pred[1,1:12])^2)
mse1
## [1] 0.004110149
mse2<- mean((test$V1 - pred[2,1:12])^2)
mse2
## [1] 0.01285999
mse3<- mean((test$V1 - pred[3,1:12])^2)
mse3
## [1] 0.01681781
mape1<- mean(abs(test$V1 - pred[1,1:12])/test$V1)
mape1
## [1] 0.09149061
mape2<- mean(abs(test$V1 - pred[2,1:12])/test$V1)
mape2
## [1] 0.1477129
mape3<- mean(abs(test$V1 - pred[3,1:12])/test$V1)
mape3
## [1] 0.1791411