###################################################
#
# Assignment 3
#
######################################################
#
#download the file Boston.dat from Blackboard to your MA611 work directory
#Under the file tab on R console, change the directory to your MA611 work directory
#Use the command dir() to see if Boston.dat is in the list of files
#Read the data and create a time series using the following commands ####
#### Analyzing time series data of robberies in Boston from Jan 1966 to Oct 1975 and forecasting for the months of Nov & Dec 1975. ####
## We would be using moving average decomposition, loess decomposition, Regression & Holt Winter Techniques
bosdata=scan(file='Boston.dat')
library(FinTS)
## Warning: package 'FinTS' was built under R version 3.2.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.2.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
#To change the Boston.dat data into a ts object
tsbos=ts(bosdata,start=c(1966,1),frequency=12)
tsbos
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1966 41 39 50 40 43 38 44 35 39 35 29 49
## 1967 50 59 63 32 39 47 53 60 57 52 70 90
## 1968 74 62 55 84 94 70 108 139 120 97 126 149
## 1969 158 124 140 109 114 77 120 133 110 92 97 78
## 1970 99 107 112 90 98 125 155 190 236 189 174 178
## 1971 136 161 171 149 184 155 276 224 213 279 268 287
## 1972 238 213 257 293 212 246 353 339 308 247 257 322
## 1973 298 273 312 249 286 279 309 401 309 328 353 354
## 1974 327 324 285 243 241 287 355 460 364 487 452 391
## 1975 500 451 375 372 302 316 398 394 431 431
#
plot(tsbos)
## The above figure indicates that the robberies have been on the rise from 1966 to 1975
## After visual analysis, we could conclude the presence of Positive Trend and Seasonality.
## Further analysis to confirming these statements & for forecasts
bosdc=decompose(tsbos,type="additive")
str(bosdc)
## List of 6
## $ x : Time-Series [1:118] from 1966 to 1976: 41 39 50 40 43 38 44 35 39 35 ...
## $ seasonal: Time-Series [1:118] from 1966 to 1976: 10.24 -4.84 -8.76 -28.96 -30.82 ...
## $ trend : Time-Series [1:118] from 1966 to 1976: NA NA NA NA NA ...
## $ random : Time-Series [1:118] from 1966 to 1976: NA NA NA NA NA ...
## $ figure : num [1:12] 10.24 -4.84 -8.76 -28.96 -30.82 ...
## $ type : chr "additive"
## - attr(*, "class")= chr "decomposed.ts"
layout(1:4)
plot(tsbos)
plot(bosdc$seasonal)
plot(bosdc$trend)
plot(bosdc$random)

## Above plots shows the robbery data series along with seasonal, trend & random components of the Additive Decomposition Model
## Adding the Trend & Seasonal for the Additive Model ##
predad=bosdc$trend+bosdc$seasonal
layout(1:1)
plot(tsbos, lwd = 2)
lines(predad, col ="red", lwd = 2)

#### fit seems to be bad for the seasonal peaks and as it worse as the trend increases, We can clearly see the overestimation at the lower tail & underestimation at the upper tail ####
bosadresid = window(bosdc$random,start = c(1966, 7), end = c(1975, 4))
#### Removing some of the NA values from the data
rmsebosad = sqrt(sum(bosadresid^2)/length(bosadresid))
rmsebosad
## [1] 28.75867
#### RMSE is found to be 28.75 for the Additive Model ####
plot(bosadresid)
abline(0,0)

plot(bosdc$random)

## From the above plot, random fluctuations seems to increase with the level
## of the time series hence it could be interesting use of multiplicative method or even try transfromation
#### Multiplicative Decomposition Model ####
bosmt=decompose(tsbos,type="multiplicative")
layout(1:3)
plot(tsbos)
plot(bosmt$seasonal)
plot(bosmt$trend)

### The above shows the trend and seasonal components of the multiplicative model
## We are multiplying these components in this model
predmt=bosmt$trend * bosmt$seasonal
residmt=tsbos - predmt
residbosmt = window(residmt,start = c(1966, 7), end = c(1975, 4))
rmsemt=sqrt(sum(residbosmt^2)/length(residbosmt))
rmsemt
## [1] 27.10955
#### RMSE for multiplicative model is found to be 27.10, which is lower than the Additive Decomp model
layout(1:1)
plot(tsbos,lwd=2)
lines(predmt,col='red',lwd=2)

#### The fitted lines looks to be doing well at the lower tail and better than the Additive Model at the higher tail
plot(bosmt$random)

plot(residbosmt)
abline(0,0)

#### Finalizing the Decomposition Moving Average Models ####
### After looking at the RMSE value (though there's no right value) and comparison of fitted lines & resuidal plots, its safe to say Multiplicative Decomposition has a better fit
## Hence let's use the mutiplicative model for forecasting
##### Fit the trend line
mttr=bosmt$trend
plot(mttr)

str(mttr)
## Time-Series [1:118] from 1966 to 1976: NA NA NA NA NA ...
class(mttr)
## [1] "ts"
timetr=time(mttr)
timr = unclass(timetr)
class(timr)
## [1] "numeric"
mtreg= lm(mttr~timr)
timwin=window(timr,start = c(1966, 7), end = c(1975, 4))
plot(mttr,lwd=2)
lines(timwin,fitted(mtreg),col='red',lwd=2)

timr=as.ts(timr)
summary(mtreg)
##
## Call:
## lm(formula = mttr ~ timr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.249 -13.466 5.194 13.493 37.282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.570e+04 1.560e+03 -54.95 <2e-16 ***
## timr 4.358e+01 7.913e-01 55.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.77 on 104 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.9669, Adjusted R-squared: 0.9665
## F-statistic: 3033 on 1 and 104 DF, p-value: < 2.2e-16
### We are getting an adjusted R-Square value of 96.65 which is more that good.
### The pv-value of the F-test in regression suggests that all the variables in the model are significant
##### Prediction of 1975 Nov & Dec ####
forecastmt=predict(mtreg,data.frame(timr=c(1975.833,1975.917)),se.fit=TRUE)
forecastmt$fit
## 1 2
## 409.9966 413.6573
### Adding the seasonal Component ###
semt=tail(bosmt$seasonal)
forecastfinal=forecastmt$fit+semt[5:6]
forecaster=forecastmt$se.fit
forecastup=forecastfinal + 2 * forecaster
forecastlw=forecastfinal - 2 * forecaster
forecastup
## 1 2
## 419.8782 423.5956
forecastlw
## 1 2
## 402.2318 405.7123
#### Forecasts for November & December with 95% confidence interval are 420 to 402 & 424 to 406 respectively ####
######### END Of Question 1 ###############
#### Start of Question 2 ###################
##### LOESS decomposition model ####
# LOESS Model also uses both additive & Multiplicative methods for the fit
bosloess=stl(tsbos,s.window = "periodic")
str(bosloess)
## List of 8
## $ time.series: mts [1:118, 1:3] 6.8 -6.58 -8.46 -27.01 -34.46 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "seasonal" "trend" "remainder"
## ..- attr(*, "tsp")= num [1:3] 1966 1976 12
## ..- attr(*, "class")= chr [1:3] "mts" "ts" "matrix"
## $ weights : num [1:118] 1 1 1 1 1 1 1 1 1 1 ...
## $ call : language stl(x = tsbos, s.window = "periodic")
## $ win : Named num [1:3] 1181 19 13
## ..- attr(*, "names")= chr [1:3] "s" "t" "l"
## $ deg : Named int [1:3] 0 1 1
## ..- attr(*, "names")= chr [1:3] "s" "t" "l"
## $ jump : Named num [1:3] 119 2 2
## ..- attr(*, "names")= chr [1:3] "s" "t" "l"
## $ inner : int 2
## $ outer : int 0
## - attr(*, "class")= chr "stl"
plot(bosloess)

### Plot indicates the presence of trend, seasonal & random elements in the data ####
#### We are starting our analysis witht the Stl additive model ####
pred1=bosloess$time.series[,'seasonal'] + bosloess$time.series[,'trend']
plot(tsbos,lwd=2)
lines(pred1,col='red',lwd=2)

##### fitted line seems to be doing badly at both tails
##### missing the peaks as the trend increases (Overestimating at the lower tail & Underestimating at the higher tail)
rmse1=sqrt(sum(bosloess$time.series[,'remainder']^2)/length(bosloess$time.series[,'remainder']))
rmse1
## [1] 28.47619
#### rmse for loess additive is 28.47 #####
#### visualizing the remainder values ####
plot(bosloess$time.series[,'remainder'],ylab='REMAINDER')
abline(0,0)

#### Indicates the variance seem to be slightly increasing as the time increases
#### LOESS multiplicative model ####
## When using the LOESS multiplicative model we apply the natural log to turn the method into a additive model
logbos=log(tsbos)
plot(logbos,ylab='log(boston robbery)')

### Plot shows the distribution of the log(robbery data)
class(logbos)
## [1] "ts"
boslog=stl(logbos,s.window = 'periodic')
plot(boslog)

## Plot indicates the seasonal, trend & random elements in the data
#### seasonality seems to be constant over the time period and trend is linearly positive
pred2=boslog$time.series[,'seasonal'] + boslog$time.series[,'trend']
## We are adding seasonal & trend component because of the natural log
plot(logbos,lwd=1)
lines(pred2,col='red',lwd=3)

#### Fitted lines doesn't seem to fit well on the seasonal higher & lower peaks, We can see slight underestimation in the fit
## calculating the RMSE ####
residlg=tsbos - exp(pred2)
rsmelog=sqrt(sum(residlg^2)/length(residlg))
rsmelog
## [1] 27.0506
#### RSME for LOESS Multiplicative model is 27.0506 #####
#### Looking at the both the models(LOESS addictive & LOESS Multiplicative), the multiplicative one has the better RSME and fitted plot looks better as well, hence it's logical to choose the multiplicative model for forecast
## Forecasting for the last two months ##
trlogmt=boslog$time.series[,'trend']
plot(trlogmt)

#### There seems to be a positive trend with the increase in time, but we can see a dip in robberies just before the year 1970
str(trlogmt)
## Time-Series [1:118] from 1966 to 1976: 3.76 3.75 3.74 3.73 3.73 ...
class(trlogmt)
## [1] "ts"
timelogmt=time(trlogmt)
timelgmt=unclass(timelogmt)
logmtreg=lm(trlogmt~timelgmt)
summary(logmtreg)
##
## Call:
## lm(formula = trlogmt ~ timelgmt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34574 -0.11822 -0.00523 0.12403 0.28454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.067e+02 1.003e+01 -50.52 <2e-16 ***
## timelgmt 2.597e-01 5.089e-03 51.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1569 on 116 degrees of freedom
## Multiple R-squared: 0.9573, Adjusted R-squared: 0.957
## F-statistic: 2603 on 1 and 116 DF, p-value: < 2.2e-16
#### We get a good Adjusted R-squared of 95.7% & p-value is very small hence the variables are significant
plot(trlogmt,lwd=2)
lines(timelgmt,fitted(logmtreg),col='red',lwd=2)

#### Forecasting for the logmultiplicative model ####
predmt=predict(logmtreg,data.frame(timelgmt=c(1975.833,1975.917)),se.fit = TRUE)
predmt
## $fit
## 1 2
## 6.303092 6.324903
##
## $se.fit
## 1 2
## 0.02907656 0.02944834
##
## $df
## [1] 116
##
## $residual.scale
## [1] 0.1569314
##### Adding the seasonality value into the predicted values ####
seas=tail(boslog$time.series[,'seasonal'])
predval=predmt$fit + seas[5:6]
predval
## 1 2
## 6.377526 6.340839
prederlg=predmt$se.fit
prederlgup=predval + 2*prederlg
prederlglw=predval - 2*prederlg
forecastlogmtup=exp(prederlgup)
forecastlogmtlw=exp(prederlglw)
forecastlogmtup
## 1 2
## 623.7061 601.6857
forecastlogmtlw
## 1 2
## 555.2248 534.8262
##### 95% confidence intervals for the predicted armed robberies in Nov 1975 are 624 and 555, whereas for Dec 1975, they are 602 and 535. ####
##### End of Question 2 #####
##### Start of Question 3 (Linear Regression Model) #######
### Here we would be using function of time to represent the trend component and the seasona using dummy variables as factors ###
ltime=time(tsbos)
ltime
## Jan Feb Mar Apr May Jun Jul
## 1966 1966.000 1966.083 1966.167 1966.250 1966.333 1966.417 1966.500
## 1967 1967.000 1967.083 1967.167 1967.250 1967.333 1967.417 1967.500
## 1968 1968.000 1968.083 1968.167 1968.250 1968.333 1968.417 1968.500
## 1969 1969.000 1969.083 1969.167 1969.250 1969.333 1969.417 1969.500
## 1970 1970.000 1970.083 1970.167 1970.250 1970.333 1970.417 1970.500
## 1971 1971.000 1971.083 1971.167 1971.250 1971.333 1971.417 1971.500
## 1972 1972.000 1972.083 1972.167 1972.250 1972.333 1972.417 1972.500
## 1973 1973.000 1973.083 1973.167 1973.250 1973.333 1973.417 1973.500
## 1974 1974.000 1974.083 1974.167 1974.250 1974.333 1974.417 1974.500
## 1975 1975.000 1975.083 1975.167 1975.250 1975.333 1975.417 1975.500
## Aug Sep Oct Nov Dec
## 1966 1966.583 1966.667 1966.750 1966.833 1966.917
## 1967 1967.583 1967.667 1967.750 1967.833 1967.917
## 1968 1968.583 1968.667 1968.750 1968.833 1968.917
## 1969 1969.583 1969.667 1969.750 1969.833 1969.917
## 1970 1970.583 1970.667 1970.750 1970.833 1970.917
## 1971 1971.583 1971.667 1971.750 1971.833 1971.917
## 1972 1972.583 1972.667 1972.750 1972.833 1972.917
## 1973 1973.583 1973.667 1973.750 1973.833 1973.917
## 1974 1974.583 1974.667 1974.750 1974.833 1974.917
## 1975 1975.583 1975.667 1975.750
lseas=cycle(tsbos)
ldata=coredata(tsbos)
ldata
## [1] 41 39 50 40 43 38 44 35 39 35 29 49 50 59 63 32 39
## [18] 47 53 60 57 52 70 90 74 62 55 84 94 70 108 139 120 97
## [35] 126 149 158 124 140 109 114 77 120 133 110 92 97 78 99 107 112
## [52] 90 98 125 155 190 236 189 174 178 136 161 171 149 184 155 276 224
## [69] 213 279 268 287 238 213 257 293 212 246 353 339 308 247 257 322 298
## [86] 273 312 249 286 279 309 401 309 328 353 354 327 324 285 243 241 287
## [103] 355 460 364 487 452 391 500 451 375 372 302 316 398 394 431 431
lseas
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1966 1 2 3 4 5 6 7 8 9 10 11 12
## 1967 1 2 3 4 5 6 7 8 9 10 11 12
## 1968 1 2 3 4 5 6 7 8 9 10 11 12
## 1969 1 2 3 4 5 6 7 8 9 10 11 12
## 1970 1 2 3 4 5 6 7 8 9 10 11 12
## 1971 1 2 3 4 5 6 7 8 9 10 11 12
## 1972 1 2 3 4 5 6 7 8 9 10 11 12
## 1973 1 2 3 4 5 6 7 8 9 10 11 12
## 1974 1 2 3 4 5 6 7 8 9 10 11 12
## 1975 1 2 3 4 5 6 7 8 9 10
### Extracting the seasonality, time & the core data from the time series dataset
## Regression function ##
lnreg=lm(ldata~0+ltime+factor(lseas))
summary(lnreg)
##
## Call:
## lm(formula = ldata ~ 0 + ltime + factor(lseas))
##
## Residuals:
## Min 1Q Median 3Q Max
## -90.911 -28.339 -0.222 23.333 119.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ltime 41.978 1.349 31.11 <2e-16 ***
## factor(lseas)1 -82525.111 2658.540 -31.04 <2e-16 ***
## factor(lseas)2 -82539.409 2658.652 -31.05 <2e-16 ***
## factor(lseas)3 -82542.207 2658.765 -31.05 <2e-16 ***
## factor(lseas)4 -82561.606 2658.877 -31.05 <2e-16 ***
## factor(lseas)5 -82569.904 2658.989 -31.05 <2e-16 ***
## factor(lseas)6 -82570.702 2659.102 -31.05 <2e-16 ***
## factor(lseas)7 -82521.100 2659.214 -31.03 <2e-16 ***
## factor(lseas)8 -82504.198 2659.327 -31.02 <2e-16 ***
## factor(lseas)9 -82526.496 2659.439 -31.03 <2e-16 ***
## factor(lseas)10 -82524.994 2659.552 -31.03 <2e-16 ***
## factor(lseas)11 -82528.315 2658.993 -31.04 <2e-16 ***
## factor(lseas)12 -82523.813 2659.106 -31.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 41.47 on 105 degrees of freedom
## Multiple R-squared: 0.9721, Adjusted R-squared: 0.9686
## F-statistic: 281 on 13 and 105 DF, p-value: < 2.2e-16
### Adjusted R Square is 96.86 which is a good value and the P-value indicates that time variable is sufficient to predict the robberies in Boston
plot(lnreg)




### About the fitted vs Residual
## The Fitted Vs Residual plots above indicates the non-linear relationship between them hence we may have to look at other models for a better fit
##### The model has a good Adjusted R-Square value of 96.86% with very low p-values hence making all the varibales significant
lnresid=resid(lnreg)
lnhat=fitted(lnreg)
lnresid
## 1 2 3 4 5 6
## 37.8000000 46.6000000 56.9000000 62.8000000 70.6000000 62.9000000
## 7 8 9 10 11 12
## 15.8000000 -13.6000000 9.2000000 0.2000000 -5.9777778 6.0222222
## 13 14 15 16 17 18
## 4.8222222 24.6222222 27.9222222 12.8222222 24.6222222 29.9222222
## 19 20 21 22 23 24
## -17.1777778 -30.5777778 -14.7777778 -24.7777778 -6.9555556 5.0444444
## 25 26 27 28 29 30
## -13.1555556 -14.3555556 -22.0555556 22.8444444 37.6444444 10.9444444
## 31 32 33 34 35 36
## -4.1555556 6.4444444 6.2444444 -21.7555556 7.0666667 22.0666667
## 37 38 39 40 41 42
## 28.8666667 5.6666667 20.9666667 5.8666667 15.6666667 -24.0333333
## 43 44 45 46 47 48
## -34.1333333 -41.5333333 -45.7333333 -68.7333333 -63.9111111 -90.9111111
## 49 50 51 52 53 54
## -72.1111111 -53.3111111 -49.0111111 -55.1111111 -42.3111111 -18.0111111
## 55 56 57 58 59 60
## -41.1111111 -26.5111111 38.2888889 -13.7111111 -28.8888889 -32.8888889
## 61 62 63 64 65 66
## -77.0888889 -41.2888889 -31.9888889 -38.0888889 1.7111111 -29.9888889
## 67 68 69 70 71 72
## 37.9111111 -34.4888889 -26.6888889 34.3111111 23.1333333 34.1333333
## 73 74 75 76 77 78
## -17.0666667 -31.2666667 12.0333333 63.9333333 -12.2666667 19.0333333
## 79 80 81 82 83 84
## 72.9333333 38.5333333 26.3333333 -39.6666667 -29.8444444 27.1555556
## 85 86 87 88 89 90
## 0.9555556 -13.2444444 25.0555556 -22.0444444 19.7555556 10.0555556
## 91 92 93 94 95 96
## -13.0444444 58.5555556 -14.6444444 -0.6444444 24.1777778 17.1777778
## 97 98 99 100 101 102
## -12.0222222 -4.2222222 -43.9222222 -70.0222222 -67.2222222 -23.9222222
## 103 104 105 106 107 108
## -9.0222222 75.5777778 -1.6222222 116.3777778 81.2000000 12.2000000
## 109 110 111 112 113 114
## 119.0000000 80.8000000 4.1000000 17.0000000 -48.2000000 -36.9000000
## 115 116 117 118
## -8.0000000 -32.4000000 23.4000000 18.4000000
ltime=unclass(ltime)
class(lnhat)
## [1] "numeric"
plot(tsbos)
lines(ltime,lnhat,col='red',lwd=2)

rmseln=sqrt(sum(lnresid^2)/length(lnresid))
rmseln
## [1] 39.1229
### RMSE value for Linear model of time is 39.12 ###
### Fitted lines seems to be doing badly as the trend increases, mostly at the higher tail it is undersetimating
### Hence this may not be the optimial model
### We must check for Auto-correaltions
#### Histogram of Residuals ####
hist(lnresid)

### Histogram indicates the slight left skewness of the residuals
acf(lnresid)

### The plot indicates the correlations between the time series and their lags.
## In the above plot significant evidence that the last 4 months of data are significant in the time series estimation
AutocorTest(lnresid)
##
## Box-Ljung test
##
## data: lnresid
## X-squared = 85.372, df = 5, p-value < 2.2e-16
## The Bos-Ljung values is small hence we can conclude that the auto-correlation is less between the residual hence the fit is good.
#### Quardratic Model of Time ####
# Testing to see if the fit has improved or not #
lnregqu=lm(ldata~0+ltime + I(ltime^2)+factor(lseas))
summary(lnregqu)
##
## Call:
## lm(formula = ldata ~ 0 + ltime + I(ltime^2) + factor(lseas))
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.785 -20.525 -7.814 23.835 102.406
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ltime -8.271e+03 1.941e+03 -4.260 4.50e-05 ***
## I(ltime^2) 2.109e+00 4.925e-01 4.282 4.14e-05 ***
## factor(lseas)1 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)2 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)3 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)4 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)5 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)6 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)7 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)8 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)9 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)10 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)11 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## factor(lseas)12 8.109e+06 1.913e+06 4.239 4.88e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.42 on 104 degrees of freedom
## Multiple R-squared: 0.9762, Adjusted R-squared: 0.9731
## F-statistic: 305.3 on 14 and 104 DF, p-value: < 2.2e-16
## The Adjusted R-Square value seems to be increased from the linear model of time
plot(lnregqu)




## The Residual vs Fitted line has become more straigther than linear model also, which means its closer to the linear trend and relationship between the residuals
## Normal QQ plots indicates better Normality in the residual of quadratic model than the linear.
quresid=resid(lnregqu)
quhat=fitted(lnregqu)
rmsequ=sqrt(sum(quresid^2)/length(quresid))
rmsequ
## [1] 36.07243
### RMSE for the quardratic is found to be 36.07 ###
ltime=unclass(ltime)
class(quhat)
## [1] "numeric"
plot(tsbos)
lines(ltime,quhat,col='red',lwd=2)

## Fit gets underestimated as the trend increases ##
hist(quresid)

## Historgram of residuals indicates a better normal distribution of resiual values when compared to the linear regression model
acf(quresid)

## ACF plot of the quadartic model of time is looks slightly better than the linear one
AutocorTest(quresid)
##
## Box-Ljung test
##
## data: quresid
## X-squared = 55.621, df = 5, p-value = 9.724e-11
## After looking at the residual plots and the RMSE value of the linear & Quadratic model of time(also the ACF plots), it's right to conclude that the Quadratic function has improved the fitted hence lets use the same for forecasting.
timpr=c(1975.833,1974.917)
seapr=c(11:12)
predreg=predict(lnregqu,data.frame(ltime=timpr,lseas=seapr),se.fit = TRUE )
predrval=predreg$fit
predreger=predreg$se.fit
class(predreger)
## [1] "numeric"
predupse=predrval + 2 * predreger
predlwse=predrval - 2 * predreger
predupse
## 1 2
## 484.0639 428.3073
predlwse
## 1 2
## 417.0189 370.1042
### 95% confidence intervals for the predicted robberies in Nov 1975 are 417 and 484, whereas for Dec 1975, they are 428 and 370.
######## End of Question 3 #######
##### Start of Question 4 (Holt-Winter Model) ######
## Model uses exponential weighted moving averages method ##
hwbos=HoltWinters(tsbos,seasonal = 'multiplicative')
str(hwbos)
## List of 9
## $ fitted : mts [1:106, 1:4] 47.1 56 59.6 30.4 36.6 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:4] "xhat" "level" "trend" "season"
## ..- attr(*, "tsp")= num [1:3] 1967 1976 12
## ..- attr(*, "class")= chr [1:3] "mts" "ts" "matrix"
## $ x : Time-Series [1:118] from 1966 to 1976: 41 39 50 40 43 38 44 35 39 35 ...
## $ alpha : Named num 0.213
## ..- attr(*, "names")= chr "alpha"
## $ beta : Named num 0.00645
## ..- attr(*, "names")= chr "beta"
## $ gamma : Named num 0.425
## ..- attr(*, "names")= chr "gamma"
## $ coefficients: Named num [1:14] 355.5 2.01 1.16 1.13 1.15 ...
## ..- attr(*, "names")= chr [1:14] "a" "b" "s1" "s2" ...
## $ seasonal : chr "multiplicative"
## $ SSE : num 193990
## $ call : language HoltWinters(x = tsbos, seasonal = "multiplicative")
## - attr(*, "class")= chr "HoltWinters"
hwbos$fitted
## xhat level trend season
## Jan 1967 47.10544 38.60606 1.038170 1.1882042
## Feb 1967 55.96233 40.16347 1.041519 1.3581447
## Mar 1967 59.60059 41.68171 1.044594 1.3949390
## Apr 1967 30.43937 43.24573 1.047945 0.6872172
## May 1967 36.56040 44.77771 1.051067 0.7977609
## Jun 1967 42.82511 46.48058 1.055271 0.9009013
## Jul 1967 56.00360 48.52358 1.061642 1.1294412
## Aug 1967 43.68749 49.01840 1.057986 0.8724170
## Sep 1967 51.89885 54.06176 1.083692 0.9411266
## Oct 1967 48.24019 56.30075 1.091144 0.8405402
## Nov 1967 41.88190 58.34530 1.097294 0.7045773
## Dec 1967 81.86587 67.94868 1.152160 1.1847304
## Jan 1968 86.95338 70.56424 1.161599 1.2123020
## Feb 1968 97.59894 69.44841 1.146909 1.3825128
## Mar 1968 94.11124 65.10699 1.111509 1.4212228
## Apr 1968 42.92925 60.35291 1.073674 0.6988709
## May 1968 61.23548 73.95246 1.154469 0.8153107
## Jun 1968 78.91238 83.67243 1.209718 0.9296699
## Jul 1968 93.19124 82.83882 1.196538 1.1089527
## Aug 1968 85.74517 86.88165 1.214897 0.9733091
## Sep 1968 98.16112 99.75875 1.290121 0.9714222
## Oct 1968 92.38269 105.84062 1.321028 0.8620872
## Nov 1968 92.41333 108.30324 1.328392 0.8429440
## Dec 1968 146.19032 118.12423 1.383171 1.2232741
## Jan 1969 139.58303 119.99697 1.386329 1.1499360
## Feb 1969 151.40690 124.79693 1.408347 1.1996876
## Mar 1969 147.81230 121.33600 1.376940 1.2045371
## Apr 1969 108.53532 121.33055 1.368023 0.8845687
## May 1969 117.50383 122.81054 1.368745 0.9462434
## Jun 1969 111.49189 123.39004 1.363654 0.8936961
## Jul 1969 137.39274 116.52749 1.310594 1.1659452
## Aug 1969 133.55054 114.65856 1.290085 1.1518077
## Sep 1969 121.87027 115.84676 1.289428 1.0404152
## Oct 1969 101.63657 114.70440 1.273742 0.8763424
## Nov 1969 107.77149 113.63434 1.258624 0.9380165
## Dec 1969 139.96193 112.44538 1.242837 1.2311033
## Jan 1970 124.88621 102.96061 1.173642 1.1992808
## Feb 1970 113.17783 99.53359 1.143967 1.1241615
## Mar 1970 119.06100 99.50623 1.136411 1.1830076
## Apr 1970 89.02511 99.37045 1.128205 0.8858339
## May 1970 95.41996 100.73323 1.129718 0.9367485
## Jun 1970 82.32023 102.44999 1.133505 0.7947234
## Jul 1970 129.63085 115.03015 1.207338 1.1152241
## Aug 1970 140.70015 121.08609 1.238613 1.1502187
## Sep 1970 133.52953 131.46031 1.297539 1.0058127
## Oct 1970 132.20976 154.47256 1.437603 0.8479868
## Nov 1970 155.57070 170.18454 1.529676 0.9059862
## Dec 1970 182.91427 176.04992 1.557642 1.0298789
## Jan 1971 198.15038 176.59050 1.551082 1.1123196
## Feb 1971 185.04776 166.23227 1.474264 1.1034022
## Mar 1971 190.70275 163.06124 1.444301 1.1592482
## Apr 1971 144.29947 160.88292 1.420935 0.8890699
## May 1971 155.81959 163.43075 1.428203 0.9451691
## Jun 1971 158.65861 171.21388 1.469194 0.9187849
## Jul 1971 205.40654 171.83433 1.463719 1.1852790
## Aug 1971 239.22612 185.99257 1.545602 1.2756129
## Sep 1971 228.97947 184.99402 1.529191 1.2276191
## Oct 1971 177.76915 183.74880 1.511296 0.9595652
## Nov 1971 197.04526 207.74602 1.656335 0.9409887
## Dec 1971 231.90942 225.47434 1.760002 1.0205738
## Jan 1972 237.52036 238.73984 1.834215 0.9873066
## Feb 1972 255.63008 240.67760 1.834883 1.0540904
## Mar 1972 263.55145 233.89241 1.779281 1.1182991
## Apr 1972 212.26467 234.42301 1.771227 0.8986869
## May 1972 257.28954 255.34243 1.894737 1.0002036
## Jun 1972 227.38624 247.58595 1.832485 0.9116657
## Jul 1972 335.43592 253.77026 1.860555 1.3121889
## Aug 1972 324.95677 258.48381 1.878957 1.2480923
## Sep 1972 317.20042 262.76101 1.894426 1.1985411
## Oct 1972 297.35310 263.01927 1.883873 1.1224974
## Nov 1972 269.04814 255.34192 1.822201 1.0462118
## Dec 1972 281.58563 254.70957 1.806369 1.0977315
## Jan 1973 263.01824 264.36309 1.856984 0.9879730
## Feb 1973 273.78346 273.76698 1.905663 0.9931470
## Mar 1973 307.63404 275.50450 1.904579 1.1089545
## Apr 1973 281.39354 278.24823 1.909991 1.0044094
## May 1973 258.37636 273.28405 1.865652 0.9390392
## Jun 1973 265.24719 281.41973 1.906095 0.9361914
## Jul 1973 384.96551 286.45695 1.926291 1.3349095
## Aug 1973 352.06669 276.25390 1.848054 1.2659627
## Sep 1973 342.09830 286.34060 1.901195 1.1868449
## Oct 1973 300.23276 282.29773 1.862855 1.0565602
## Nov 1973 300.52638 289.76217 1.898986 1.0303956
## Dec 1973 349.80660 302.51564 1.969000 1.1488481
## Jan 1974 316.66836 305.26264 1.974018 1.0306985
## Feb 1974 308.93117 309.37319 1.987799 0.9921961
## Mar 1974 352.76357 314.59808 2.008679 1.1142010
## Apr 1974 294.80520 303.64377 1.925065 0.9647751
## May 1974 287.64633 294.12373 1.851242 0.9718603
## Jun 1974 273.79883 285.74473 1.785255 0.9522444
## Jul 1974 363.30462 290.48484 1.804314 1.2429631
## Aug 1974 387.21981 290.86507 1.795129 1.3231037
## Sep 1974 351.47134 304.38463 1.870753 1.1476414
## Oct 1974 337.97612 308.58225 1.885762 1.0886021
## Nov 1974 371.92643 339.64625 2.073968 1.0883946
## Dec 1974 414.75027 357.40128 2.175114 1.1534413
## Jan 1975 372.29417 355.18759 2.146805 1.0418649
## Feb 1975 388.94364 383.46033 2.315323 1.0082120
## Mar 1975 417.17820 398.89484 2.399944 1.0395804
## Apr 1975 357.81513 392.64704 2.344164 0.9058812
## May 1975 367.54749 398.32875 2.365692 0.9172762
## Jun 1975 375.10637 385.46344 2.267449 0.9674400
## Jul 1975 464.86499 374.70874 2.183454 1.2334163
## Aug 1975 515.54688 365.33742 2.108923 1.4030535
## Sep 1975 407.55404 348.98166 1.989822 1.1612170
## Oct 1975 441.36740 355.27503 2.017581 1.2353107
plot(tsbos,lwd=1)
lines(hwbos$fitted[,1],col='red',lwd=2)

### Fitted lines looks to miss the peaks at the middle as well as at the upper tail
hwresid=tsbos - hwbos$fitted[,'xhat']
rmsehwm=sqrt(sum(hwresid^2)/length(hwresid))
rmsehwm
## [1] 42.77963
#### RMSE for the holtwinter Multiplicative model is found to be 42.779 ####
plot(hwresid)

### Holt Winters Additive model ###
hwbosad=HoltWinters(tsbos,seasonal = 'additive')
str(hwbosad)
## List of 9
## $ fitted : mts [1:106, 1:4] 47.8 57.7 61.7 31.1 37.3 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:4] "xhat" "level" "trend" "season"
## ..- attr(*, "tsp")= num [1:3] 1967 1976 12
## ..- attr(*, "class")= chr [1:3] "mts" "ts" "matrix"
## $ x : Time-Series [1:118] from 1966 to 1976: 41 39 50 40 43 38 44 35 39 35 ...
## $ alpha : Named num 0.6
## ..- attr(*, "names")= chr "alpha"
## $ beta : Named num 0.000254
## ..- attr(*, "names")= chr "beta"
## $ gamma : Named num 0.205
## ..- attr(*, "names")= chr "gamma"
## $ coefficients: Named num [1:14] 421.6 1.11 -6.89 2.33 5.76 ...
## ..- attr(*, "names")= chr [1:14] "a" "b" "s1" "s2" ...
## $ seasonal : chr "additive"
## $ SSE : num 170403
## $ call : language HoltWinters(x = tsbos, seasonal = "additive")
## - attr(*, "class")= chr "HoltWinters"
hwbosad$fitted
## xhat level trend season
## Jan 1967 47.79006 38.60606 1.038170 8.14583333
## Feb 1967 57.73686 40.96919 1.038507 15.72916667
## Mar 1967 61.74121 42.76501 1.038699 17.93750000
## Apr 1967 31.07647 44.55841 1.038891 -14.52083333
## May 1967 37.25253 46.15100 1.039032 -9.93750000
## Jun 1967 43.92285 48.23772 1.039298 -5.35416667
## Jul 1967 57.55751 51.12191 1.039766 5.39583333
## Aug 1967 45.65581 49.42924 1.039072 -4.81250000
## Sep 1967 57.92208 59.06832 1.041257 -2.18750000
## Oct 1967 54.20203 59.55675 1.041117 -6.39583333
## Nov 1967 48.42259 59.27764 1.040781 -11.89583333
## Dec 1967 82.19499 73.25509 1.044068 7.89583333
## Jan 1968 88.35090 78.97862 1.045257 8.32701705
## Feb 1968 88.29564 71.41985 1.043071 15.83272609
## Mar 1968 75.77722 56.69745 1.039066 18.04070348
## Apr 1968 31.87039 45.27960 1.035901 -14.44511677
## May 1968 68.81925 77.56964 1.043841 -9.79423232
## Jun 1968 89.65631 93.71052 1.047677 -5.10188378
## Jul 1968 89.04018 82.97332 1.044683 5.02218130
## Aug 1968 92.79639 95.38530 1.047571 -3.63647784
## Sep 1968 122.92560 124.13409 1.054608 -2.26309749
## Oct 1968 117.91246 123.43466 1.054163 -6.57636910
## Nov 1968 102.87502 111.95083 1.050977 -10.12678892
## Dec 1968 136.45655 126.86631 1.054500 8.53573456
## Jan 1969 143.64805 135.44120 1.056410 7.15044490
## Feb 1969 159.83772 145.10227 1.058596 13.67685244
## Mar 1969 142.06488 124.67448 1.053138 16.33726306
## Apr 1969 115.37123 124.48963 1.052823 -10.17122059
## May 1969 115.04469 121.72260 1.051853 -7.72976431
## Jun 1969 116.48638 122.14811 1.051693 -6.71342515
## Jul 1969 107.14818 99.52588 1.045679 6.57662012
## Aug 1969 109.47603 108.27682 1.047636 0.15156945
## Sep 1969 121.97644 123.42818 1.051220 -2.50295551
## Oct 1969 110.05746 117.29896 1.049395 -8.29089691
## Nov 1969 100.33785 107.52207 1.046645 -8.23086531
## Dec 1969 117.17778 106.56752 1.046136 9.56412149
## Jan 1970 93.49202 84.12475 1.040169 8.32710299
## Feb 1970 100.24688 88.46721 1.041008 10.73866271
## Mar 1970 110.76704 93.55703 1.042037 16.16797172
## Apr 1970 85.68694 95.33828 1.042224 -10.69357185
## May 1970 92.19386 98.96639 1.042881 -7.81541409
## Jun 1970 94.58334 103.49033 1.043766 -9.95075397
## Jul 1970 131.44899 122.77030 1.048399 7.63028913
## Aug 1970 141.07082 137.93863 1.051986 2.08020463
## Sep 1970 165.90053 168.32594 1.059439 -3.48485564
## Oct 1970 202.71205 211.41329 1.070116 -9.77135510
## Nov 1970 196.82590 204.26239 1.068028 -8.50452187
## Dec 1970 199.06187 191.64523 1.064551 6.35209406
## Jan 1971 189.92223 180.08220 1.061343 8.77867992
## Feb 1971 161.16011 148.81466 1.053129 11.29232379
## Mar 1971 167.09395 149.77179 1.053105 16.26905747
## Apr 1971 143.88049 153.16675 1.053700 -10.33996118
## May 1971 151.00493 157.28984 1.054480 -7.33939201
## Jun 1971 171.72890 178.12640 1.059506 -7.45701472
## Jul 1971 179.77426 169.15616 1.056957 9.56114142
## Aug 1971 235.06826 227.90494 1.071614 6.09171031
## Sep 1971 225.67286 222.34061 1.069929 2.26231693
## Oct 1971 205.98502 215.81257 1.067998 -10.89555057
## Nov 1971 251.35965 260.65646 1.079120 -10.37592488
## Dec 1971 277.41921 271.71224 1.081654 4.62531620
## Jan 1972 283.97895 278.53803 1.083114 4.35781456
## Feb 1972 264.40992 252.05461 1.076110 11.27919716
## Mar 1972 239.96565 222.30808 1.068280 16.58929834
## Apr 1972 224.73988 233.58924 1.070874 -9.92023345
## May 1972 272.03225 275.58524 1.081272 -4.63425918
## Jun 1972 232.91795 240.67438 1.072128 -8.82854922
## Jul 1972 268.11422 249.58980 1.074120 17.45030129
## Aug 1972 307.82822 301.55691 1.087050 5.18426812
## Sep 1972 323.64801 321.33289 1.091798 1.22332051
## Oct 1972 309.22304 313.04298 1.089414 -4.90934740
## Nov 1972 268.89506 276.82677 1.079937 -9.01165010
## Dec 1972 277.26400 270.77507 1.078125 5.41080618
## Jan 1973 300.34764 298.67451 1.084939 0.58818595
## Feb 1973 306.50082 298.35193 1.084581 7.06430575
## Mar 1973 298.41656 279.35121 1.079479 17.98587553
## Apr 1973 285.33228 288.57459 1.081548 -4.32386226
## May 1973 259.39320 267.87324 1.076014 -9.55606106
## Jun 1973 278.22534 284.90128 1.080066 -7.75600512
## Jul 1973 311.93572 286.44579 1.080184 24.40974315
## Aug 1973 294.58553 285.76587 1.079737 7.73991604
## Sep 1973 351.68240 350.64605 1.095946 -0.05959649
## Oct 1973 317.23059 326.15191 1.089445 -10.01076326
## Nov 1973 324.80232 333.69812 1.091085 -9.98687793
## Dec 1973 361.86893 351.69502 1.095380 9.07853023
## Jan 1974 349.56251 348.07261 1.094182 0.39571264
## Feb 1974 341.04797 335.63952 1.090745 4.31770889
## Mar 1974 346.69688 326.50921 1.088148 19.09952658
## Apr 1974 284.38335 290.60720 1.078751 -7.30259884
## May 1974 260.57247 266.87470 1.072447 -7.37467676
## Jun 1974 249.58951 256.21254 1.069466 -7.69249380
## Jul 1974 304.95556 279.71134 1.075164 24.16905558
## Aug 1974 328.33768 310.79048 1.082787 16.46440851
## Sep 1974 388.35486 390.81098 1.102842 -3.55895425
## Oct 1974 369.28325 377.31194 1.099132 -9.12782276
## Nov 1974 442.42976 448.98776 1.117062 -7.67506443
## Dec 1974 465.39454 455.84263 1.118520 8.43338806
## Jan 1975 412.01124 412.35815 1.107188 -1.45409596
## Feb 1975 470.25931 466.21871 1.120591 2.92001461
## Mar 1975 470.95135 455.79244 1.117657 14.04124854
## Apr 1975 389.79038 399.38279 1.103042 -10.69545222
## May 1975 381.94066 389.81967 1.100332 -8.97934433
## Jun 1975 339.45464 342.99185 1.088156 -4.62535884
## Jul 1975 359.37443 330.01785 1.084583 28.27199713
## Aug 1975 382.60960 354.26026 1.090466 27.25887028
## Sep 1975 357.71630 362.17981 1.092201 -5.55571099
## Oct 1975 408.83567 407.20901 1.103364 0.52329785
plot(tsbos,lwd=1)
lines(hwbos$fitted[,1],col='red',lwd=2)

hwresidad=tsbos - hwbosad$fitted[,'xhat']
plot(hwresidad)

rmsehwad=sqrt(sum(hwresidad^2)/length(hwresidad))
rmsehwad
## [1] 40.09463
### RMSE for Holt Winter Additive is found to be 40.094
## Since RMSE is smaller for holt winter additive model than the multiplicative model, let's choose additive model for the forecast
## Even the residual plot looks more spread out and constant variance than the multiplicative method
prehwad=predict(hwbosad,n.ahead = 10,prediction.interval = TRUE)
plot(hwbosad,predicted.values = prehwad)

prehwad
## fit upr lwr
## Nov 1975 415.8172 494.3304 337.3040
## Dec 1975 426.1485 517.6976 334.5993
## Jan 1976 430.6809 533.6338 327.7280
## Feb 1976 427.3689 540.5876 314.1502
## Mar 1976 433.3092 555.9420 310.6765
## Apr 1976 416.0873 547.4655 284.7092
## May 1976 413.8147 553.3955 274.2340
## Jun 1976 423.9065 571.2377 276.5753
## Jul 1976 463.0003 617.6978 308.3029
## Aug 1976 460.8610 622.5931 299.1290
##95% confidence intervals for the predicted armed robberies in Nov 1975 are 337 and 494, whereas for Dec 1975, they are 518 and 335
###### End of Question 4 #######
#### Start of Question 5 ######
# RMSE of Decomposition Additive Model --------- 28.75
# RMSE of Decomposition Multiplicative Model --- 27.10
# RMSE of LOESS Additive Model ----------------- 28.47
# RMSE of LOESS Multiplicative Model ----------- 27.05
# RMSE of Linear Model of time ----------------- 39.12
# RMSE of Quadratic Model of time -------------- 36.07
# RMSE of Holt-Winters Multiplicative Model ---- 42.78
# RMSE of Holt-Winters Additive Model ---------- 40.094
## Looking at the RMSE values of all the models LOESS Multiplicative & Decomposition Multiplicative seems to be best among the lot
plot(residmt)

## Residual of Decomposition Multiplicative model ##
plot(residlg)

## Residual of LOESS Multiplicative model ##
## From the RMSE values there's no much difference between the two models
## But when we look at their residual plots, Decompose multiplicative model seems to have lesser variance on it's residuals
## Hence Let's pick Decomposition Multiplicative model
#### End of Question 5 ####