###################################################
#
# Assignment 3
#
######################################################
#
#download the file Boston.dat from Blackboard to your MA611 work directory
#Under the file tab on R console, change the directory to your MA611 work directory
#Use the command dir() to see if Boston.dat is in the list of files
#Read the data and create a time series using the following commands ####
#### Analyzing time series data of robberies in Boston from Jan 1966 to Oct 1975 and forecasting for the months of Nov & Dec 1975. ####
## We would be using moving average decomposition, loess decomposition, Regression & Holt Winter Techniques 
bosdata=scan(file='Boston.dat')
library(FinTS)
## Warning: package 'FinTS' was built under R version 3.2.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.2.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
#To change the Boston.dat data into a ts object
tsbos=ts(bosdata,start=c(1966,1),frequency=12)
tsbos
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1966  41  39  50  40  43  38  44  35  39  35  29  49
## 1967  50  59  63  32  39  47  53  60  57  52  70  90
## 1968  74  62  55  84  94  70 108 139 120  97 126 149
## 1969 158 124 140 109 114  77 120 133 110  92  97  78
## 1970  99 107 112  90  98 125 155 190 236 189 174 178
## 1971 136 161 171 149 184 155 276 224 213 279 268 287
## 1972 238 213 257 293 212 246 353 339 308 247 257 322
## 1973 298 273 312 249 286 279 309 401 309 328 353 354
## 1974 327 324 285 243 241 287 355 460 364 487 452 391
## 1975 500 451 375 372 302 316 398 394 431 431
#
plot(tsbos)
## The above figure indicates that the robberies have been on the rise from 1966 to 1975
## After visual analysis, we could conclude the presence of Positive Trend and Seasonality.
## Further analysis to confirming these statements & for forecasts

bosdc=decompose(tsbos,type="additive")
str(bosdc)
## List of 6
##  $ x       : Time-Series [1:118] from 1966 to 1976: 41 39 50 40 43 38 44 35 39 35 ...
##  $ seasonal: Time-Series [1:118] from 1966 to 1976: 10.24 -4.84 -8.76 -28.96 -30.82 ...
##  $ trend   : Time-Series [1:118] from 1966 to 1976: NA NA NA NA NA ...
##  $ random  : Time-Series [1:118] from 1966 to 1976: NA NA NA NA NA ...
##  $ figure  : num [1:12] 10.24 -4.84 -8.76 -28.96 -30.82 ...
##  $ type    : chr "additive"
##  - attr(*, "class")= chr "decomposed.ts"
layout(1:4)
plot(tsbos)
plot(bosdc$seasonal)
plot(bosdc$trend)
plot(bosdc$random)

## Above plots shows the robbery data series along with seasonal, trend & random components of the Additive Decomposition Model

## Adding the Trend & Seasonal for the Additive Model ##
predad=bosdc$trend+bosdc$seasonal
layout(1:1)
plot(tsbos, lwd = 2)
lines(predad, col ="red", lwd = 2)

#### fit seems to be bad for the seasonal peaks and as it worse as the trend increases, We can clearly see the overestimation at the lower tail & underestimation at the upper tail ####


bosadresid = window(bosdc$random,start = c(1966, 7), end = c(1975, 4))
#### Removing some of the NA values from the data

rmsebosad = sqrt(sum(bosadresid^2)/length(bosadresid))
rmsebosad
## [1] 28.75867
#### RMSE is found to be 28.75 for the Additive Model ####
plot(bosadresid)
abline(0,0)

plot(bosdc$random)

## From the above plot, random fluctuations seems to increase with the level
## of the time series hence it could be interesting use of multiplicative method or even try transfromation


#### Multiplicative Decomposition Model ####
bosmt=decompose(tsbos,type="multiplicative")
layout(1:3)
plot(tsbos)
plot(bosmt$seasonal)
plot(bosmt$trend)

### The above shows the trend and seasonal components of the multiplicative model

## We are multiplying these components in this model
predmt=bosmt$trend * bosmt$seasonal
residmt=tsbos - predmt
residbosmt = window(residmt,start = c(1966, 7), end = c(1975, 4))
rmsemt=sqrt(sum(residbosmt^2)/length(residbosmt))
rmsemt
## [1] 27.10955
#### RMSE for multiplicative model is found to be 27.10, which is lower than the Additive Decomp model

layout(1:1)
plot(tsbos,lwd=2)
lines(predmt,col='red',lwd=2)

#### The fitted lines looks to be doing well at the lower tail and better than the Additive Model at the higher tail

plot(bosmt$random)

plot(residbosmt)
abline(0,0)

#### Finalizing the Decomposition Moving Average Models ####
### After looking at the RMSE value (though there's no right value) and comparison of fitted lines & resuidal plots, its safe to say Multiplicative Decomposition has a better fit
## Hence let's use the mutiplicative model for forecasting

##### Fit the trend line
mttr=bosmt$trend
plot(mttr)

str(mttr)
##  Time-Series [1:118] from 1966 to 1976: NA NA NA NA NA ...
class(mttr)
## [1] "ts"
timetr=time(mttr)

timr = unclass(timetr)

class(timr)
## [1] "numeric"
mtreg= lm(mttr~timr)

timwin=window(timr,start = c(1966, 7), end = c(1975, 4))

plot(mttr,lwd=2)
lines(timwin,fitted(mtreg),col='red',lwd=2)

timr=as.ts(timr)
summary(mtreg)
## 
## Call:
## lm(formula = mttr ~ timr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.249 -13.466   5.194  13.493  37.282 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.570e+04  1.560e+03  -54.95   <2e-16 ***
## timr         4.358e+01  7.913e-01   55.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.77 on 104 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.9669, Adjusted R-squared:  0.9665 
## F-statistic:  3033 on 1 and 104 DF,  p-value: < 2.2e-16
### We are getting an adjusted R-Square value of 96.65 which is more that good.
### The pv-value of the F-test in regression suggests that all the variables in the model are significant
##### Prediction of 1975 Nov & Dec ####
forecastmt=predict(mtreg,data.frame(timr=c(1975.833,1975.917)),se.fit=TRUE)
forecastmt$fit
##        1        2 
## 409.9966 413.6573
### Adding the seasonal Component ###
semt=tail(bosmt$seasonal)

forecastfinal=forecastmt$fit+semt[5:6]


forecaster=forecastmt$se.fit
forecastup=forecastfinal + 2 * forecaster
forecastlw=forecastfinal - 2 * forecaster
forecastup
##        1        2 
## 419.8782 423.5956
forecastlw
##        1        2 
## 402.2318 405.7123
#### Forecasts for November & December with 95% confidence interval are 420 to 402 & 424 to 406 respectively ####
######### END Of Question 1 ###############

#### Start of Question 2 ###################
##### LOESS decomposition model ####
# LOESS Model also uses both additive & Multiplicative methods for the fit
bosloess=stl(tsbos,s.window = "periodic")
str(bosloess)
## List of 8
##  $ time.series: mts [1:118, 1:3] 6.8 -6.58 -8.46 -27.01 -34.46 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:3] "seasonal" "trend" "remainder"
##   ..- attr(*, "tsp")= num [1:3] 1966 1976 12
##   ..- attr(*, "class")= chr [1:3] "mts" "ts" "matrix"
##  $ weights    : num [1:118] 1 1 1 1 1 1 1 1 1 1 ...
##  $ call       : language stl(x = tsbos, s.window = "periodic")
##  $ win        : Named num [1:3] 1181 19 13
##   ..- attr(*, "names")= chr [1:3] "s" "t" "l"
##  $ deg        : Named int [1:3] 0 1 1
##   ..- attr(*, "names")= chr [1:3] "s" "t" "l"
##  $ jump       : Named num [1:3] 119 2 2
##   ..- attr(*, "names")= chr [1:3] "s" "t" "l"
##  $ inner      : int 2
##  $ outer      : int 0
##  - attr(*, "class")= chr "stl"
plot(bosloess)

### Plot indicates the presence of trend, seasonal & random elements in the data ####

#### We are starting our analysis witht the Stl additive model ####
pred1=bosloess$time.series[,'seasonal'] + bosloess$time.series[,'trend']
plot(tsbos,lwd=2)
lines(pred1,col='red',lwd=2)

##### fitted line seems to be doing badly at both tails
##### missing the peaks as the trend increases (Overestimating at the lower tail & Underestimating at the higher tail)

rmse1=sqrt(sum(bosloess$time.series[,'remainder']^2)/length(bosloess$time.series[,'remainder']))
rmse1
## [1] 28.47619
#### rmse for loess additive is 28.47 #####

#### visualizing the remainder values ####
plot(bosloess$time.series[,'remainder'],ylab='REMAINDER')
abline(0,0)

#### Indicates the variance seem to be slightly increasing as the time increases

#### LOESS multiplicative model ####
## When using the LOESS multiplicative model we apply the natural log to turn the method into a additive model

logbos=log(tsbos)

plot(logbos,ylab='log(boston robbery)')

### Plot shows the distribution of the log(robbery data)
class(logbos)
## [1] "ts"
boslog=stl(logbos,s.window = 'periodic')
plot(boslog)

## Plot indicates the seasonal, trend & random elements in the data
#### seasonality seems to be constant over the time period and trend is linearly positive

pred2=boslog$time.series[,'seasonal'] + boslog$time.series[,'trend']
## We are adding seasonal & trend component because of the natural log
plot(logbos,lwd=1)
lines(pred2,col='red',lwd=3)

#### Fitted lines doesn't seem to fit well on the seasonal higher & lower peaks, We can see slight underestimation in the fit

## calculating the RMSE ####
residlg=tsbos - exp(pred2)
rsmelog=sqrt(sum(residlg^2)/length(residlg))
rsmelog
## [1] 27.0506
#### RSME for LOESS Multiplicative model is 27.0506 #####
#### Looking at the both the models(LOESS addictive & LOESS Multiplicative), the multiplicative one has the better RSME and fitted plot looks better as well, hence it's logical to choose the multiplicative model for forecast

## Forecasting for the last two months ##
trlogmt=boslog$time.series[,'trend']
plot(trlogmt)

#### There seems to be a positive trend with the increase in time, but we can see a dip in robberies just before the year 1970

str(trlogmt)
##  Time-Series [1:118] from 1966 to 1976: 3.76 3.75 3.74 3.73 3.73 ...
class(trlogmt)
## [1] "ts"
timelogmt=time(trlogmt)

timelgmt=unclass(timelogmt)

logmtreg=lm(trlogmt~timelgmt)
summary(logmtreg)
## 
## Call:
## lm(formula = trlogmt ~ timelgmt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34574 -0.11822 -0.00523  0.12403  0.28454 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.067e+02  1.003e+01  -50.52   <2e-16 ***
## timelgmt     2.597e-01  5.089e-03   51.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1569 on 116 degrees of freedom
## Multiple R-squared:  0.9573, Adjusted R-squared:  0.957 
## F-statistic:  2603 on 1 and 116 DF,  p-value: < 2.2e-16
#### We get a good Adjusted R-squared of 95.7% & p-value is very small hence the variables are significant

plot(trlogmt,lwd=2)
lines(timelgmt,fitted(logmtreg),col='red',lwd=2)

#### Forecasting for the logmultiplicative model ####
predmt=predict(logmtreg,data.frame(timelgmt=c(1975.833,1975.917)),se.fit = TRUE)
predmt
## $fit
##        1        2 
## 6.303092 6.324903 
## 
## $se.fit
##          1          2 
## 0.02907656 0.02944834 
## 
## $df
## [1] 116
## 
## $residual.scale
## [1] 0.1569314
##### Adding the seasonality value into the predicted values ####
seas=tail(boslog$time.series[,'seasonal'])
predval=predmt$fit + seas[5:6]
predval
##        1        2 
## 6.377526 6.340839
prederlg=predmt$se.fit
prederlgup=predval + 2*prederlg
prederlglw=predval - 2*prederlg

forecastlogmtup=exp(prederlgup)
forecastlogmtlw=exp(prederlglw)
forecastlogmtup
##        1        2 
## 623.7061 601.6857
forecastlogmtlw
##        1        2 
## 555.2248 534.8262
##### 95% confidence intervals for the predicted armed robberies in Nov 1975 are 624 and 555, whereas for Dec 1975, they are 602 and 535.  ####

##### End of Question 2 #####


##### Start of Question 3  (Linear Regression Model) #######

### Here we would be using function of time to represent the trend component and the seasona using dummy variables as factors ###

ltime=time(tsbos)
ltime
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 1966 1966.000 1966.083 1966.167 1966.250 1966.333 1966.417 1966.500
## 1967 1967.000 1967.083 1967.167 1967.250 1967.333 1967.417 1967.500
## 1968 1968.000 1968.083 1968.167 1968.250 1968.333 1968.417 1968.500
## 1969 1969.000 1969.083 1969.167 1969.250 1969.333 1969.417 1969.500
## 1970 1970.000 1970.083 1970.167 1970.250 1970.333 1970.417 1970.500
## 1971 1971.000 1971.083 1971.167 1971.250 1971.333 1971.417 1971.500
## 1972 1972.000 1972.083 1972.167 1972.250 1972.333 1972.417 1972.500
## 1973 1973.000 1973.083 1973.167 1973.250 1973.333 1973.417 1973.500
## 1974 1974.000 1974.083 1974.167 1974.250 1974.333 1974.417 1974.500
## 1975 1975.000 1975.083 1975.167 1975.250 1975.333 1975.417 1975.500
##           Aug      Sep      Oct      Nov      Dec
## 1966 1966.583 1966.667 1966.750 1966.833 1966.917
## 1967 1967.583 1967.667 1967.750 1967.833 1967.917
## 1968 1968.583 1968.667 1968.750 1968.833 1968.917
## 1969 1969.583 1969.667 1969.750 1969.833 1969.917
## 1970 1970.583 1970.667 1970.750 1970.833 1970.917
## 1971 1971.583 1971.667 1971.750 1971.833 1971.917
## 1972 1972.583 1972.667 1972.750 1972.833 1972.917
## 1973 1973.583 1973.667 1973.750 1973.833 1973.917
## 1974 1974.583 1974.667 1974.750 1974.833 1974.917
## 1975 1975.583 1975.667 1975.750
lseas=cycle(tsbos)
ldata=coredata(tsbos)
ldata
##   [1]  41  39  50  40  43  38  44  35  39  35  29  49  50  59  63  32  39
##  [18]  47  53  60  57  52  70  90  74  62  55  84  94  70 108 139 120  97
##  [35] 126 149 158 124 140 109 114  77 120 133 110  92  97  78  99 107 112
##  [52]  90  98 125 155 190 236 189 174 178 136 161 171 149 184 155 276 224
##  [69] 213 279 268 287 238 213 257 293 212 246 353 339 308 247 257 322 298
##  [86] 273 312 249 286 279 309 401 309 328 353 354 327 324 285 243 241 287
## [103] 355 460 364 487 452 391 500 451 375 372 302 316 398 394 431 431
lseas
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1966   1   2   3   4   5   6   7   8   9  10  11  12
## 1967   1   2   3   4   5   6   7   8   9  10  11  12
## 1968   1   2   3   4   5   6   7   8   9  10  11  12
## 1969   1   2   3   4   5   6   7   8   9  10  11  12
## 1970   1   2   3   4   5   6   7   8   9  10  11  12
## 1971   1   2   3   4   5   6   7   8   9  10  11  12
## 1972   1   2   3   4   5   6   7   8   9  10  11  12
## 1973   1   2   3   4   5   6   7   8   9  10  11  12
## 1974   1   2   3   4   5   6   7   8   9  10  11  12
## 1975   1   2   3   4   5   6   7   8   9  10
### Extracting the seasonality, time & the core data from the time series dataset

## Regression function ##
lnreg=lm(ldata~0+ltime+factor(lseas))
summary(lnreg)
## 
## Call:
## lm(formula = ldata ~ 0 + ltime + factor(lseas))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -90.911 -28.339  -0.222  23.333 119.000 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## ltime               41.978      1.349   31.11   <2e-16 ***
## factor(lseas)1  -82525.111   2658.540  -31.04   <2e-16 ***
## factor(lseas)2  -82539.409   2658.652  -31.05   <2e-16 ***
## factor(lseas)3  -82542.207   2658.765  -31.05   <2e-16 ***
## factor(lseas)4  -82561.606   2658.877  -31.05   <2e-16 ***
## factor(lseas)5  -82569.904   2658.989  -31.05   <2e-16 ***
## factor(lseas)6  -82570.702   2659.102  -31.05   <2e-16 ***
## factor(lseas)7  -82521.100   2659.214  -31.03   <2e-16 ***
## factor(lseas)8  -82504.198   2659.327  -31.02   <2e-16 ***
## factor(lseas)9  -82526.496   2659.439  -31.03   <2e-16 ***
## factor(lseas)10 -82524.994   2659.552  -31.03   <2e-16 ***
## factor(lseas)11 -82528.315   2658.993  -31.04   <2e-16 ***
## factor(lseas)12 -82523.813   2659.106  -31.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 41.47 on 105 degrees of freedom
## Multiple R-squared:  0.9721, Adjusted R-squared:  0.9686 
## F-statistic:   281 on 13 and 105 DF,  p-value: < 2.2e-16
### Adjusted R Square is 96.86 which is a good value and the P-value indicates that time variable is sufficient to predict the robberies in Boston
plot(lnreg)

### About the fitted vs Residual
## The Fitted Vs Residual plots above indicates the non-linear relationship between them hence we may have to look at other models for a better fit

##### The model has a good Adjusted R-Square value of 96.86% with very low p-values hence making all the varibales significant

lnresid=resid(lnreg)
lnhat=fitted(lnreg)
lnresid
##           1           2           3           4           5           6 
##  37.8000000  46.6000000  56.9000000  62.8000000  70.6000000  62.9000000 
##           7           8           9          10          11          12 
##  15.8000000 -13.6000000   9.2000000   0.2000000  -5.9777778   6.0222222 
##          13          14          15          16          17          18 
##   4.8222222  24.6222222  27.9222222  12.8222222  24.6222222  29.9222222 
##          19          20          21          22          23          24 
## -17.1777778 -30.5777778 -14.7777778 -24.7777778  -6.9555556   5.0444444 
##          25          26          27          28          29          30 
## -13.1555556 -14.3555556 -22.0555556  22.8444444  37.6444444  10.9444444 
##          31          32          33          34          35          36 
##  -4.1555556   6.4444444   6.2444444 -21.7555556   7.0666667  22.0666667 
##          37          38          39          40          41          42 
##  28.8666667   5.6666667  20.9666667   5.8666667  15.6666667 -24.0333333 
##          43          44          45          46          47          48 
## -34.1333333 -41.5333333 -45.7333333 -68.7333333 -63.9111111 -90.9111111 
##          49          50          51          52          53          54 
## -72.1111111 -53.3111111 -49.0111111 -55.1111111 -42.3111111 -18.0111111 
##          55          56          57          58          59          60 
## -41.1111111 -26.5111111  38.2888889 -13.7111111 -28.8888889 -32.8888889 
##          61          62          63          64          65          66 
## -77.0888889 -41.2888889 -31.9888889 -38.0888889   1.7111111 -29.9888889 
##          67          68          69          70          71          72 
##  37.9111111 -34.4888889 -26.6888889  34.3111111  23.1333333  34.1333333 
##          73          74          75          76          77          78 
## -17.0666667 -31.2666667  12.0333333  63.9333333 -12.2666667  19.0333333 
##          79          80          81          82          83          84 
##  72.9333333  38.5333333  26.3333333 -39.6666667 -29.8444444  27.1555556 
##          85          86          87          88          89          90 
##   0.9555556 -13.2444444  25.0555556 -22.0444444  19.7555556  10.0555556 
##          91          92          93          94          95          96 
## -13.0444444  58.5555556 -14.6444444  -0.6444444  24.1777778  17.1777778 
##          97          98          99         100         101         102 
## -12.0222222  -4.2222222 -43.9222222 -70.0222222 -67.2222222 -23.9222222 
##         103         104         105         106         107         108 
##  -9.0222222  75.5777778  -1.6222222 116.3777778  81.2000000  12.2000000 
##         109         110         111         112         113         114 
## 119.0000000  80.8000000   4.1000000  17.0000000 -48.2000000 -36.9000000 
##         115         116         117         118 
##  -8.0000000 -32.4000000  23.4000000  18.4000000
ltime=unclass(ltime)
class(lnhat)
## [1] "numeric"
plot(tsbos)
lines(ltime,lnhat,col='red',lwd=2)

rmseln=sqrt(sum(lnresid^2)/length(lnresid))
rmseln
## [1] 39.1229
### RMSE value for Linear model of time is 39.12 ###

### Fitted lines seems to be doing badly as the trend increases, mostly at the higher tail it is undersetimating
### Hence this may not be the optimial model
### We must check for Auto-correaltions

#### Histogram of Residuals ####
hist(lnresid)

### Histogram indicates the slight left skewness of the residuals

acf(lnresid)

### The plot indicates the correlations between the time series and their lags.
## In the above plot significant evidence that the last 4 months of data are significant in the time series estimation
AutocorTest(lnresid)
## 
##  Box-Ljung test
## 
## data:  lnresid
## X-squared = 85.372, df = 5, p-value < 2.2e-16
## The Bos-Ljung values is small hence we can conclude that the auto-correlation is less between the residual hence the fit is good.


#### Quardratic Model of Time ####  

# Testing to see if the fit has improved or not #
lnregqu=lm(ldata~0+ltime + I(ltime^2)+factor(lseas))
summary(lnregqu)
## 
## Call:
## lm(formula = ldata ~ 0 + ltime + I(ltime^2) + factor(lseas))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -78.785 -20.525  -7.814  23.835 102.406 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## ltime           -8.271e+03  1.941e+03  -4.260 4.50e-05 ***
## I(ltime^2)       2.109e+00  4.925e-01   4.282 4.14e-05 ***
## factor(lseas)1   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)2   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)3   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)4   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)5   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)6   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)7   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)8   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)9   8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)10  8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)11  8.109e+06  1.913e+06   4.239 4.88e-05 ***
## factor(lseas)12  8.109e+06  1.913e+06   4.239 4.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.42 on 104 degrees of freedom
## Multiple R-squared:  0.9762, Adjusted R-squared:  0.9731 
## F-statistic: 305.3 on 14 and 104 DF,  p-value: < 2.2e-16
## The Adjusted R-Square value seems to be increased from the linear model of time
plot(lnregqu)

## The Residual vs Fitted line has  become more straigther than linear model also, which means its closer to the linear trend and relationship between the residuals
## Normal QQ plots indicates better Normality in the residual of quadratic model than the linear.

quresid=resid(lnregqu)
quhat=fitted(lnregqu)

rmsequ=sqrt(sum(quresid^2)/length(quresid))
rmsequ
## [1] 36.07243
### RMSE for the quardratic is found to be 36.07 ###
ltime=unclass(ltime)
class(quhat)
## [1] "numeric"
plot(tsbos)
lines(ltime,quhat,col='red',lwd=2)

## Fit gets underestimated as the trend increases ##
hist(quresid)

## Historgram of residuals indicates a better normal distribution of resiual values when compared to the linear regression model

acf(quresid)

## ACF plot of the quadartic model of time is looks slightly better than the linear one
AutocorTest(quresid)
## 
##  Box-Ljung test
## 
## data:  quresid
## X-squared = 55.621, df = 5, p-value = 9.724e-11
## After looking at the residual plots and the RMSE value of the linear & Quadratic model of time(also the ACF plots), it's right to conclude that the Quadratic function has improved the fitted hence lets use the same for forecasting.

timpr=c(1975.833,1974.917)
seapr=c(11:12)
predreg=predict(lnregqu,data.frame(ltime=timpr,lseas=seapr),se.fit = TRUE )
predrval=predreg$fit

predreger=predreg$se.fit
class(predreger)
## [1] "numeric"
predupse=predrval + 2 * predreger
predlwse=predrval - 2 * predreger
predupse
##        1        2 
## 484.0639 428.3073
predlwse
##        1        2 
## 417.0189 370.1042
### 95% confidence intervals for the predicted robberies in Nov 1975 are 417 and 484, whereas for Dec 1975, they are 428 and 370.

######## End of Question 3 #######


##### Start of Question 4 (Holt-Winter Model) ######
## Model uses exponential weighted moving averages method ##

hwbos=HoltWinters(tsbos,seasonal = 'multiplicative')
str(hwbos)
## List of 9
##  $ fitted      : mts [1:106, 1:4] 47.1 56 59.6 30.4 36.6 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:4] "xhat" "level" "trend" "season"
##   ..- attr(*, "tsp")= num [1:3] 1967 1976 12
##   ..- attr(*, "class")= chr [1:3] "mts" "ts" "matrix"
##  $ x           : Time-Series [1:118] from 1966 to 1976: 41 39 50 40 43 38 44 35 39 35 ...
##  $ alpha       : Named num 0.213
##   ..- attr(*, "names")= chr "alpha"
##  $ beta        : Named num 0.00645
##   ..- attr(*, "names")= chr "beta"
##  $ gamma       : Named num 0.425
##   ..- attr(*, "names")= chr "gamma"
##  $ coefficients: Named num [1:14] 355.5 2.01 1.16 1.13 1.15 ...
##   ..- attr(*, "names")= chr [1:14] "a" "b" "s1" "s2" ...
##  $ seasonal    : chr "multiplicative"
##  $ SSE         : num 193990
##  $ call        : language HoltWinters(x = tsbos, seasonal = "multiplicative")
##  - attr(*, "class")= chr "HoltWinters"
hwbos$fitted
##               xhat     level    trend    season
## Jan 1967  47.10544  38.60606 1.038170 1.1882042
## Feb 1967  55.96233  40.16347 1.041519 1.3581447
## Mar 1967  59.60059  41.68171 1.044594 1.3949390
## Apr 1967  30.43937  43.24573 1.047945 0.6872172
## May 1967  36.56040  44.77771 1.051067 0.7977609
## Jun 1967  42.82511  46.48058 1.055271 0.9009013
## Jul 1967  56.00360  48.52358 1.061642 1.1294412
## Aug 1967  43.68749  49.01840 1.057986 0.8724170
## Sep 1967  51.89885  54.06176 1.083692 0.9411266
## Oct 1967  48.24019  56.30075 1.091144 0.8405402
## Nov 1967  41.88190  58.34530 1.097294 0.7045773
## Dec 1967  81.86587  67.94868 1.152160 1.1847304
## Jan 1968  86.95338  70.56424 1.161599 1.2123020
## Feb 1968  97.59894  69.44841 1.146909 1.3825128
## Mar 1968  94.11124  65.10699 1.111509 1.4212228
## Apr 1968  42.92925  60.35291 1.073674 0.6988709
## May 1968  61.23548  73.95246 1.154469 0.8153107
## Jun 1968  78.91238  83.67243 1.209718 0.9296699
## Jul 1968  93.19124  82.83882 1.196538 1.1089527
## Aug 1968  85.74517  86.88165 1.214897 0.9733091
## Sep 1968  98.16112  99.75875 1.290121 0.9714222
## Oct 1968  92.38269 105.84062 1.321028 0.8620872
## Nov 1968  92.41333 108.30324 1.328392 0.8429440
## Dec 1968 146.19032 118.12423 1.383171 1.2232741
## Jan 1969 139.58303 119.99697 1.386329 1.1499360
## Feb 1969 151.40690 124.79693 1.408347 1.1996876
## Mar 1969 147.81230 121.33600 1.376940 1.2045371
## Apr 1969 108.53532 121.33055 1.368023 0.8845687
## May 1969 117.50383 122.81054 1.368745 0.9462434
## Jun 1969 111.49189 123.39004 1.363654 0.8936961
## Jul 1969 137.39274 116.52749 1.310594 1.1659452
## Aug 1969 133.55054 114.65856 1.290085 1.1518077
## Sep 1969 121.87027 115.84676 1.289428 1.0404152
## Oct 1969 101.63657 114.70440 1.273742 0.8763424
## Nov 1969 107.77149 113.63434 1.258624 0.9380165
## Dec 1969 139.96193 112.44538 1.242837 1.2311033
## Jan 1970 124.88621 102.96061 1.173642 1.1992808
## Feb 1970 113.17783  99.53359 1.143967 1.1241615
## Mar 1970 119.06100  99.50623 1.136411 1.1830076
## Apr 1970  89.02511  99.37045 1.128205 0.8858339
## May 1970  95.41996 100.73323 1.129718 0.9367485
## Jun 1970  82.32023 102.44999 1.133505 0.7947234
## Jul 1970 129.63085 115.03015 1.207338 1.1152241
## Aug 1970 140.70015 121.08609 1.238613 1.1502187
## Sep 1970 133.52953 131.46031 1.297539 1.0058127
## Oct 1970 132.20976 154.47256 1.437603 0.8479868
## Nov 1970 155.57070 170.18454 1.529676 0.9059862
## Dec 1970 182.91427 176.04992 1.557642 1.0298789
## Jan 1971 198.15038 176.59050 1.551082 1.1123196
## Feb 1971 185.04776 166.23227 1.474264 1.1034022
## Mar 1971 190.70275 163.06124 1.444301 1.1592482
## Apr 1971 144.29947 160.88292 1.420935 0.8890699
## May 1971 155.81959 163.43075 1.428203 0.9451691
## Jun 1971 158.65861 171.21388 1.469194 0.9187849
## Jul 1971 205.40654 171.83433 1.463719 1.1852790
## Aug 1971 239.22612 185.99257 1.545602 1.2756129
## Sep 1971 228.97947 184.99402 1.529191 1.2276191
## Oct 1971 177.76915 183.74880 1.511296 0.9595652
## Nov 1971 197.04526 207.74602 1.656335 0.9409887
## Dec 1971 231.90942 225.47434 1.760002 1.0205738
## Jan 1972 237.52036 238.73984 1.834215 0.9873066
## Feb 1972 255.63008 240.67760 1.834883 1.0540904
## Mar 1972 263.55145 233.89241 1.779281 1.1182991
## Apr 1972 212.26467 234.42301 1.771227 0.8986869
## May 1972 257.28954 255.34243 1.894737 1.0002036
## Jun 1972 227.38624 247.58595 1.832485 0.9116657
## Jul 1972 335.43592 253.77026 1.860555 1.3121889
## Aug 1972 324.95677 258.48381 1.878957 1.2480923
## Sep 1972 317.20042 262.76101 1.894426 1.1985411
## Oct 1972 297.35310 263.01927 1.883873 1.1224974
## Nov 1972 269.04814 255.34192 1.822201 1.0462118
## Dec 1972 281.58563 254.70957 1.806369 1.0977315
## Jan 1973 263.01824 264.36309 1.856984 0.9879730
## Feb 1973 273.78346 273.76698 1.905663 0.9931470
## Mar 1973 307.63404 275.50450 1.904579 1.1089545
## Apr 1973 281.39354 278.24823 1.909991 1.0044094
## May 1973 258.37636 273.28405 1.865652 0.9390392
## Jun 1973 265.24719 281.41973 1.906095 0.9361914
## Jul 1973 384.96551 286.45695 1.926291 1.3349095
## Aug 1973 352.06669 276.25390 1.848054 1.2659627
## Sep 1973 342.09830 286.34060 1.901195 1.1868449
## Oct 1973 300.23276 282.29773 1.862855 1.0565602
## Nov 1973 300.52638 289.76217 1.898986 1.0303956
## Dec 1973 349.80660 302.51564 1.969000 1.1488481
## Jan 1974 316.66836 305.26264 1.974018 1.0306985
## Feb 1974 308.93117 309.37319 1.987799 0.9921961
## Mar 1974 352.76357 314.59808 2.008679 1.1142010
## Apr 1974 294.80520 303.64377 1.925065 0.9647751
## May 1974 287.64633 294.12373 1.851242 0.9718603
## Jun 1974 273.79883 285.74473 1.785255 0.9522444
## Jul 1974 363.30462 290.48484 1.804314 1.2429631
## Aug 1974 387.21981 290.86507 1.795129 1.3231037
## Sep 1974 351.47134 304.38463 1.870753 1.1476414
## Oct 1974 337.97612 308.58225 1.885762 1.0886021
## Nov 1974 371.92643 339.64625 2.073968 1.0883946
## Dec 1974 414.75027 357.40128 2.175114 1.1534413
## Jan 1975 372.29417 355.18759 2.146805 1.0418649
## Feb 1975 388.94364 383.46033 2.315323 1.0082120
## Mar 1975 417.17820 398.89484 2.399944 1.0395804
## Apr 1975 357.81513 392.64704 2.344164 0.9058812
## May 1975 367.54749 398.32875 2.365692 0.9172762
## Jun 1975 375.10637 385.46344 2.267449 0.9674400
## Jul 1975 464.86499 374.70874 2.183454 1.2334163
## Aug 1975 515.54688 365.33742 2.108923 1.4030535
## Sep 1975 407.55404 348.98166 1.989822 1.1612170
## Oct 1975 441.36740 355.27503 2.017581 1.2353107
plot(tsbos,lwd=1)
lines(hwbos$fitted[,1],col='red',lwd=2)

### Fitted lines looks to miss the peaks at the middle as well as at the upper tail

hwresid=tsbos - hwbos$fitted[,'xhat']
rmsehwm=sqrt(sum(hwresid^2)/length(hwresid))
rmsehwm
## [1] 42.77963
#### RMSE for the holtwinter Multiplicative model is found to be 42.779 ####

plot(hwresid)

### Holt Winters Additive model ###
hwbosad=HoltWinters(tsbos,seasonal = 'additive')
str(hwbosad)
## List of 9
##  $ fitted      : mts [1:106, 1:4] 47.8 57.7 61.7 31.1 37.3 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:4] "xhat" "level" "trend" "season"
##   ..- attr(*, "tsp")= num [1:3] 1967 1976 12
##   ..- attr(*, "class")= chr [1:3] "mts" "ts" "matrix"
##  $ x           : Time-Series [1:118] from 1966 to 1976: 41 39 50 40 43 38 44 35 39 35 ...
##  $ alpha       : Named num 0.6
##   ..- attr(*, "names")= chr "alpha"
##  $ beta        : Named num 0.000254
##   ..- attr(*, "names")= chr "beta"
##  $ gamma       : Named num 0.205
##   ..- attr(*, "names")= chr "gamma"
##  $ coefficients: Named num [1:14] 421.6 1.11 -6.89 2.33 5.76 ...
##   ..- attr(*, "names")= chr [1:14] "a" "b" "s1" "s2" ...
##  $ seasonal    : chr "additive"
##  $ SSE         : num 170403
##  $ call        : language HoltWinters(x = tsbos, seasonal = "additive")
##  - attr(*, "class")= chr "HoltWinters"
hwbosad$fitted
##               xhat     level    trend       season
## Jan 1967  47.79006  38.60606 1.038170   8.14583333
## Feb 1967  57.73686  40.96919 1.038507  15.72916667
## Mar 1967  61.74121  42.76501 1.038699  17.93750000
## Apr 1967  31.07647  44.55841 1.038891 -14.52083333
## May 1967  37.25253  46.15100 1.039032  -9.93750000
## Jun 1967  43.92285  48.23772 1.039298  -5.35416667
## Jul 1967  57.55751  51.12191 1.039766   5.39583333
## Aug 1967  45.65581  49.42924 1.039072  -4.81250000
## Sep 1967  57.92208  59.06832 1.041257  -2.18750000
## Oct 1967  54.20203  59.55675 1.041117  -6.39583333
## Nov 1967  48.42259  59.27764 1.040781 -11.89583333
## Dec 1967  82.19499  73.25509 1.044068   7.89583333
## Jan 1968  88.35090  78.97862 1.045257   8.32701705
## Feb 1968  88.29564  71.41985 1.043071  15.83272609
## Mar 1968  75.77722  56.69745 1.039066  18.04070348
## Apr 1968  31.87039  45.27960 1.035901 -14.44511677
## May 1968  68.81925  77.56964 1.043841  -9.79423232
## Jun 1968  89.65631  93.71052 1.047677  -5.10188378
## Jul 1968  89.04018  82.97332 1.044683   5.02218130
## Aug 1968  92.79639  95.38530 1.047571  -3.63647784
## Sep 1968 122.92560 124.13409 1.054608  -2.26309749
## Oct 1968 117.91246 123.43466 1.054163  -6.57636910
## Nov 1968 102.87502 111.95083 1.050977 -10.12678892
## Dec 1968 136.45655 126.86631 1.054500   8.53573456
## Jan 1969 143.64805 135.44120 1.056410   7.15044490
## Feb 1969 159.83772 145.10227 1.058596  13.67685244
## Mar 1969 142.06488 124.67448 1.053138  16.33726306
## Apr 1969 115.37123 124.48963 1.052823 -10.17122059
## May 1969 115.04469 121.72260 1.051853  -7.72976431
## Jun 1969 116.48638 122.14811 1.051693  -6.71342515
## Jul 1969 107.14818  99.52588 1.045679   6.57662012
## Aug 1969 109.47603 108.27682 1.047636   0.15156945
## Sep 1969 121.97644 123.42818 1.051220  -2.50295551
## Oct 1969 110.05746 117.29896 1.049395  -8.29089691
## Nov 1969 100.33785 107.52207 1.046645  -8.23086531
## Dec 1969 117.17778 106.56752 1.046136   9.56412149
## Jan 1970  93.49202  84.12475 1.040169   8.32710299
## Feb 1970 100.24688  88.46721 1.041008  10.73866271
## Mar 1970 110.76704  93.55703 1.042037  16.16797172
## Apr 1970  85.68694  95.33828 1.042224 -10.69357185
## May 1970  92.19386  98.96639 1.042881  -7.81541409
## Jun 1970  94.58334 103.49033 1.043766  -9.95075397
## Jul 1970 131.44899 122.77030 1.048399   7.63028913
## Aug 1970 141.07082 137.93863 1.051986   2.08020463
## Sep 1970 165.90053 168.32594 1.059439  -3.48485564
## Oct 1970 202.71205 211.41329 1.070116  -9.77135510
## Nov 1970 196.82590 204.26239 1.068028  -8.50452187
## Dec 1970 199.06187 191.64523 1.064551   6.35209406
## Jan 1971 189.92223 180.08220 1.061343   8.77867992
## Feb 1971 161.16011 148.81466 1.053129  11.29232379
## Mar 1971 167.09395 149.77179 1.053105  16.26905747
## Apr 1971 143.88049 153.16675 1.053700 -10.33996118
## May 1971 151.00493 157.28984 1.054480  -7.33939201
## Jun 1971 171.72890 178.12640 1.059506  -7.45701472
## Jul 1971 179.77426 169.15616 1.056957   9.56114142
## Aug 1971 235.06826 227.90494 1.071614   6.09171031
## Sep 1971 225.67286 222.34061 1.069929   2.26231693
## Oct 1971 205.98502 215.81257 1.067998 -10.89555057
## Nov 1971 251.35965 260.65646 1.079120 -10.37592488
## Dec 1971 277.41921 271.71224 1.081654   4.62531620
## Jan 1972 283.97895 278.53803 1.083114   4.35781456
## Feb 1972 264.40992 252.05461 1.076110  11.27919716
## Mar 1972 239.96565 222.30808 1.068280  16.58929834
## Apr 1972 224.73988 233.58924 1.070874  -9.92023345
## May 1972 272.03225 275.58524 1.081272  -4.63425918
## Jun 1972 232.91795 240.67438 1.072128  -8.82854922
## Jul 1972 268.11422 249.58980 1.074120  17.45030129
## Aug 1972 307.82822 301.55691 1.087050   5.18426812
## Sep 1972 323.64801 321.33289 1.091798   1.22332051
## Oct 1972 309.22304 313.04298 1.089414  -4.90934740
## Nov 1972 268.89506 276.82677 1.079937  -9.01165010
## Dec 1972 277.26400 270.77507 1.078125   5.41080618
## Jan 1973 300.34764 298.67451 1.084939   0.58818595
## Feb 1973 306.50082 298.35193 1.084581   7.06430575
## Mar 1973 298.41656 279.35121 1.079479  17.98587553
## Apr 1973 285.33228 288.57459 1.081548  -4.32386226
## May 1973 259.39320 267.87324 1.076014  -9.55606106
## Jun 1973 278.22534 284.90128 1.080066  -7.75600512
## Jul 1973 311.93572 286.44579 1.080184  24.40974315
## Aug 1973 294.58553 285.76587 1.079737   7.73991604
## Sep 1973 351.68240 350.64605 1.095946  -0.05959649
## Oct 1973 317.23059 326.15191 1.089445 -10.01076326
## Nov 1973 324.80232 333.69812 1.091085  -9.98687793
## Dec 1973 361.86893 351.69502 1.095380   9.07853023
## Jan 1974 349.56251 348.07261 1.094182   0.39571264
## Feb 1974 341.04797 335.63952 1.090745   4.31770889
## Mar 1974 346.69688 326.50921 1.088148  19.09952658
## Apr 1974 284.38335 290.60720 1.078751  -7.30259884
## May 1974 260.57247 266.87470 1.072447  -7.37467676
## Jun 1974 249.58951 256.21254 1.069466  -7.69249380
## Jul 1974 304.95556 279.71134 1.075164  24.16905558
## Aug 1974 328.33768 310.79048 1.082787  16.46440851
## Sep 1974 388.35486 390.81098 1.102842  -3.55895425
## Oct 1974 369.28325 377.31194 1.099132  -9.12782276
## Nov 1974 442.42976 448.98776 1.117062  -7.67506443
## Dec 1974 465.39454 455.84263 1.118520   8.43338806
## Jan 1975 412.01124 412.35815 1.107188  -1.45409596
## Feb 1975 470.25931 466.21871 1.120591   2.92001461
## Mar 1975 470.95135 455.79244 1.117657  14.04124854
## Apr 1975 389.79038 399.38279 1.103042 -10.69545222
## May 1975 381.94066 389.81967 1.100332  -8.97934433
## Jun 1975 339.45464 342.99185 1.088156  -4.62535884
## Jul 1975 359.37443 330.01785 1.084583  28.27199713
## Aug 1975 382.60960 354.26026 1.090466  27.25887028
## Sep 1975 357.71630 362.17981 1.092201  -5.55571099
## Oct 1975 408.83567 407.20901 1.103364   0.52329785
plot(tsbos,lwd=1)
lines(hwbos$fitted[,1],col='red',lwd=2)

hwresidad=tsbos - hwbosad$fitted[,'xhat']
plot(hwresidad)

rmsehwad=sqrt(sum(hwresidad^2)/length(hwresidad))
rmsehwad
## [1] 40.09463
### RMSE for Holt Winter Additive is found to be 40.094
## Since RMSE is smaller for holt winter additive model than the multiplicative model, let's choose additive model for the forecast
## Even the residual plot looks more spread out and constant variance than the multiplicative method

prehwad=predict(hwbosad,n.ahead = 10,prediction.interval = TRUE)
plot(hwbosad,predicted.values = prehwad)

prehwad
##               fit      upr      lwr
## Nov 1975 415.8172 494.3304 337.3040
## Dec 1975 426.1485 517.6976 334.5993
## Jan 1976 430.6809 533.6338 327.7280
## Feb 1976 427.3689 540.5876 314.1502
## Mar 1976 433.3092 555.9420 310.6765
## Apr 1976 416.0873 547.4655 284.7092
## May 1976 413.8147 553.3955 274.2340
## Jun 1976 423.9065 571.2377 276.5753
## Jul 1976 463.0003 617.6978 308.3029
## Aug 1976 460.8610 622.5931 299.1290
##95% confidence intervals for the predicted armed robberies in Nov 1975 are 337 and 494, whereas for Dec 1975, they are 518 and 335

###### End of Question 4 #######

#### Start of Question 5 ######

# RMSE of Decomposition Additive Model --------- 28.75
# RMSE of Decomposition Multiplicative Model --- 27.10
# RMSE of LOESS Additive Model ----------------- 28.47
# RMSE of LOESS Multiplicative Model ----------- 27.05
# RMSE of Linear Model of time ----------------- 39.12
# RMSE of Quadratic Model of time -------------- 36.07
# RMSE of Holt-Winters Multiplicative Model ---- 42.78
# RMSE of Holt-Winters Additive Model ---------- 40.094

## Looking at the RMSE values of all the models LOESS Multiplicative & Decomposition Multiplicative seems to be best among the lot



plot(residmt)

## Residual of Decomposition Multiplicative model ##

plot(residlg)

## Residual of LOESS Multiplicative model ##

## From the RMSE values there's no much difference between the two models
## But when we look at their residual plots, Decompose multiplicative model seems to have lesser variance on it's residuals
## Hence Let's pick Decomposition Multiplicative model 


#### End of Question 5 ####