library(fpp2)
## Warning: package 'fpp2' was built under R version 3.6.1
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.6.1
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.fracdiff    fracdiff
##   residuals.fracdiff fracdiff
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.6.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.6.1

Read the csv

df <- read.csv("RegTS.csv")

Lets create a scatterplot matrix of these 6 variables

df %>%
as.data.frame() %>%
GGally::ggpairs()
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Lag1 sales is correlated with sale (0.876)s and lag 1 discount is correlated with the discount (0.867), apart from this,no correlation is observable.

Lets fit this model. But before that we need to convert it into a time series data.

df.ts <- ts(df,start=c(2013,10),end=c(2014,51),frequency=52)
autoplot(df.ts)

fit.consMR <- tslm(
sales_units ~ Lag1_Sales + discount_per + Lag1_Dis + promo_week_flg+age,
data=df.ts)
summary(fit.consMR)
## 
## Call:
## tslm(formula = sales_units ~ Lag1_Sales + discount_per + Lag1_Dis + 
##     promo_week_flg + age, data = df.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -167.802  -19.100   -1.484   14.663  186.980 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      50.45367   13.68713   3.686 0.000393 ***
## Lag1_Sales        0.69130    0.07389   9.356 7.58e-15 ***
## discount_per    170.94759   48.98121   3.490 0.000757 ***
## Lag1_Dis       -154.89133   47.19872  -3.282 0.001480 ** 
## promo_week_flg   39.21716   12.91200   3.037 0.003141 ** 
## age              -0.76070    0.32376  -2.350 0.021033 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.68 on 88 degrees of freedom
## Multiple R-squared:  0.8371, Adjusted R-squared:  0.8279 
## F-statistic: 90.45 on 5 and 88 DF,  p-value: < 2.2e-16
autoplot(df.ts[,'sales_units'], series="Data") +
autolayer(fitted(fit.consMR), series="Fitted") +
xlab("Year") + ylab("") +
ggtitle("Sales") +
guides(colour=guide_legend(title=" "))

Let us see the scatterplot between the actual and fitted

cbind(Data = df.ts[,"sales_units"],
Fitted = fitted(fit.consMR)) %>%
as.data.frame() %>%
ggplot(aes(x=Data, y=Fitted)) +
geom_point() +
xlab("Fitted (predicted values)") +
ylab("Data (actual values)") +
ggtitle("Sales-Blink") +
geom_abline(intercept=0, slope=1)

Lets Evaluate the Regression Model

checkresiduals(fit.consMR)

## 
##  Breusch-Godfrey test for serial correlation of order up to 19
## 
## data:  Residuals from Linear regression model
## LM test = 21.025, df = 19, p-value = 0.3354

Checking the scatter plot of the residuals against each of the predictor variables.

df <- as.data.frame(df.ts)
df[,"Residuals"] <- as.numeric(residuals(fit.consMR))
p1 <- ggplot(df, aes(x=Lag1_Sales, y=Residuals)) +
geom_point()
p2 <- ggplot(df, aes(x=discount_per, y=Residuals)) +
geom_point()
p3 <- ggplot(df, aes(x=Lag1_Dis, y=Residuals)) +
geom_point()
p4 <- ggplot(df, aes(x=promo_week_flg, y=Residuals)) +
geom_point()
p5 <- ggplot(df, aes(x=age, y=Residuals))+
geom_point()
gridExtra::grid.arrange(p1, p2, p3, p4,p5, nrow=2)

Lets check the residuals against the fitted value

cbind(Fitted = fitted(fit.consMR),
Residuals=residuals(fit.consMR)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) + geom_point()

The residuals seem to be inflated in variance.