library(fpp2)
## Warning: package 'fpp2' was built under R version 3.6.1
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.6.1
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.fracdiff fracdiff
## residuals.fracdiff fracdiff
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.6.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.6.1
Read the csv
df <- read.csv("RegTS.csv")
Lets create a scatterplot matrix of these 6 variables
df %>%
as.data.frame() %>%
GGally::ggpairs()
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
Lag1 sales is correlated with sale (0.876)s and lag 1 discount is correlated with the discount (0.867), apart from this,no correlation is observable.
Lets fit this model. But before that we need to convert it into a time series data.
df.ts <- ts(df,start=c(2013,10),end=c(2014,51),frequency=52)
autoplot(df.ts)
fit.consMR <- tslm(
sales_units ~ Lag1_Sales + discount_per + Lag1_Dis + promo_week_flg+age,
data=df.ts)
summary(fit.consMR)
##
## Call:
## tslm(formula = sales_units ~ Lag1_Sales + discount_per + Lag1_Dis +
## promo_week_flg + age, data = df.ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -167.802 -19.100 -1.484 14.663 186.980
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.45367 13.68713 3.686 0.000393 ***
## Lag1_Sales 0.69130 0.07389 9.356 7.58e-15 ***
## discount_per 170.94759 48.98121 3.490 0.000757 ***
## Lag1_Dis -154.89133 47.19872 -3.282 0.001480 **
## promo_week_flg 39.21716 12.91200 3.037 0.003141 **
## age -0.76070 0.32376 -2.350 0.021033 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.68 on 88 degrees of freedom
## Multiple R-squared: 0.8371, Adjusted R-squared: 0.8279
## F-statistic: 90.45 on 5 and 88 DF, p-value: < 2.2e-16
autoplot(df.ts[,'sales_units'], series="Data") +
autolayer(fitted(fit.consMR), series="Fitted") +
xlab("Year") + ylab("") +
ggtitle("Sales") +
guides(colour=guide_legend(title=" "))
Let us see the scatterplot between the actual and fitted
cbind(Data = df.ts[,"sales_units"],
Fitted = fitted(fit.consMR)) %>%
as.data.frame() %>%
ggplot(aes(x=Data, y=Fitted)) +
geom_point() +
xlab("Fitted (predicted values)") +
ylab("Data (actual values)") +
ggtitle("Sales-Blink") +
geom_abline(intercept=0, slope=1)
Lets Evaluate the Regression Model
checkresiduals(fit.consMR)
##
## Breusch-Godfrey test for serial correlation of order up to 19
##
## data: Residuals from Linear regression model
## LM test = 21.025, df = 19, p-value = 0.3354
Checking the scatter plot of the residuals against each of the predictor variables.
df <- as.data.frame(df.ts)
df[,"Residuals"] <- as.numeric(residuals(fit.consMR))
p1 <- ggplot(df, aes(x=Lag1_Sales, y=Residuals)) +
geom_point()
p2 <- ggplot(df, aes(x=discount_per, y=Residuals)) +
geom_point()
p3 <- ggplot(df, aes(x=Lag1_Dis, y=Residuals)) +
geom_point()
p4 <- ggplot(df, aes(x=promo_week_flg, y=Residuals)) +
geom_point()
p5 <- ggplot(df, aes(x=age, y=Residuals))+
geom_point()
gridExtra::grid.arrange(p1, p2, p3, p4,p5, nrow=2)
Lets check the residuals against the fitted value
cbind(Fitted = fitted(fit.consMR),
Residuals=residuals(fit.consMR)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) + geom_point()
The residuals seem to be inflated in variance.