#Introduction
#1. In the last few years, growth in the chocolate market has been tremendous in terms of value and volume.
#2. There has been an increase in international brands and an extension of the product portfolios of the existing players in the market.
#3. Sales in this sector are driven by several factors such as type of retail store, town, availability of a refrigerator, etc.
#4. Objective: Identify how the presence and volume of refrigeration affect the sales of chocolates
#5. For the scope of this project, sales data for major chocolate manufacturers has been taken
#6. Sales data is from 4 major Indian states: Chattisgarh, Gujarat, Madhya Pradesh and Maharashtra
knitr::opts_chunk$set(echo = TRUE, fig.width=12, fig.height=8)
library(data.table)
library(car)
library(plyr)
library(nortest) #Shapiro-Wilks & Anderson-Darling test
library(caret) #BoxCox Transformation
library(lmtest)
library(gplots)
library(shiny)
library(png)
library(grid)
library(jtools)
library(jpeg)
#OBJECTIVE
#To study the effect of presence of refrigerator on sales of Chocolates in India
#To study the relative impact of factors such as fridge Volume, Retailer type, town etc. on sales of `Chocolates`
# Ask the user to pick the sales CSV interactively, then read it.
myfile <- file.choose()
fulldata <- read.csv(file = myfile)
# Preview: the first six rows of the loaded data
head(x = fulldata, n = 6L)
## OutletCode TownClass Town State
## 1 1 TITANIUM Pune Maharashtra
## 2 2 GOLD Gwalior Madhya Pradesh
## 3 3 TITANIUM Nerul Maharashtra
## 4 4 SILVER KORBA Chattisgarh
## 5 5 TITANIUM Kalyan Maharashtra
## 6 7 SILVER KANKAVLI Maharashtra
## RetailerClass Fridge.Volume Has.Fridge Sales
## 1 OTHERS 340 1 123860.13
## 2 HIGH END GROCER 50 1 79153.05
## 3 HIGH END GROCER 35 1 227851.06
## 4 HIGH END GROCER 35 1 31397.78
## 5 OTHERS 50 1 24210.18
## 6 OTHERS 0 0 20226.00
# Put the columns of `fulldata` on the search path so they can be referenced
# by bare name.
# NOTE(review): this attaches a snapshot of the unfiltered data; `fulldata` is
# filtered and attached again further down, which masks this copy.
attach(fulldata)
#Variable Description
#Outlet Code- Unique Code specific to Outlets
#Town Class - Titanium , Gold , Silver and Rest of Urban depending on the following two factors:
#1) Revenue generated on an average from that Town
#2) Disposable Income of the individuals staying there
#Titanium being highest, followed by Gold, Silver and Rest of Urban
#Town - The town where the outlet concerned is located
#State- The state where the outlet concerned is located
#RetailerClass- Only the major 4 categories of the retailers are retained and the remaining low frequency categories are clubbed under "OTHERS"
#Fridge- The volume of the Fridge in Litres. 0 meaning No Fridge
#Has Fridge - Binary variable: 1 -Has Fridge, 0 -No Fridge
#Sales New - Y Variable of concerned - Annual Sales generated by each store
#Parent Firm - The revenue share of the parent firm in that shop as compared with all the Chocolate firms which supply their products in that shop
#Data Structure
# Widen the console so each variable's summary prints on a single line.
options(width = 10000)
# Descriptive statistics per column, limited to the headline measures.
psych::describe(fulldata)[, c("n", "mean", "sd", "median", "skew", "kurtosis", "se")]
## n mean sd median skew kurtosis se
## OutletCode 242595 126617.51 72905.95 126633.00 -0.01 -1.20 148.02
## TownClass* 242595 2.98 1.23 4.00 -0.69 -1.21 0.00
## Town* 242595 116.32 67.58 136.00 -0.24 -1.04 0.14
## State* 242595 3.28 0.94 4.00 -0.82 -0.82 0.00
## RetailerClass* 242595 3.52 1.15 4.00 -1.26 0.43 0.00
## Fridge.Volume 242595 46.15 95.63 0.00 2.40 4.38 0.19
## Has.Fridge 242595 0.43 0.50 0.00 0.28 -1.92 0.00
## Sales 242595 46288.43 77949.27 18058.44 3.34 12.43 158.26
#Removing Outliers
#Manually Removing points with sales less than 1000
library(gplots)
# Keep only outlets whose annual sales exceed 1000; smaller values are treated
# as outliers. which() also drops rows with a missing Sales value, which a bare
# logical subset would otherwise turn into all-NA rows.
fulldata <- fulldata[which(fulldata$Sales > 1000), ]
boxplot(fulldata$Sales,
        main = "Annual Sales: After Outlier Removal",
        xlab = "Annual Sales", horizontal = TRUE)

# Attach the filtered data so later calls can use bare column names (for
# example model formulas fitted without a `data` argument).
# NOTE(review): the earlier attached copy stays on the search path and is
# masked, as the message below reports; columns changed on `fulldata` after
# this point are not reflected in the attached copy.
attach(fulldata)
## The following objects are masked from fulldata (pos = 3):
##
## Fridge.Volume, Has.Fridge, OutletCode, RetailerClass, Sales, State, Town, TownClass
# Open the spreadsheet viewer only in an interactive session; View() needs a
# GUI and may fail when the script is run in batch mode.
if (interactive()) {
  View(fulldata)
}
# Rows and columns remaining after the Sales filter
dim(fulldata)
## [1] 232143 8
colnames(fulldata)
## [1] "OutletCode" "TownClass" "Town" "State" "RetailerClass" "Fridge.Volume" "Has.Fridge" "Sales"
#After Removing Sales below 1000
boxplot(x = fulldata$Sales, horizontal = TRUE,
        xlab = "Annual Sales",
        main = "Distribution of Annual Sales")

#Removing Outliers
#dim(fulldata) above reports 232,143 outlets remaining after the filter
knitr::opts_chunk$set(fig.align = "center", fig.width = 12, fig.height = 8)
hist(x = fulldata$Sales, xlab = "Annual Sales", main = "Distribution of Sales")

#Interaction Plot
knitr::opts_chunk$set(fig.width = 12, fig.height = 18, fig.align = "center")
# Mean annual sales per fridge volume, one trace per town class.
# interaction.plot() has no `data` argument: passing one only forwarded it to
# the low-level drawing calls, which raised '"data" is not a graphical
# parameter' warnings. The columns are supplied through with() instead, so the
# call no longer depends on an attached copy of the data.
with(
  fulldata,
  interaction.plot(
    x.factor = Fridge.Volume, trace.factor = TownClass, response = Sales,
    main = "Interaction Plot of Fridge Volume and Town Class",
    xlab = "Fridge Volume", ylab = "Annual Sales",
    col = c("red", "black", "green", "blue"),
    fixed = TRUE, lwd = 5,
    leg.bty = "o"
  )
)

#Conclusion
#At higher fridge volumes, sales vary significantly more across town classes than at lower fridge volumes
knitr::opts_chunk$set(fig.width = 12, fig.height = 8, fig.align = "center")
# Vertical box plots: the fridge indicator is the grouping (x) axis and annual
# sales are on the y axis, so the axis labels are assigned accordingly.
boxplot(Sales ~ Has.Fridge, data = fulldata,
        main = "Distribution of sales with and without fridge",
        xlab = "Has Fridge?", ylab = "Annual Sales",
        horizontal = FALSE)

#Plots
# Raw scatter of annual sales against fridge volume
plot(x = fulldata$Fridge.Volume, y = fulldata$Sales,
     xlab = "Fridge Vol", ylab = "Sales")

#Fridge Vol VS Average Sales
#Given the average sales based on fridge (has or not), we notice that having fridge increases sales significantly to close to Rs. 90,000 from Rs. 15,000
# Mean annual sales for every distinct fridge volume
salesvol <- aggregate(x = fulldata$Sales,
                      by = list(FridgeVolume = fulldata$Fridge.Volume),
                      FUN = mean)
plot(salesvol, main = "Variation of Average Sales with Fridge Volume ",
     xlab = "Fridge Volume", ylab = "Average Sales")

#To confirm that presence of fridge effect sales
options(width = 10000)
knitr::opts_chunk$set(fig.width = 12, fig.height = 8, fig.align = "center")
# Mean annual sales for outlets without (0) and with (1) a fridge. The grouping
# column is labelled HasFridge because it holds the Has.Fridge indicator, not a
# fridge volume.
aggregate(fulldata$Sales, by = list(HasFridge = fulldata$Has.Fridge), FUN = mean)
## HasFridge x
## 1 0 15622.28
## 2 1 89334.69
# Side-by-side box plots of sales for the two groups
boxplot(fulldata$Sales[fulldata$Has.Fridge == 0],
        fulldata$Sales[fulldata$Has.Fridge == 1],
        main = "1 -> No Fridge, 2 -> Has Fridge", ylab = "Annual Sales")

#Main sales drivers
#1. Through this we found that food store and high end grocers drive the highest sales
options(width = 10000)
knitr::opts_chunk$set(fig.width = 12, fig.height = 26, fig.align = "center")
par(cex.axis = 1)
# Group on a factor: plot() on the two-column result then draws one box per
# category. A character grouping column cannot be placed on a numeric axis,
# and read.csv() returns character columns from R 4.0.0 onwards.
AvgSales_RetailerClass <- aggregate(fulldata$Sales,
                                    by = list(RE = factor(fulldata$RetailerClass)),
                                    FUN = mean)
plot(AvgSales_RetailerClass, xlab = "RetailerClass", ylab = "Average Sales", las = 0)

#It was found that Maharashtra and Madhya Pradesh drive the highest sales
par(cex.axis = 1)
# Same summary per state, again grouped on a factor
AvgSales_State <- aggregate(fulldata$Sales,
                            by = list(State = factor(fulldata$State)),
                            FUN = mean)
plot(AvgSales_State, xlab = "State", ylab = "Average Sales", las = 0)


#Main sales drivers: Visualization
options(width = 10000)
knitr::opts_chunk$set(fig.width = 12, fig.height = 8, fig.align = "center")
# Group means of annual sales per town class, with the mean values labelled.
# `frame = TRUE` is not passed: the lower-level drawing calls reject it with
# '"frame" is not a graphical parameter' warnings.
plotmeans(Sales ~ TownClass, data = fulldata,
          main = "Mean plot of Sales by Town Class",
          xlab = "Town Class", ylab = "Annual Sales",
          mean.labels = TRUE)

# Treat the categorical predictors as factors.
fulldata$Has.Fridge <- as.factor(fulldata$Has.Fridge)
fulldata$TownClass <- as.factor(fulldata$TownClass)
# State is converted in place (not written to a separate Has.State column), so
# the column used by the models is the factor.
fulldata$State <- as.factor(fulldata$State)
fulldata$RetailerClass <- as.factor(fulldata$RetailerClass)
levels(fulldata$TownClass)
## [1] "GOLD" "REST OF URBAN" "SILVER" "TITANIUM"
# Baseline categories for the regression contrasts
fulldata$TownClass <- relevel(fulldata$TownClass, ref = "REST OF URBAN")
fulldata$RetailerClass <- relevel(fulldata$RetailerClass, ref = "OTHERS")
options(width = 10000)
#Regression Model 1
# Simple regression of annual sales on fridge volume. The columns are named in
# the formula and looked up in `data`, which keeps the coefficient names clean
# and lets predict() accept new data.
fit1 <- lm(Sales ~ Fridge.Volume, data = fulldata)
#Model 1: Regression Output: We tried to find out Effect of presence of Fridge in sales revenue
#Conclusion: The p value < .05, implying that the variables Sales and Fridge.Volume are significantly related. With a 1 litre increment in fridge volume, the annual sales of the shop increase by Rs. 360.
options(width = 10000)
#Regression Model 2
# Fit on `fulldata` explicitly. Without `data`, the formula is resolved against
# the copy attached before the factor conversion and relevel() calls, so the
# chosen baselines (REST OF URBAN, OTHERS) are ignored.
# NOTE: the summary recorded below was produced without `data` and shows the
# default baselines; re-run to refresh it.
fit2 <- lm(Sales ~ Fridge.Volume + TownClass + RetailerClass + State,
           data = fulldata)
summary(fit2)
##
## Call:
## lm(formula = Sales ~ Fridge.Volume + TownClass + RetailerClass +
## State)
##
## Residuals:
## Min 1Q Median 3Q Max
## -222621 -21155 -9093 9673 442787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37539.998 824.478 45.532 < 2e-16 ***
## Fridge.Volume 208.271 1.476 141.095 < 2e-16 ***
## TownClassREST OF URBAN -5279.439 516.782 -10.216 < 2e-16 ***
## TownClassSILVER -4979.501 421.724 -11.807 < 2e-16 ***
## TownClassTITANIUM 4203.844 334.986 12.549 < 2e-16 ***
## RetailerClassFOOD STORE 109406.753 681.875 160.450 < 2e-16 ***
## RetailerClassHIGH END GROCER 84771.770 604.172 140.311 < 2e-16 ***
## RetailerClassLOW END GROCER -16145.923 390.282 -41.370 < 2e-16 ***
## RetailerClassOTHERS -5330.029 566.002 -9.417 < 2e-16 ***
## StateGujarat -4532.301 789.137 -5.743 9.29e-09 ***
## StateMadhya Pradesh -5613.364 803.701 -6.984 2.87e-12 ***
## StateMaharashtra 1796.006 761.021 2.360 0.0183 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62330 on 232131 degrees of freedom
## Multiple R-squared: 0.3785, Adjusted R-squared: 0.3785
## F-statistic: 1.285e+04 on 11 and 232131 DF, p-value: < 2.2e-16
#Model 2: Effect of other factors -Linear Linear
#Conclusion: The p-value of the model is less than .05, implying the X variables are related to the Y variable. All the X variables are related to the Y variable as their individual p-values are < .05. The adjusted R square value of the model is 37%
options(width = 10000)
#Regression Model 3: Considering Interactions
# Fitted on `fulldata` explicitly so the releveled factors are used and the
# model's observation names are the row names of `fulldata`.
fit3 <- lm(Sales ~ Fridge.Volume + TownClass + RetailerClass + State +
             TownClass * Fridge.Volume + RetailerClass * Fridge.Volume,
           data = fulldata)
#Model Fit3 : With interactions considered, the Adjusted R square value improved from 37% to 38.92%
options(width = 10000)
library(gplots)
# Stepwise model selection starting from fit3
Model_new <- step(fit3, trace = 0, steps = 1000)
knitr::opts_chunk$set(fig.width = 12, fig.height = 8, fig.align = "center")
# Diagnostic plot number 5 of the selected model
plot(Model_new, 5)

#fit 3 is declared the best model by step function
#Checking outliers
#We have sales data for 232145 outlets currently
#966 outliers were detected by the outlierTest
#After removing outliers, data for 231179 outlets remained
#After removing outliers, the adjusted R-squared has improved from 38.92% to 41.67% even for the same model
outliers <- outlierTest(Model_new, n.max = 1000, order = FALSE, digits = 3)
# outlierTest() labels its results with the observations' row names and, when
# nothing is significant, still reports the single most extreme observation.
# Rows are therefore dropped by name (not by position), and only when the test
# flags them as significant.
outlier_rows <- if (isTRUE(outliers$signif)) names(outliers$bonf.p) else character(0)
fulldata.or <- fulldata[!(rownames(fulldata) %in% outlier_rows), ]
x <- dim(fulldata.or)
options(width = 10000)
# Same specification as fit3, refitted on the outlier-free data
fit4 <- lm(Sales ~ Fridge.Volume + TownClass + RetailerClass + State +
             TownClass * Fridge.Volume + RetailerClass * Fridge.Volume,
           data = fulldata.or)


#Testing for NonLinearity
#From the Anderson-Darling test the distribution is found to be non-normal since p < .05
# NOTE(review): the test below is applied to the Sales values themselves, not
# to model residuals - confirm which one the conclusion is meant to describe.
library(gplots)
nortest::ad.test(x = fulldata.or$Sales)
##
## Anderson-Darling normality test
##
## data: fulldata.or$Sales
## A = 29883, p-value < 2.2e-16
#BoxCox Transformation
#Lambda Value is 0, so log transform was taken on Sales
# Estimate the Box-Cox power for Sales and print the fitted transformation
Sales_BoxCox <- caret::BoxCoxTrans(y = fulldata.or$Sales)
print(Sales_BoxCox)
## Box-Cox Transformation
##
## 231215 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1000 7346 19466 46759 50360 454670
##
## Largest/Smallest: 455
## Sample Skewness: 3.31
##
## Estimated Lambda: 0
## With fudge factor, Lambda = 0 will be used for transformations
# Store the natural log of Sales as a new column
fulldata.or[["Sales.Log"]] <- log(fulldata.or[["Sales"]])
# Expose the outlier-free columns (including Sales.Log) by bare name
attach(what = fulldata.or)
## The following objects are masked from fulldata (pos = 3):
##
## Fridge.Volume, Has.Fridge, OutletCode, RetailerClass, Sales, State, Town, TownClass
## The following objects are masked from fulldata (pos = 4):
##
## Fridge.Volume, Has.Fridge, OutletCode, RetailerClass, Sales, State, Town, TownClass
#Best Model
#Running regression after transformation
# Log-linear model with fridge-volume interactions. `data` is given explicitly
# so the fit does not depend on which data frame is currently attached.
fit5 <- lm(Sales.Log ~ Fridge.Volume + TownClass + RetailerClass + State +
             TownClass * Fridge.Volume + RetailerClass * Fridge.Volume +
             State * Fridge.Volume + RetailerClass * Fridge.Volume * TownClass,
           data = fulldata.or)
#Best Model Summary
options(width = 10000)
summary(fit5)
##
## Call:
## lm(formula = Sales.Log ~ Fridge.Volume + TownClass + RetailerClass +
## State + TownClass * Fridge.Volume + RetailerClass * Fridge.Volume +
## State * Fridge.Volume + RetailerClass * Fridge.Volume * TownClass)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1517 -0.7508 0.0939 0.8261 3.3742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.514e+00 3.075e-02 309.461 < 2e-16 ***
## Fridge.Volume 1.015e-02 5.639e-04 17.994 < 2e-16 ***
## TownClassGOLD -3.184e-01 3.594e-02 -8.859 < 2e-16 ***
## TownClassSILVER -2.465e-01 3.372e-02 -7.310 2.69e-13 ***
## TownClassTITANIUM -1.407e-01 2.928e-02 -4.807 1.53e-06 ***
## RetailerClassCHEMIST -2.494e-01 3.537e-02 -7.051 1.78e-12 ***
## RetailerClassFOOD STORE 9.347e-01 5.997e-02 15.586 < 2e-16 ***
## RetailerClassHIGH END GROCER 1.119e+00 6.283e-02 17.804 < 2e-16 ***
## RetailerClassLOW END GROCER -7.709e-02 2.893e-02 -2.665 0.007696 **
## StateGujarat 7.670e-03 1.527e-02 0.502 0.615394
## StateMadhya Pradesh -3.542e-02 1.552e-02 -2.283 0.022448 *
## StateMaharashtra 1.702e-01 1.470e-02 11.578 < 2e-16 ***
## Fridge.Volume:TownClassGOLD 2.601e-03 5.324e-04 4.886 1.03e-06 ***
## Fridge.Volume:TownClassSILVER 9.168e-04 5.736e-04 1.598 0.109949
## Fridge.Volume:TownClassTITANIUM -1.160e-03 4.986e-04 -2.327 0.019969 *
## Fridge.Volume:RetailerClassCHEMIST 9.341e-05 6.945e-04 0.134 0.893013
## Fridge.Volume:RetailerClassFOOD STORE -2.412e-03 6.251e-04 -3.858 0.000114 ***
## Fridge.Volume:RetailerClassHIGH END GROCER -2.001e-03 6.533e-04 -3.062 0.002196 **
## Fridge.Volume:RetailerClassLOW END GROCER -9.174e-04 5.165e-04 -1.776 0.075692 .
## Fridge.Volume:StateGujarat -4.056e-03 2.915e-04 -13.915 < 2e-16 ***
## Fridge.Volume:StateMadhya Pradesh -3.610e-03 2.958e-04 -12.205 < 2e-16 ***
## Fridge.Volume:StateMaharashtra -4.687e-03 2.880e-04 -16.276 < 2e-16 ***
## TownClassGOLD:RetailerClassCHEMIST 5.973e-01 4.494e-02 13.291 < 2e-16 ***
## TownClassSILVER:RetailerClassCHEMIST 3.591e-01 4.447e-02 8.074 6.84e-16 ***
## TownClassTITANIUM:RetailerClassCHEMIST 7.805e-01 3.824e-02 20.409 < 2e-16 ***
## TownClassGOLD:RetailerClassFOOD STORE 1.076e+00 7.403e-02 14.528 < 2e-16 ***
## TownClassSILVER:RetailerClassFOOD STORE 5.357e-01 7.393e-02 7.246 4.30e-13 ***
## TownClassTITANIUM:RetailerClassFOOD STORE 1.072e+00 6.459e-02 16.598 < 2e-16 ***
## TownClassGOLD:RetailerClassHIGH END GROCER 8.353e-01 7.124e-02 11.725 < 2e-16 ***
## TownClassSILVER:RetailerClassHIGH END GROCER 5.829e-01 7.388e-02 7.889 3.05e-15 ***
## TownClassTITANIUM:RetailerClassHIGH END GROCER 8.468e-01 6.582e-02 12.865 < 2e-16 ***
## TownClassGOLD:RetailerClassLOW END GROCER 2.196e-01 3.775e-02 5.816 6.03e-09 ***
## TownClassSILVER:RetailerClassLOW END GROCER 5.408e-02 3.583e-02 1.509 0.131234
## TownClassTITANIUM:RetailerClassLOW END GROCER 1.572e-01 3.117e-02 5.043 4.60e-07 ***
## Fridge.Volume:TownClassGOLD:RetailerClassCHEMIST -3.846e-03 7.394e-04 -5.201 1.98e-07 ***
## Fridge.Volume:TownClassSILVER:RetailerClassCHEMIST -2.026e-03 7.934e-04 -2.553 0.010673 *
## Fridge.Volume:TownClassTITANIUM:RetailerClassCHEMIST -1.434e-03 7.052e-04 -2.034 0.041955 *
## Fridge.Volume:TownClassGOLD:RetailerClassFOOD STORE -4.155e-03 6.849e-04 -6.066 1.31e-09 ***
## Fridge.Volume:TownClassSILVER:RetailerClassFOOD STORE -1.482e-03 7.254e-04 -2.042 0.041108 *
## Fridge.Volume:TownClassTITANIUM:RetailerClassFOOD STORE -9.815e-04 6.410e-04 -1.531 0.125745
## Fridge.Volume:TownClassGOLD:RetailerClassHIGH END GROCER -4.808e-03 7.021e-04 -6.847 7.54e-12 ***
## Fridge.Volume:TownClassSILVER:RetailerClassHIGH END GROCER -2.590e-03 7.538e-04 -3.436 0.000590 ***
## Fridge.Volume:TownClassTITANIUM:RetailerClassHIGH END GROCER -1.717e-03 6.668e-04 -2.575 0.010014 *
## Fridge.Volume:TownClassGOLD:RetailerClassLOW END GROCER -2.641e-03 5.653e-04 -4.672 2.99e-06 ***
## Fridge.Volume:TownClassSILVER:RetailerClassLOW END GROCER -6.287e-04 6.109e-04 -1.029 0.303458
## Fridge.Volume:TownClassTITANIUM:RetailerClassLOW END GROCER -4.664e-04 5.283e-04 -0.883 0.377337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.118 on 231169 degrees of freedom
## Multiple R-squared: 0.2981, Adjusted R-squared: 0.298
## F-statistic: 2182 on 45 and 231169 DF, p-value: < 2.2e-16
#Checking Linearity
#Anderson Test
knitr::opts_chunk$set(fig.align = "center", fig.width = 12, fig.height = 8)
library(gplots)
# First diagnostic plot of the log model
plot(fit5, which = 1)

# Anderson-Darling test on the log-transformed sales
nortest::ad.test(x = fulldata.or$Sales.Log)
##
## Anderson-Darling normality test
##
## data: fulldata.or$Sales.Log
## A = 219.84, p-value < 2.2e-16
#Model does not seem to be linear visually
#The normality has been solved, however linearity still remains an issue, due to which the adjusted R square has gone down to 29.8%.
#Checking Normality
knitr::opts_chunk$set(fig.align = "center", fig.width = 12, fig.height = 8)
library(gplots)
# Second diagnostic plot of the log model
plot(fit5, which = 2)

#Data seems to be deviating from the normality at the extreme points, however normality has improved by great bounds compared to fit4 model
#Data Does not have multicollinearity
#Checking heteroskedasticity
# Breusch-Pagan test on the log model
a1 <- lmtest::bptest(fit5)
print(a1)
##
## studentized Breusch-Pagan test
##
## data: fit5
## BP = 7563.5, df = 45, p-value < 2.2e-16
# Score test for non-constant variance on the same model
b1 <- car::ncvTest(fit5)
print(b1)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 1610.473, Df = 1, p = < 2.22e-16
#Data has heteroskedasticity
#Removing Heteroskedasticity
## FGLS: log-linear model with interactions
# Re-fits the fit5 specification by weighted least squares, with weights built
# from an auxiliary regression of the log squared OLS residuals on the same
# regressors.
# Step 1: Residuals of the log-linear OLS model (fit5)
LogOLSModelRes <- resid(fit5)
# Step 2: Square the OLS residuals
LogOLSModelResSq <- LogOLSModelRes^2
# Step 3: Take the natural log of the squared residuals
# NOTE(review): a residual of exactly zero would give -Inf here.
lnOLSResSq <- log(LogOLSModelResSq)
# Step 4: Auxiliary OLS model - log squared residuals on the same right-hand side as fit5
auxOLSModel <- lm(lnOLSResSq ~ Fridge.Volume + TownClass + RetailerClass + State + TownClass*Fridge.Volume + RetailerClass*Fridge.Volume + State*Fridge.Volume +RetailerClass*Fridge.Volume*TownClass,data = fulldata.or)
# Step 5: Fitted values of the auxiliary model 'auxOLSModel'
fittedValue <- fitted(auxOLSModel)
# Step 6: Exponentiate the fitted values of the auxiliary model
expValue <- exp(fittedValue)
# Step 7: Fit the log-linear FGLS model, weighting each observation by 1/expValue
fit5_new <- lm(Sales.Log ~ Fridge.Volume + TownClass + RetailerClass + State + TownClass*Fridge.Volume + RetailerClass*Fridge.Volume + State*Fridge.Volume +RetailerClass*Fridge.Volume*TownClass,weights = 1/expValue,data = fulldata.or)
# Summary of the log-linear FGLS model
summary(fit5_new)
##
## Call:
## lm(formula = Sales.Log ~ Fridge.Volume + TownClass + RetailerClass +
## State + TownClass * Fridge.Volume + RetailerClass * Fridge.Volume +
## State * Fridge.Volume + RetailerClass * Fridge.Volume * TownClass,
## data = fulldata.or, weights = 1/expValue)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -12.1158 -1.2257 0.1649 1.3294 7.0519
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.428e+00 2.351e-02 401.032 < 2e-16 ***
## Fridge.Volume 1.887e-02 8.000e-04 23.586 < 2e-16 ***
## TownClassGOLD -3.091e-01 3.039e-02 -10.172 < 2e-16 ***
## TownClassSILVER -2.611e-01 2.640e-02 -9.891 < 2e-16 ***
## TownClassTITANIUM -1.180e-01 2.374e-02 -4.970 6.69e-07 ***
## RetailerClassCHEMIST -2.051e-01 2.823e-02 -7.265 3.75e-13 ***
## RetailerClassFOOD STORE 1.001e+00 5.312e-02 18.850 < 2e-16 ***
## RetailerClassHIGH END GROCER 1.195e+00 5.031e-02 23.760 < 2e-16 ***
## RetailerClassLOW END GROCER -2.955e-02 2.233e-02 -1.323 0.185696
## StateGujarat 3.678e-02 1.237e-02 2.973 0.002953 **
## StateMadhya Pradesh -3.678e-02 1.239e-02 -2.969 0.002983 **
## StateMaharashtra 1.990e-01 1.181e-02 16.847 < 2e-16 ***
## Fridge.Volume:TownClassGOLD -1.699e-03 7.921e-04 -2.145 0.031981 *
## Fridge.Volume:TownClassSILVER 4.358e-04 8.938e-04 0.488 0.625832
## Fridge.Volume:TownClassTITANIUM -5.771e-03 7.475e-04 -7.721 1.16e-14 ***
## Fridge.Volume:RetailerClassCHEMIST -1.167e-03 1.072e-03 -1.089 0.276292
## Fridge.Volume:RetailerClassFOOD STORE -8.203e-03 8.194e-04 -10.011 < 2e-16 ***
## Fridge.Volume:RetailerClassHIGH END GROCER -7.770e-03 8.029e-04 -9.677 < 2e-16 ***
## Fridge.Volume:RetailerClassLOW END GROCER -4.090e-03 7.655e-04 -5.343 9.16e-08 ***
## Fridge.Volume:StateGujarat -6.634e-03 3.382e-04 -19.615 < 2e-16 ***
## Fridge.Volume:StateMadhya Pradesh -5.642e-03 3.436e-04 -16.418 < 2e-16 ***
## Fridge.Volume:StateMaharashtra -8.075e-03 3.330e-04 -24.251 < 2e-16 ***
## TownClassGOLD:RetailerClassCHEMIST 5.642e-01 3.907e-02 14.443 < 2e-16 ***
## TownClassSILVER:RetailerClassCHEMIST 3.545e-01 3.633e-02 9.759 < 2e-16 ***
## TownClassTITANIUM:RetailerClassCHEMIST 7.581e-01 3.244e-02 23.366 < 2e-16 ***
## TownClassGOLD:RetailerClassFOOD STORE 9.628e-01 6.554e-02 14.691 < 2e-16 ***
## TownClassSILVER:RetailerClassFOOD STORE 4.666e-01 6.955e-02 6.709 1.97e-11 ***
## TownClassTITANIUM:RetailerClassFOOD STORE 9.582e-01 5.776e-02 16.589 < 2e-16 ***
## TownClassGOLD:RetailerClassHIGH END GROCER 7.471e-01 5.743e-02 13.010 < 2e-16 ***
## TownClassSILVER:RetailerClassHIGH END GROCER 4.833e-01 5.903e-02 8.186 2.71e-16 ***
## TownClassTITANIUM:RetailerClassHIGH END GROCER 7.370e-01 5.275e-02 13.970 < 2e-16 ***
## TownClassGOLD:RetailerClassLOW END GROCER 1.855e-01 3.216e-02 5.768 8.02e-09 ***
## TownClassSILVER:RetailerClassLOW END GROCER 1.891e-02 2.843e-02 0.665 0.506017
## TownClassTITANIUM:RetailerClassLOW END GROCER 1.341e-01 2.564e-02 5.230 1.70e-07 ***
## Fridge.Volume:TownClassGOLD:RetailerClassCHEMIST -3.205e-03 1.126e-03 -2.846 0.004432 **
## Fridge.Volume:TownClassSILVER:RetailerClassCHEMIST -4.369e-03 1.224e-03 -3.570 0.000358 ***
## Fridge.Volume:TownClassTITANIUM:RetailerClassCHEMIST -9.652e-04 1.085e-03 -0.889 0.373773
## Fridge.Volume:TownClassGOLD:RetailerClassFOOD STORE 7.756e-04 8.913e-04 0.870 0.384187
## Fridge.Volume:TownClassSILVER:RetailerClassFOOD STORE -4.106e-04 1.000e-03 -0.411 0.681376
## Fridge.Volume:TownClassTITANIUM:RetailerClassFOOD STORE 4.364e-03 8.396e-04 5.198 2.02e-07 ***
## Fridge.Volume:TownClassGOLD:RetailerClassHIGH END GROCER 8.209e-05 8.671e-04 0.095 0.924573
## Fridge.Volume:TownClassSILVER:RetailerClassHIGH END GROCER -1.154e-03 9.808e-04 -1.177 0.239202
## Fridge.Volume:TownClassTITANIUM:RetailerClassHIGH END GROCER 3.750e-03 8.204e-04 4.571 4.87e-06 ***
## Fridge.Volume:TownClassGOLD:RetailerClassLOW END GROCER 1.072e-03 8.316e-04 1.289 0.197304
## Fridge.Volume:TownClassSILVER:RetailerClassLOW END GROCER 2.918e-03 9.434e-04 3.093 0.001981 **
## Fridge.Volume:TownClassTITANIUM:RetailerClassLOW END GROCER 2.145e-03 7.823e-04 2.742 0.006106 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.805 on 231169 degrees of freedom
## Multiple R-squared: 0.3659, Adjusted R-squared: 0.3658
## F-statistic: 2965 on 45 and 231169 DF, p-value: < 2.2e-16
# Attach the outlier-free data once more
attach(what = fulldata.or)
# Write the FGLS coefficient estimates to A.csv in the working directory
A <- coef(fit5_new)
write.csv(x = A, file = "A.csv")
# Print the working directory, i.e. where A.csv was written
getwd()
## [1] "C:/Users/Anshumaan/Desktop/2018 DAM/PROJECT"
#Change in Percentage Sales for "100 litres" increment in Fridge volume
# Folder holding the pre-rendered result images. The original absolute folder
# is used when it exists; otherwise fall back to a "pic" folder under the
# working directory so the script is not tied to one machine.
pic_dir <- "C:/Users/Anshumaan/Desktop/2018 DAM/PROJECT/pic"
if (!dir.exists(pic_dir)) {
  pic_dir <- "pic"
}
img1 <- readJPEG(file.path(pic_dir, "equation.jpg"), native = FALSE)
# Start a fresh page so the image is not drawn over the previous plot
grid.newpage()
grid.raster(img1)

#Change in Percentage Sales for "100 litres" increment in Fridge volume
img2 <- readJPEG(file.path(pic_dir, "table.jpg"), native = FALSE)
grid.newpage()
grid.raster(img2)

#Insights Derived from the Tables
#Installing Fridges in the "Silver Town Class" for "Others" type of retailers seem most beneficial
#Installing Fridges in the Tier 3 Towns (Rest of Urban) in Chemists is also beneficial. It is because, Chemist is a moderate Sales driver and will be a good place to invest in. Fridge can also be used to store major drugs
#The -ve coefficients are not surprising as it implies most of the shops in those categories have fridge / air-conditioner already. Or, the demand for chocolate is saturated, so fridge will not result in any additional sale
#For Revenue Improvement: Target Gujarat. Invest more in Chemists and Other Retailers across the whole state
#For Society Welfare: Target Chattisgarh and MP (revenue generation is much low).Invest more on Chemists and Other Retailers over Silver and Gold Town Classes
#Some More Graphical Insights
#As Fridge Volume Increases, OTHERS Retailers drive more revenue - FRIDGE UPGRADATION TARGET
#FOOD STORE and HIGH END GROCER drive higher revenue at low Fridge Volume- NEW FRIDGE INSTALLATION TARGET
# Fridge volume against the model response, one line per retailer class
interact_plot(
  model = fit5_new,
  pred = "Fridge.Volume",
  modx = "RetailerClass",
  main.title = "Interaction of Fridge Volume and Retailer Class"
)

#Some More Graphical Insights
#REST OF URBAN & GOLD should be targetted for both FRIDGE UPGRADATION OR NEW FRIDGE INSTALLATION
interact_plot(
  model = fit5_new,
  pred = "Fridge.Volume",
  modx = "TownClass",
  main.title = "Interaction of Fridge Volume and Town Class"
)

#Some More Graphical Insights
#CHATTISGARH shows highest potential for FRIDGE INSTALLATION / UPGRADATION followed by MP
interact_plot(
  model = fit5_new,
  pred = "Fridge.Volume",
  modx = "State",
  main.title = "Interaction of Fridge Volume and State"
)

#Results
#This model has 37.91% adjusted R-squared which is an improvement - We have also taken interaction with Has.Fridge instead of Fridge.Vol.
#fulldata.or$Has.Fridge*fulldata.or$RetailerClass - w/0 47.29
#fulldata.or$Has.Fridge*fulldata.or$State - w/o 47.29
#fulldata.or$Has.Fridge*fulldata.or$TownClass - w/o all interaction ~28%
#all FridgeVol interactions ~30%
#fulldata.or$Fridge.Volume:fulldata.or$State w/o - 29.2
#fulldata.or$Fridge.Volume*fulldata.or$RetailerClass w/o - 28.2
#Conclusion: Last model "Best fit model" with adjusted R-squared 47.29%