#Introduction

#1. In the last few years, growth in the chocolate market has been tremendous in terms of both value and volume.

#2. There has been an increase in international brands and an extension of the product portfolios of the existing players in the market.

#3. Sales in this sector are driven by several factors such as type of retail store, town, availability of a refrigerator etc.

#4. Objective : Identify how the presence and volume of refrigerators affect the sales of chocolates

#5. For the scope of this project, sales data for major chocolate manufacturers has been taken

#6. Sales data is from 4 major Indian states : Chattisgarh, Gujarat, Madhya Pradesh and Maharashtra
# Global knitr chunk defaults: echo the code of every chunk and set the default figure width/height to 12 x 8.
knitr::opts_chunk$set(echo = TRUE, fig.width=12, fig.height=8)
library(data.table)
library(car)
library(plyr)
library(nortest) #Shapiro-Wilks & Anderson-Darling test
library(caret) #BoxCox Transformation 
library(lmtest)
library(gplots)
library(shiny)
library(png)
library(grid)
library(jtools)
library(jpeg)
#OBJECTIVE
#To study the effect of presence of refrigerator on sales of Chocolates in India

#To study the relative impact of factors such as fridge Volume, Retailer type, town etc. on sales of `Chocolates`
# Let the user pick the sales CSV interactively, then read it into a data frame.
myfile <- file.choose()
fulldata <- read.csv(file = myfile)
# Summary: preview of the first few data points
head(x = fulldata)
##   OutletCode TownClass     Town          State
## 1          1  TITANIUM     Pune    Maharashtra
## 2          2      GOLD  Gwalior Madhya Pradesh
## 3          3  TITANIUM    Nerul    Maharashtra
## 4          4    SILVER    KORBA    Chattisgarh
## 5          5  TITANIUM   Kalyan    Maharashtra
## 6          7    SILVER KANKAVLI    Maharashtra
##                    RetailerClass Fridge.Volume Has.Fridge     Sales
## 1                         OTHERS           340          1 123860.13
## 2 HIGH END GROCER                           50          1  79153.05
## 3 HIGH END GROCER                           35          1 227851.06
## 4 HIGH END GROCER                           35          1  31397.78
## 5                         OTHERS            50          1  24210.18
## 6                         OTHERS             0          0  20226.00
# Put the columns of fulldata on the search path so they can be referenced by bare name.
# NOTE(review): attach() exposes a copy of the columns; later changes made to fulldata itself
# are not reflected in the attached names -- confirm this is intended wherever bare names are used.
attach(fulldata)

#Variable Description
#Outlet Code- Unique Code specific to Outlets
#Town Class - Titanium , Gold , Silver and Rest of Urban depending on the following two factors:
#1) Revenue generated on an average from that Town
#2) Disposable Income of the individuals staying there
#Titanium being highest, followed by Gold, Silver and Rest of Urban
#Town - The town where the outlet concerned is located
#State- The state where the outlet concerned is located 
#RetailerClass- Only the major 4 categories of the retailers are retained and the remaining low frequency categories are clubbed under "OTHERS" 
#Fridge- The volume of the Fridge in Litres. 0 meaning No Fridge 
#Has Fridge - Binary variable: 1 -Has Fridge, 0 -No Fridge 
#Sales New - Y Variable of concerned - Annual Sales generated by each store 
#Parent Firm  - The revenue share of the parent firm in that shop as compared with all the Chocolate firms which supply their products in that shop
#Data Structure
# Very wide console so each variable's statistics print on a single line.
options(width = 10000)
# Descriptive statistics per column, restricted to the statistics shown in the
# table below (selected by name instead of by column position).
psych::describe(fulldata)[, c("n", "mean", "sd", "median", "skew", "kurtosis", "se")]
##                     n      mean       sd    median  skew kurtosis     se
## OutletCode     242595 126617.51 72905.95 126633.00 -0.01    -1.20 148.02
## TownClass*     242595      2.98     1.23      4.00 -0.69    -1.21   0.00
## Town*          242595    116.32    67.58    136.00 -0.24    -1.04   0.14
## State*         242595      3.28     0.94      4.00 -0.82    -0.82   0.00
## RetailerClass* 242595      3.52     1.15      4.00 -1.26     0.43   0.00
## Fridge.Volume  242595     46.15    95.63      0.00  2.40     4.38   0.19
## Has.Fridge     242595      0.43     0.50      0.00  0.28    -1.92   0.00
## Sales          242595  46288.43 77949.27  18058.44  3.34    12.43 158.26
#Removing Outliers
#Manually Removing points with sales less than 1000
library(gplots)
# Keep only outlets whose annual sales exceed 1000.
# which() turns the comparison into row positions, so a missing Sales value
# drops the row; indexing with a logical vector that contains NA would instead
# return an all-NA row for every missing value.
fulldata <- fulldata[which(fulldata$Sales > 1000), ]
# Distribution of the remaining sales values
boxplot(fulldata$Sales, main = "Annual Sales: After Outlier Removal",
        xlab = "Annual Sales", horizontal = TRUE)

# Re-attach the filtered data so bare column names refer to the reduced data set.
# attach() exposes a copy: later edits to fulldata are not visible through the bare names.
attach(fulldata)
## The following objects are masked from fulldata (pos = 3):
## 
##     Fridge.Volume, Has.Fridge, OutletCode, RetailerClass, Sales, State, Town, TownClass
# View() opens a data viewer, which is only meaningful (and may only be
# available) in an interactive session, so it is skipped in batch/knit runs.
if (interactive()) {
  View(fulldata)
}
# Rows x columns remaining after the filter
dim(fulldata)
## [1] 232143      8
colnames(fulldata)
## [1] "OutletCode"    "TownClass"     "Town"          "State"         "RetailerClass" "Fridge.Volume" "Has.Fridge"    "Sales"
#After Removing Sales below 1000
boxplot(fulldata$Sales , main = "Distribution of Annual Sales",
        xlab = "Annual Sales", horizontal = TRUE)

#Removing Outliers
#We have 232,145 outlets information now
# Figure defaults for the following chunks: 12 x 8, centred.
knitr::opts_chunk$set(fig.align = "center", fig.height = 8, fig.width = 12)
# Histogram of annual sales after the low-sales filter
hist(x = fulldata$Sales, xlab = "Annual Sales", main = "Distribution of Sales")

#Interaction Plot
knitr::opts_chunk$set(fig.width=12, fig.height=18, fig.align= "center")
# Mean annual sales at each fridge volume, one trace per town class.
# interaction.plot() has no `data` argument: supplying one only forwarded it to
# the low-level graphics calls, each of which warned that "data" is not a
# graphical parameter. The columns are taken from fulldata through with().
with(fulldata,
     interaction.plot(x.factor = Fridge.Volume,
                      trace.factor = TownClass,
                      response = Sales,
                      main = "Interaction Plot of Fridge Volume and Town Class",
                      xlab = "Fridge Volume", ylab = "Annual Sales",
                      col = c("red", "black", "green", "blue"),
                      fixed = TRUE, lwd = 5,
                      leg.bty = "o"))

#Conclusion

#At higher fridge volumes, sales vary considerably more across town classes than they do at lower fridge volumes
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.align= "center")
# Sales distribution split by fridge ownership.
# With horizontal = FALSE the Has.Fridge groups lie along the x axis and the
# sales values along the y axis, so the axis labels are assigned accordingly
# (they were previously swapped).
boxplot(Sales ~ Has.Fridge, data = fulldata,
        main = "Distribution of sales with and without fridge",
        xlab = "Has Fridge?", ylab = "Annual Sales",
        horizontal = FALSE)

#Plots
# One point per outlet: annual sales against fridge volume
plot(x = fulldata$Fridge.Volume, y = fulldata$Sales,
     xlab = "Fridge Vol", ylab = "Sales")

#Fridge Vol VS Average Sales

#Given the average sales based on fridge (has or not), we notice that having fridge increases sales significantly to close to Rs. 90,000 from Rs. 15,000

# Mean annual sales for each distinct fridge volume
# (result columns: FridgeVolume, x).
salesvol <- aggregate(
  x = fulldata$Sales,
  by = list(FridgeVolume = fulldata$Fridge.Volume),
  FUN = mean
)
plot(salesvol,
     main = "Variation of Average Sales with Fridge Volume ",
     xlab = "Fridge Volume",
     ylab = "Average Sales")

#To confirm that the presence of a fridge affects sales
options(width = 10000)
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.align= "center")
# Mean annual sales for outlets without (0) and with (1) a fridge.
# The grouping column is labelled HasFridge because it holds Has.Fridge, not a
# fridge volume (the earlier label "FridgeVolume" was misleading).
aggregate(fulldata$Sales, by = list(HasFridge = fulldata$Has.Fridge), mean)
##   HasFridge        x
## 1         0 15622.28
## 2         1 89334.69
# Side-by-side boxes: box 1 = outlets without a fridge, box 2 = outlets with one
with(fulldata,
     boxplot(Sales[Has.Fridge == 0], Sales[Has.Fridge == 1],
             main = "1 -> No Fridge, 2 -> Has Fridge", ylab = "Annual Sales"))

#Main sales drivers
#1. Through this we found that food store and high end grocers drive the highest sales
options(width = 10000)
knitr::opts_chunk$set(fig.width=12, fig.height=26, fig.align= "center")
par(cex.axis=1)
# Mean annual sales per retailer class.
# The grouping column is coerced to a factor: under R >= 4.0 read.csv() leaves
# text columns as character, and plot() on a data frame whose first column is
# character fails, whereas a factor column is plotted per level.
AvgSales_RetailerClass <- aggregate(fulldata$Sales,
                                    by = list(RE = as.factor(fulldata$RetailerClass)),
                                    mean)
plot(AvgSales_RetailerClass, xlab="RetailerClass", ylab="Average Sales",las=0)

#It was found that Maharashtra and Madhya Pradesh drive the highest sales
par(cex.axis=1)
# Mean annual sales per state (grouping column coerced to factor for the same reason)
AvgSales_State <- aggregate(fulldata$Sales,
                            by = list(State = as.factor(fulldata$State)),
                            mean)
plot(AvgSales_State, xlab="State", ylab="Average Sales",las=0)

#Main sales drivers: Visualization
options(width = 10000)
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.align= "center")
# Mean annual sales (with confidence bars) for each town class.
# `frame = TRUE` has been removed: plotmeans() has no such argument, so it was
# only passed on to the underlying graphics calls, which warned that "frame"
# is not a graphical parameter.
plotmeans(Sales ~ TownClass, data = fulldata,
          main = "Mean plot of Sales by Town Class",
          xlab = "Town Class", ylab = "Annual Sales",
          mean.labels = TRUE)

# Encode the categorical predictors as factors so the regressions treat them as
# categories and build one indicator per non-baseline level.
fulldata$Has.Fridge <- as.factor(fulldata$Has.Fridge)
fulldata$TownClass <- as.factor(fulldata$TownClass)
# Convert State in place. The previous code assigned the factor to a new
# `Has.State` column, which nothing else in this script reads, and left
# `State` itself unconverted.
fulldata$State <- as.factor(fulldata$State)
fulldata$RetailerClass <- as.factor(fulldata$RetailerClass)
levels(fulldata$TownClass)
## [1] "GOLD"          "REST OF URBAN" "SILVER"        "TITANIUM"
# Choose the baseline (reference) level that the other levels are compared against.
fulldata$TownClass <- relevel(fulldata$TownClass, ref = "REST OF URBAN")
fulldata$RetailerClass <- relevel(fulldata$RetailerClass, ref = "OTHERS")
options(width = 10000)
#Regression Model 1
# Simple linear regression of annual sales on fridge volume.
# The columns are named in the formula and supplied through `data`, so the
# slope is labelled `Fridge.Volume` (not `fulldata$Fridge.Volume`) and
# predict() can be used with new data.
fit1 <- lm(Sales ~ Fridge.Volume, data = fulldata)

#Model 1: Regression Output: We tried to find out Effect of presence of Fridge in sales revenue

#Conclusion: The p-value is < .05, implying that Sales and Fridge.Volume are significantly related. For every 1 litre increase in fridge volume, the annual sales of the shop increase by about Rs. 360.
options(width = 10000)
#Regression Model 2
# Additive model: fridge volume plus town class, retailer class and state.
# It is fitted on fulldata itself. Bare names previously resolved to the copies
# made by attach() before the factors were releveled, so the chosen baselines
# (TownClass "REST OF URBAN", RetailerClass "OTHERS") were silently ignored.
# NOTE: the summary transcript recorded below predates this change and still
# shows the default alphabetical baselines; re-run to refresh it.
fit2 <- lm(Sales ~ Fridge.Volume + TownClass + RetailerClass + State,
           data = fulldata)
summary(fit2)
## 
## Call:
## lm(formula = Sales ~ Fridge.Volume + TownClass + RetailerClass + 
##     State)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -222621  -21155   -9093    9673  442787 
## 
## Coefficients:
##                                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                  37539.998    824.478  45.532  < 2e-16 ***
## Fridge.Volume                                  208.271      1.476 141.095  < 2e-16 ***
## TownClassREST OF URBAN                       -5279.439    516.782 -10.216  < 2e-16 ***
## TownClassSILVER                              -4979.501    421.724 -11.807  < 2e-16 ***
## TownClassTITANIUM                             4203.844    334.986  12.549  < 2e-16 ***
## RetailerClassFOOD STORE                     109406.753    681.875 160.450  < 2e-16 ***
## RetailerClassHIGH END GROCER                 84771.770    604.172 140.311  < 2e-16 ***
## RetailerClassLOW END GROCER                 -16145.923    390.282 -41.370  < 2e-16 ***
## RetailerClassOTHERS                          -5330.029    566.002  -9.417  < 2e-16 ***
## StateGujarat                                 -4532.301    789.137  -5.743 9.29e-09 ***
## StateMadhya Pradesh                          -5613.364    803.701  -6.984 2.87e-12 ***
## StateMaharashtra                              1796.006    761.021   2.360   0.0183 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62330 on 232131 degrees of freedom
## Multiple R-squared:  0.3785, Adjusted R-squared:  0.3785 
## F-statistic: 1.285e+04 on 11 and 232131 DF,  p-value: < 2.2e-16
#Model 2: Effect of other factors -Linear Linear 
#Conclusion : The p-value of the model is less than .05, implying that the X variables are jointly related to the Y variable. Each X variable is also individually significant, as every individual p-value is < .05. The adjusted R-squared of the model is 37.85%
options(width = 10000)
#Regression Model 3: Considering Interactions
# Fitted on fulldata explicitly rather than on attached copies, so the factor
# baselines set with relevel() are the ones used and the observations of the
# model are labelled with the row names of fulldata.
fit3 <- lm(Sales ~ Fridge.Volume + TownClass + RetailerClass + State +
             TownClass * Fridge.Volume + RetailerClass * Fridge.Volume,
           data = fulldata)
#Model Fit3 : With interactions considered, the Adjusted R square value improved from 37% to 38.92% 
options(width = 10000)
library(gplots)
# Stepwise term selection starting from fit3 (no trace output, at most 1000 steps)
Model_new <- step(fit3, trace = 0, steps = 1000)
knitr::opts_chunk$set(fig.width=12, fig.height=8, fig.align= "center")
# Diagnostic plot number 5 of the selected model
plot(Model_new, 5)

#fit 3 is declared the best model by step function
#Checking outliers
#We have sales data for 232145 outlets currently
#966 outliers were detected by the outlierTest 
#After removing outliers, data for 231179 outlets remained
#After removing outliers, the adjusted R-squared has improved from 38.92% to 41.67% even for the same model

# Bonferroni outlier test on the studentized residuals; report up to 1000
# observations, in data order.
outliers <- outlierTest(Model_new, n.max = 1000, order = FALSE, digits = 3)
# The flagged observations are labelled with their row names. fulldata was
# filtered earlier, so its row names are no longer guaranteed to equal row
# positions; translate the labels to positions with match() before dropping
# them. rownums holds the negated positions of the rows to remove.
rownums <- -match(names(outliers$bonf.p), rownames(fulldata))
fulldata.or <- fulldata[rownums, ]
x <- dim(fulldata.or)
options(width = 10000)

# Same specification as fit3, refitted on the outlier-free data
fit4 <- lm(Sales ~ Fridge.Volume + TownClass + RetailerClass + State +
             TownClass * Fridge.Volume + RetailerClass * Fridge.Volume,
           data = fulldata.or)

#Testing for Non-Normality
#The Anderson-Darling test below is applied to the Sales values; since p < .05, Sales does not follow a normal distribution
library(gplots)
# Anderson-Darling normality test on annual sales of the outlier-free data
ad.test(x = fulldata.or$Sales)
## 
##  Anderson-Darling normality test
## 
## data:  fulldata.or$Sales
## A = 29883, p-value < 2.2e-16
#BoxCox Transformation
#Lambda Value is 0, so log transform was taken on Sales
# Estimate the Box-Cox lambda for annual sales and print the estimate
Sales_BoxCox <- BoxCoxTrans(y = fulldata.or$Sales)
Sales_BoxCox
## Box-Cox Transformation
## 
## 231215 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1000    7346   19466   46759   50360  454670 
## 
## Largest/Smallest: 455 
## Sample Skewness: 3.31 
## 
## Estimated Lambda: 0 
## With fudge factor, Lambda = 0 will be used for transformations
# Lambda = 0 corresponds to the natural-log transform: store log(Sales) as Sales.Log
fulldata.or[["Sales.Log"]] <- log(fulldata.or[["Sales"]])
# Expose the columns (including the new Sales.Log) by bare name for the model fitted next
attach(what = fulldata.or)
## The following objects are masked from fulldata (pos = 3):
## 
##     Fridge.Volume, Has.Fridge, OutletCode, RetailerClass, Sales, State, Town, TownClass
## The following objects are masked from fulldata (pos = 4):
## 
##     Fridge.Volume, Has.Fridge, OutletCode, RetailerClass, Sales, State, Town, TownClass
#Best Model
#Running regression after transformation
# Log-sales regression: main effects, the two-way interactions of fridge volume
# with town class, retailer class and state, and the three-way
# RetailerClass x Fridge.Volume x TownClass interaction.
# Variables are resolved by bare name (from the attached fulldata.or).
fit5 <- lm(
  Sales.Log ~ Fridge.Volume + TownClass + RetailerClass + State +
    TownClass * Fridge.Volume +
    RetailerClass * Fridge.Volume +
    State * Fridge.Volume +
    RetailerClass * Fridge.Volume * TownClass
)
#Best Model Summary
options(width = 10000)
summary(object = fit5)
## 
## Call:
## lm(formula = Sales.Log ~ Fridge.Volume + TownClass + RetailerClass + 
##     State + TownClass * Fridge.Volume + RetailerClass * Fridge.Volume + 
##     State * Fridge.Volume + RetailerClass * Fridge.Volume * TownClass)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1517 -0.7508  0.0939  0.8261  3.3742 
## 
## Coefficients:
##                                                                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                                                  9.514e+00  3.075e-02 309.461  < 2e-16 ***
## Fridge.Volume                                                                1.015e-02  5.639e-04  17.994  < 2e-16 ***
## TownClassGOLD                                                               -3.184e-01  3.594e-02  -8.859  < 2e-16 ***
## TownClassSILVER                                                             -2.465e-01  3.372e-02  -7.310 2.69e-13 ***
## TownClassTITANIUM                                                           -1.407e-01  2.928e-02  -4.807 1.53e-06 ***
## RetailerClassCHEMIST                                                        -2.494e-01  3.537e-02  -7.051 1.78e-12 ***
## RetailerClassFOOD STORE                                                      9.347e-01  5.997e-02  15.586  < 2e-16 ***
## RetailerClassHIGH END GROCER                                                 1.119e+00  6.283e-02  17.804  < 2e-16 ***
## RetailerClassLOW END GROCER                                                 -7.709e-02  2.893e-02  -2.665 0.007696 ** 
## StateGujarat                                                                 7.670e-03  1.527e-02   0.502 0.615394    
## StateMadhya Pradesh                                                         -3.542e-02  1.552e-02  -2.283 0.022448 *  
## StateMaharashtra                                                             1.702e-01  1.470e-02  11.578  < 2e-16 ***
## Fridge.Volume:TownClassGOLD                                                  2.601e-03  5.324e-04   4.886 1.03e-06 ***
## Fridge.Volume:TownClassSILVER                                                9.168e-04  5.736e-04   1.598 0.109949    
## Fridge.Volume:TownClassTITANIUM                                             -1.160e-03  4.986e-04  -2.327 0.019969 *  
## Fridge.Volume:RetailerClassCHEMIST                                           9.341e-05  6.945e-04   0.134 0.893013    
## Fridge.Volume:RetailerClassFOOD STORE                                       -2.412e-03  6.251e-04  -3.858 0.000114 ***
## Fridge.Volume:RetailerClassHIGH END GROCER                                  -2.001e-03  6.533e-04  -3.062 0.002196 ** 
## Fridge.Volume:RetailerClassLOW END GROCER                                   -9.174e-04  5.165e-04  -1.776 0.075692 .  
## Fridge.Volume:StateGujarat                                                  -4.056e-03  2.915e-04 -13.915  < 2e-16 ***
## Fridge.Volume:StateMadhya Pradesh                                           -3.610e-03  2.958e-04 -12.205  < 2e-16 ***
## Fridge.Volume:StateMaharashtra                                              -4.687e-03  2.880e-04 -16.276  < 2e-16 ***
## TownClassGOLD:RetailerClassCHEMIST                                           5.973e-01  4.494e-02  13.291  < 2e-16 ***
## TownClassSILVER:RetailerClassCHEMIST                                         3.591e-01  4.447e-02   8.074 6.84e-16 ***
## TownClassTITANIUM:RetailerClassCHEMIST                                       7.805e-01  3.824e-02  20.409  < 2e-16 ***
## TownClassGOLD:RetailerClassFOOD STORE                                        1.076e+00  7.403e-02  14.528  < 2e-16 ***
## TownClassSILVER:RetailerClassFOOD STORE                                      5.357e-01  7.393e-02   7.246 4.30e-13 ***
## TownClassTITANIUM:RetailerClassFOOD STORE                                    1.072e+00  6.459e-02  16.598  < 2e-16 ***
## TownClassGOLD:RetailerClassHIGH END GROCER                                   8.353e-01  7.124e-02  11.725  < 2e-16 ***
## TownClassSILVER:RetailerClassHIGH END GROCER                                 5.829e-01  7.388e-02   7.889 3.05e-15 ***
## TownClassTITANIUM:RetailerClassHIGH END GROCER                               8.468e-01  6.582e-02  12.865  < 2e-16 ***
## TownClassGOLD:RetailerClassLOW END GROCER                                    2.196e-01  3.775e-02   5.816 6.03e-09 ***
## TownClassSILVER:RetailerClassLOW END GROCER                                  5.408e-02  3.583e-02   1.509 0.131234    
## TownClassTITANIUM:RetailerClassLOW END GROCER                                1.572e-01  3.117e-02   5.043 4.60e-07 ***
## Fridge.Volume:TownClassGOLD:RetailerClassCHEMIST                            -3.846e-03  7.394e-04  -5.201 1.98e-07 ***
## Fridge.Volume:TownClassSILVER:RetailerClassCHEMIST                          -2.026e-03  7.934e-04  -2.553 0.010673 *  
## Fridge.Volume:TownClassTITANIUM:RetailerClassCHEMIST                        -1.434e-03  7.052e-04  -2.034 0.041955 *  
## Fridge.Volume:TownClassGOLD:RetailerClassFOOD STORE                         -4.155e-03  6.849e-04  -6.066 1.31e-09 ***
## Fridge.Volume:TownClassSILVER:RetailerClassFOOD STORE                       -1.482e-03  7.254e-04  -2.042 0.041108 *  
## Fridge.Volume:TownClassTITANIUM:RetailerClassFOOD STORE                     -9.815e-04  6.410e-04  -1.531 0.125745    
## Fridge.Volume:TownClassGOLD:RetailerClassHIGH END GROCER                    -4.808e-03  7.021e-04  -6.847 7.54e-12 ***
## Fridge.Volume:TownClassSILVER:RetailerClassHIGH END GROCER                  -2.590e-03  7.538e-04  -3.436 0.000590 ***
## Fridge.Volume:TownClassTITANIUM:RetailerClassHIGH END GROCER                -1.717e-03  6.668e-04  -2.575 0.010014 *  
## Fridge.Volume:TownClassGOLD:RetailerClassLOW END GROCER                     -2.641e-03  5.653e-04  -4.672 2.99e-06 ***
## Fridge.Volume:TownClassSILVER:RetailerClassLOW END GROCER                   -6.287e-04  6.109e-04  -1.029 0.303458    
## Fridge.Volume:TownClassTITANIUM:RetailerClassLOW END GROCER                 -4.664e-04  5.283e-04  -0.883 0.377337    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.118 on 231169 degrees of freedom
## Multiple R-squared:  0.2981, Adjusted R-squared:  0.298 
## F-statistic:  2182 on 45 and 231169 DF,  p-value: < 2.2e-16
#Checking Linearity
#Anderson Test
knitr::opts_chunk$set(fig.align = "center", fig.height = 8, fig.width = 12)
library(gplots)
# First lm diagnostic plot (residuals against fitted values) for fit5
plot(fit5, which = 1)

# Anderson-Darling normality test on the log-transformed sales
ad.test(x = fulldata.or$Sales.Log)
## 
##  Anderson-Darling normality test
## 
## data:  fulldata.or$Sales.Log
## A = 219.84, p-value < 2.2e-16
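#Visually, the residuals-vs-fitted plot suggests that the relationship is not linear
#Normality has improved after the log transform (the Anderson-Darling statistic fell from 29883 to 219.84, although the test still rejects normality); linearity remains an issue, and the adjusted R-squared has dropped to 29.8%.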
#Checking Normality
knitr::opts_chunk$set(fig.align = "center", fig.height = 8, fig.width = 12)
library(gplots)
# Second lm diagnostic plot (normal Q-Q plot of the residuals) for fit5
plot(fit5, which = 2)

#Data seems to be deviating from the normality at the extreme points, however normality has improved by great bounds compared to fit4 model
#Data Does not have multicollinearity
#Checking  heteroskedasticity
# Studentized Breusch-Pagan test for non-constant error variance in fit5
a1 <- bptest(formula = fit5)
a1
## 
##  studentized Breusch-Pagan test
## 
## data:  fit5
## BP = 7563.5, df = 45, p-value < 2.2e-16
# Score test for non-constant variance against the fitted values
b1 <- ncvTest(model = fit5)
b1
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1610.473, Df = 1, p = < 2.22e-16
#Data has heteroskedasticity
#Removing Heteroskedasticity
## FGLs Log- linear with Intaction
# Step 1: residuals of the log-linear OLS model (fit5)
LogOLSModelRes <- residuals(fit5)

# Step 2: squared residuals
LogOLSModelResSq <- LogOLSModelRes^2

# Step 3: natural log of the squared residuals
lnOLSResSq <- log(LogOLSModelResSq)

# Step 4: auxiliary OLS model -- regress the log squared residuals on the same
# right-hand side as fit5
auxOLSModel <- lm(
  lnOLSResSq ~ Fridge.Volume + TownClass + RetailerClass + State +
    TownClass * Fridge.Volume +
    RetailerClass * Fridge.Volume +
    State * Fridge.Volume +
    RetailerClass * Fridge.Volume * TownClass,
  data = fulldata.or
)

# Step 5: fitted values of the auxiliary model
fittedValue <- fitted(auxOLSModel)

# Step 6: exponentiate the fitted values to get the estimated error variances
expValue <- exp(fittedValue)

# Step 7: log-linear FGLS model -- refit the fit5 specification, weighting each
# observation by the inverse of its estimated variance
fit5_new <- lm(
  Sales.Log ~ Fridge.Volume + TownClass + RetailerClass + State +
    TownClass * Fridge.Volume +
    RetailerClass * Fridge.Volume +
    State * Fridge.Volume +
    RetailerClass * Fridge.Volume * TownClass,
  weights = 1/expValue,
  data = fulldata.or
)
# Summary of the FGLS model
summary(object = fit5_new)
## 
## Call:
## lm(formula = Sales.Log ~ Fridge.Volume + TownClass + RetailerClass + 
##     State + TownClass * Fridge.Volume + RetailerClass * Fridge.Volume + 
##     State * Fridge.Volume + RetailerClass * Fridge.Volume * TownClass, 
##     data = fulldata.or, weights = 1/expValue)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.1158  -1.2257   0.1649   1.3294   7.0519 
## 
## Coefficients:
##                                                                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                                                  9.428e+00  2.351e-02 401.032  < 2e-16 ***
## Fridge.Volume                                                                1.887e-02  8.000e-04  23.586  < 2e-16 ***
## TownClassGOLD                                                               -3.091e-01  3.039e-02 -10.172  < 2e-16 ***
## TownClassSILVER                                                             -2.611e-01  2.640e-02  -9.891  < 2e-16 ***
## TownClassTITANIUM                                                           -1.180e-01  2.374e-02  -4.970 6.69e-07 ***
## RetailerClassCHEMIST                                                        -2.051e-01  2.823e-02  -7.265 3.75e-13 ***
## RetailerClassFOOD STORE                                                      1.001e+00  5.312e-02  18.850  < 2e-16 ***
## RetailerClassHIGH END GROCER                                                 1.195e+00  5.031e-02  23.760  < 2e-16 ***
## RetailerClassLOW END GROCER                                                 -2.955e-02  2.233e-02  -1.323 0.185696    
## StateGujarat                                                                 3.678e-02  1.237e-02   2.973 0.002953 ** 
## StateMadhya Pradesh                                                         -3.678e-02  1.239e-02  -2.969 0.002983 ** 
## StateMaharashtra                                                             1.990e-01  1.181e-02  16.847  < 2e-16 ***
## Fridge.Volume:TownClassGOLD                                                 -1.699e-03  7.921e-04  -2.145 0.031981 *  
## Fridge.Volume:TownClassSILVER                                                4.358e-04  8.938e-04   0.488 0.625832    
## Fridge.Volume:TownClassTITANIUM                                             -5.771e-03  7.475e-04  -7.721 1.16e-14 ***
## Fridge.Volume:RetailerClassCHEMIST                                          -1.167e-03  1.072e-03  -1.089 0.276292    
## Fridge.Volume:RetailerClassFOOD STORE                                       -8.203e-03  8.194e-04 -10.011  < 2e-16 ***
## Fridge.Volume:RetailerClassHIGH END GROCER                                  -7.770e-03  8.029e-04  -9.677  < 2e-16 ***
## Fridge.Volume:RetailerClassLOW END GROCER                                   -4.090e-03  7.655e-04  -5.343 9.16e-08 ***
## Fridge.Volume:StateGujarat                                                  -6.634e-03  3.382e-04 -19.615  < 2e-16 ***
## Fridge.Volume:StateMadhya Pradesh                                           -5.642e-03  3.436e-04 -16.418  < 2e-16 ***
## Fridge.Volume:StateMaharashtra                                              -8.075e-03  3.330e-04 -24.251  < 2e-16 ***
## TownClassGOLD:RetailerClassCHEMIST                                           5.642e-01  3.907e-02  14.443  < 2e-16 ***
## TownClassSILVER:RetailerClassCHEMIST                                         3.545e-01  3.633e-02   9.759  < 2e-16 ***
## TownClassTITANIUM:RetailerClassCHEMIST                                       7.581e-01  3.244e-02  23.366  < 2e-16 ***
## TownClassGOLD:RetailerClassFOOD STORE                                        9.628e-01  6.554e-02  14.691  < 2e-16 ***
## TownClassSILVER:RetailerClassFOOD STORE                                      4.666e-01  6.955e-02   6.709 1.97e-11 ***
## TownClassTITANIUM:RetailerClassFOOD STORE                                    9.582e-01  5.776e-02  16.589  < 2e-16 ***
## TownClassGOLD:RetailerClassHIGH END GROCER                                   7.471e-01  5.743e-02  13.010  < 2e-16 ***
## TownClassSILVER:RetailerClassHIGH END GROCER                                 4.833e-01  5.903e-02   8.186 2.71e-16 ***
## TownClassTITANIUM:RetailerClassHIGH END GROCER                               7.370e-01  5.275e-02  13.970  < 2e-16 ***
## TownClassGOLD:RetailerClassLOW END GROCER                                    1.855e-01  3.216e-02   5.768 8.02e-09 ***
## TownClassSILVER:RetailerClassLOW END GROCER                                  1.891e-02  2.843e-02   0.665 0.506017    
## TownClassTITANIUM:RetailerClassLOW END GROCER                                1.341e-01  2.564e-02   5.230 1.70e-07 ***
## Fridge.Volume:TownClassGOLD:RetailerClassCHEMIST                            -3.205e-03  1.126e-03  -2.846 0.004432 ** 
## Fridge.Volume:TownClassSILVER:RetailerClassCHEMIST                          -4.369e-03  1.224e-03  -3.570 0.000358 ***
## Fridge.Volume:TownClassTITANIUM:RetailerClassCHEMIST                        -9.652e-04  1.085e-03  -0.889 0.373773    
## Fridge.Volume:TownClassGOLD:RetailerClassFOOD STORE                          7.756e-04  8.913e-04   0.870 0.384187    
## Fridge.Volume:TownClassSILVER:RetailerClassFOOD STORE                       -4.106e-04  1.000e-03  -0.411 0.681376    
## Fridge.Volume:TownClassTITANIUM:RetailerClassFOOD STORE                      4.364e-03  8.396e-04   5.198 2.02e-07 ***
## Fridge.Volume:TownClassGOLD:RetailerClassHIGH END GROCER                     8.209e-05  8.671e-04   0.095 0.924573    
## Fridge.Volume:TownClassSILVER:RetailerClassHIGH END GROCER                  -1.154e-03  9.808e-04  -1.177 0.239202    
## Fridge.Volume:TownClassTITANIUM:RetailerClassHIGH END GROCER                 3.750e-03  8.204e-04   4.571 4.87e-06 ***
## Fridge.Volume:TownClassGOLD:RetailerClassLOW END GROCER                      1.072e-03  8.316e-04   1.289 0.197304    
## Fridge.Volume:TownClassSILVER:RetailerClassLOW END GROCER                    2.918e-03  9.434e-04   3.093 0.001981 ** 
## Fridge.Volume:TownClassTITANIUM:RetailerClassLOW END GROCER                  2.145e-03  7.823e-04   2.742 0.006106 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.805 on 231169 degrees of freedom
## Multiple R-squared:  0.3659, Adjusted R-squared:  0.3658 
## F-statistic:  2965 on 45 and 231169 DF,  p-value: < 2.2e-16
# fulldata.or is already attached and has not been modified since, so it is not
# attached again: a second attach() would only stack another copy of the data
# on the search path and repeat the masking messages.
# Export the FGLS coefficients to A.csv in the working directory.
A <- fit5_new$coefficients
write.csv(A, "A.csv")
# Show where A.csv was written
getwd()
## [1] "C:/Users/Anshumaan/Desktop/2018 DAM/PROJECT"
#Change in Percentage Sales for "100 litres" increment in Fridge volume
# The images are read from the "pic" folder relative to the working directory
# (the project folder) instead of an absolute path on one user's machine, so the
# script also runs when the project is moved or used by someone else.
# grid.newpage() starts a clean page so the image is not drawn on top of the
# previous plot.
pic_dir <- "pic"
img1 <- readJPEG(file.path(pic_dir, "equation.jpg"), native = FALSE)
grid.newpage()
grid.raster(img1)

#Change in Percentage Sales for "100 litres" increment in Fridge volume
img2 <- readJPEG(file.path(pic_dir, "table.jpg"), native = FALSE)
grid.newpage()
grid.raster(img2)

#Insights Derived from the Tables
#Installing Fridges in the "Silver Town Class" for "Others" type of retailers seem most beneficial
#Installing Fridges in the Tier 3 Towns (Rest of Urban) in Chemists is also beneficial. It is because, Chemist is a moderate Sales driver and will be a good place to invest in. Fridge can also be used to store major drugs
#The -ve coefficients are not surprising as it implies most of the shops in those categories have fridge / air-conditioner already. Or, the demand for chocolate is saturated, so fridge will not result in any additional sale
#For Revenue Improvement: Target Gujarat. Invest more in Chemists and Other Retailers across the whole state
#For Society Welfare: Target Chattisgarh and MP (revenue generation is much low).Invest more on Chemists and Other Retailers over Silver and Gold Town Classes
#Some More Graphical Insights
#As Fridge Volume Increases, OTHERS Retailers drive more revenue - FRIDGE UPGRADATION TARGET
#FOOD STORE and HIGH END GROCER drive higher revenue at low Fridge Volume- NEW FRIDGE INSTALLATION TARGET
# Predicted log-sales against fridge volume from the FGLS model, one line per retailer class
interact_plot(
  model = fit5_new,
  pred = "Fridge.Volume",
  modx = "RetailerClass",
  main.title = "Interaction of Fridge Volume and Retailer Class"
)

#Some More Graphical Insights
#REST OF URBAN & GOLD should be targetted for both FRIDGE UPGRADATION OR NEW FRIDGE INSTALLATION
# Same plot with one line per town class
interact_plot(
  model = fit5_new,
  pred = "Fridge.Volume",
  modx = "TownClass",
  main.title = "Interaction of Fridge Volume and Town Class"
)

#Some More Graphical Insights
#CHATTISGARH shows highest potential for FRIDGE INSTALLATION / UPGRADATION followed by MP
# Same plot with one line per state
interact_plot(
  model = fit5_new,
  pred = "Fridge.Volume",
  modx = "State",
  main.title = "Interaction of Fridge Volume and State"
)

#Results

#This model has 37.91% adjusted R-squared which is an improvement - We have also taken interaction with Has.Fridge instead of Fridge.Vol.

#fulldata.or$Has.Fridge*fulldata.or$RetailerClass - w/0 47.29

#fulldata.or$Has.Fridge*fulldata.or$State - w/o 47.29

#fulldata.or$Has.Fridge*fulldata.or$TownClass - w/o all interaction ~28%

#all FridgeVol interactions ~30%

#fulldata.or$Fridge.Volume:fulldata.or$State w/o - 29.2

#fulldata.or$Fridge.Volume*fulldata.or$RetailerClass w/o - 28.2

#Conclusion: Last model "Best fit model" with adjusted R-squared 47.29%