Project: Analysis of Biscuit & Confectionery Sectors in Indian Market

Group 5

Reading Data

setwd("C:/Users/Anshumaan/Desktop/2018 DAM/PROJECT")
salesdata.df <- read.csv("DAM Project final data.csv")
attach(salesdata.df)
head(salesdata.df)
  OutletCode TownClass     Town          State
1          1  TITANIUM     Pune    Maharashtra
2          2      GOLD  Gwalior Madhya Pradesh
3          3  TITANIUM    Nerul    Maharashtra
4          4    SILVER    KORBA    Chattisgarh
5          5  TITANIUM   Kalyan    Maharashtra
6          7    SILVER KANKAVLI    Maharashtra
                              RE                         RE_new
1 ERETAIL                                                OTHERS
2 HIGH END GROCER                HIGH END GROCER               
3 HIGH END GROCER                HIGH END GROCER               
4 HIGH END GROCER                HIGH END GROCER               
5 PANPLUS                                                OTHERS
6 CASH AND CARRY                                         OTHERS
  Fridge.Volume Has.Fridge Annual.Sales Parent.Firm CHOCO1 CHOCO2 CHOCO3
1           340          1    123860.13      71.60%  7.50% 10.90%  1.80%
2            50          1     79153.05      71.10%  6.40% 11.40%  0.50%
3            35          1    227851.06      71.60%  7.50% 10.90%  1.80%
4            35          1     31397.78      62.70%  6.60% 15.10%  0.00%
5            50          1     24210.18      71.60%  7.50% 10.90%  1.80%
6             0          0     20226.00      71.60%  7.50% 10.90%  1.80%
  CHOCO4 Sales.New
1  1.60% 123860.13
2  3.20%  79153.05
3  1.60% 227851.06
4  7.20%  31397.78
5  1.60%  24210.18
6  1.60%  20226.00
summary(salesdata.df$Sales.New)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0    6333   18058   46288   48606  454670 

Model 1: To check whether sales is affected by availability of fridge or not

fit <- lm(salesdata.df$Sales.New ~ salesdata.df$Has.Fridge)
summary(fit)

Call:
lm(formula = salesdata.df$Sales.New ~ salesdata.df$Has.Fridge)

Residuals:
   Min     1Q Median     3Q    Max 
-88047 -25221  -8762   4419 440030 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)              14640.1      185.6   78.87   <2e-16 ***
salesdata.df$Has.Fridge  73406.6      282.7  259.68   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 68950 on 242593 degrees of freedom
Multiple R-squared:  0.2175,    Adjusted R-squared:  0.2175 
F-statistic: 6.743e+04 on 1 and 242593 DF,  p-value: < 2.2e-16

Removing Outliers

plot of chunk unnamed-chunk-3

Model 2

fit <- lm(salesdata.df$Sales.New ~ salesdata.df$Fridge.Volume + salesdata.df$TownClass+ salesdata.df$RE_new + salesdata.df$State)
summary(fit)

Call:
lm(formula = salesdata.df$Sales.New ~ salesdata.df$Fridge.Volume + 
    salesdata.df$TownClass + salesdata.df$RE_new + salesdata.df$State)

Residuals:
    Min      1Q  Median      3Q     Max 
-222745  -21202   -9116    9748  442371 

Coefficients:
                                                    Estimate Std. Error
(Intercept)                                        37349.896    824.868
salesdata.df$Fridge.Volume                           208.280      1.476
salesdata.df$TownClassREST OF URBAN                -5281.205    516.728
salesdata.df$TownClassSilver                      -15764.138   1640.069
salesdata.df$TownClassSILVER                       -4505.138    427.396
salesdata.df$TownClassTITANIUM                      4171.070    334.984
salesdata.df$RE_newFOOD STORE                     109467.875    681.865
salesdata.df$RE_newHIGH END GROCER                 84741.221    604.127
salesdata.df$RE_newLOW END GROCER                 -16152.731    390.243
salesdata.df$RE_newOTHERS                          -5342.958    565.947
salesdata.df$StateGujarat                          -4392.833    789.323
salesdata.df$StateMadhya Pradesh                   -5565.949    803.649
salesdata.df$StateMaharashtra                       2078.860    762.080
                                                  t value Pr(>|t|)    
(Intercept)                                        45.280  < 2e-16 ***
salesdata.df$Fridge.Volume                        141.115  < 2e-16 ***
salesdata.df$TownClassREST OF URBAN               -10.220  < 2e-16 ***
salesdata.df$TownClassSilver                       -9.612  < 2e-16 ***
salesdata.df$TownClassSILVER                      -10.541  < 2e-16 ***
salesdata.df$TownClassTITANIUM                     12.452  < 2e-16 ***
salesdata.df$RE_newFOOD STORE                     160.542  < 2e-16 ***
salesdata.df$RE_newHIGH END GROCER                140.270  < 2e-16 ***
salesdata.df$RE_newLOW END GROCER                 -41.391  < 2e-16 ***
salesdata.df$RE_newOTHERS                          -9.441  < 2e-16 ***
salesdata.df$StateGujarat                          -5.565 2.62e-08 ***
salesdata.df$StateMadhya Pradesh                   -6.926 4.35e-12 ***
salesdata.df$StateMaharashtra                       2.728  0.00637 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 62320 on 232132 degrees of freedom
Multiple R-squared:  0.3786,    Adjusted R-squared:  0.3786 
F-statistic: 1.179e+04 on 12 and 232132 DF,  p-value: < 2.2e-16

Model 3

fit <- lm(log(salesdata.df$Sales.New) ~ salesdata.df$Fridge.Volume + salesdata.df$TownClass+ salesdata.df$RE_new + salesdata.df$State +  salesdata.df$TownClass*salesdata.df$Has.Fridge+salesdata.df$RE_new*salesdata.df$Has.Fridge)
#salesdata.df$State*salesdata.df$Has.Fridge
summary(fit)

Call:
lm(formula = log(salesdata.df$Sales.New) ~ salesdata.df$Fridge.Volume + 
    salesdata.df$TownClass + salesdata.df$RE_new + salesdata.df$State + 
    salesdata.df$TownClass * salesdata.df$Has.Fridge + salesdata.df$RE_new * 
    salesdata.df$Has.Fridge)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.8892 -0.6048  0.0563  0.6561  4.0255 

Coefficients:
                                                                            Estimate
(Intercept)                                                                9.149e+00
salesdata.df$Fridge.Volume                                                -1.921e-04
salesdata.df$TownClassREST OF URBAN                                        2.813e-01
salesdata.df$TownClassSilver                                               3.385e-01
salesdata.df$TownClassSILVER                                               3.153e-02
salesdata.df$TownClassTITANIUM                                             1.080e-01
salesdata.df$RE_newFOOD STORE                                              6.400e-01
salesdata.df$RE_newHIGH END GROCER                                         1.135e+00
salesdata.df$RE_newLOW END GROCER                                         -1.086e-01
salesdata.df$RE_newOTHERS                                                 -1.261e-01
salesdata.df$StateGujarat                                                 -2.112e-02
salesdata.df$StateMadhya Pradesh                                          -1.330e-01
salesdata.df$StateMaharashtra                                              8.226e-03
salesdata.df$Has.Fridge                                                    1.785e+00
salesdata.df$TownClassREST OF URBAN:salesdata.df$Has.Fridge               -3.743e-01
salesdata.df$TownClassSilver:salesdata.df$Has.Fridge                      -7.386e-01
salesdata.df$TownClassSILVER:salesdata.df$Has.Fridge                      -5.891e-02
salesdata.df$TownClassTITANIUM:salesdata.df$Has.Fridge                    -3.564e-02
salesdata.df$RE_newFOOD STORE                    :salesdata.df$Has.Fridge  1.613e-01
salesdata.df$RE_newHIGH END GROCER               :salesdata.df$Has.Fridge -4.677e-01
salesdata.df$RE_newLOW END GROCER                :salesdata.df$Has.Fridge -3.555e-01
salesdata.df$RE_newOTHERS:salesdata.df$Has.Fridge                         -8.576e-02
                                                                          Std. Error
(Intercept)                                                                1.418e-02
salesdata.df$Fridge.Volume                                                 2.636e-05
salesdata.df$TownClassREST OF URBAN                                        9.962e-03
salesdata.df$TownClassSilver                                               3.295e-02
salesdata.df$TownClassSILVER                                               8.487e-03
salesdata.df$TownClassTITANIUM                                             7.176e-03
salesdata.df$RE_newFOOD STORE                                              3.074e-02
salesdata.df$RE_newHIGH END GROCER                                         2.792e-02
salesdata.df$RE_newLOW END GROCER                                          8.173e-03
salesdata.df$RE_newOTHERS                                                  1.128e-02
salesdata.df$StateGujarat                                                  1.237e-02
salesdata.df$StateMadhya Pradesh                                           1.258e-02
salesdata.df$StateMaharashtra                                              1.194e-02
salesdata.df$Has.Fridge                                                    1.372e-02
salesdata.df$TownClassREST OF URBAN:salesdata.df$Has.Fridge                1.744e-02
salesdata.df$TownClassSilver:salesdata.df$Has.Fridge                       5.241e-02
salesdata.df$TownClassSILVER:salesdata.df$Has.Fridge                       1.391e-02
salesdata.df$TownClassTITANIUM:salesdata.df$Has.Fridge                     1.025e-02
salesdata.df$RE_newFOOD STORE                    :salesdata.df$Has.Fridge  3.314e-02
salesdata.df$RE_newHIGH END GROCER               :salesdata.df$Has.Fridge  3.012e-02
salesdata.df$RE_newLOW END GROCER                :salesdata.df$Has.Fridge  1.232e-02
salesdata.df$RE_newOTHERS:salesdata.df$Has.Fridge                          1.818e-02
                                                                          t value
(Intercept)                                                               645.378
salesdata.df$Fridge.Volume                                                 -7.290
salesdata.df$TownClassREST OF URBAN                                        28.233
salesdata.df$TownClassSilver                                               10.276
salesdata.df$TownClassSILVER                                                3.715
salesdata.df$TownClassTITANIUM                                             15.056
salesdata.df$RE_newFOOD STORE                                              20.820
salesdata.df$RE_newHIGH END GROCER                                         40.633
salesdata.df$RE_newLOW END GROCER                                         -13.288
salesdata.df$RE_newOTHERS                                                 -11.176
salesdata.df$StateGujarat                                                  -1.707
salesdata.df$StateMadhya Pradesh                                          -10.572
salesdata.df$StateMaharashtra                                               0.689
salesdata.df$Has.Fridge                                                   130.110
salesdata.df$TownClassREST OF URBAN:salesdata.df$Has.Fridge               -21.461
salesdata.df$TownClassSilver:salesdata.df$Has.Fridge                      -14.091
salesdata.df$TownClassSILVER:salesdata.df$Has.Fridge                       -4.237
salesdata.df$TownClassTITANIUM:salesdata.df$Has.Fridge                     -3.478
salesdata.df$RE_newFOOD STORE                    :salesdata.df$Has.Fridge   4.868
salesdata.df$RE_newHIGH END GROCER               :salesdata.df$Has.Fridge -15.527
salesdata.df$RE_newLOW END GROCER                :salesdata.df$Has.Fridge -28.849
salesdata.df$RE_newOTHERS:salesdata.df$Has.Fridge                          -4.717
                                                                          Pr(>|t|)
(Intercept)                                                                < 2e-16
salesdata.df$Fridge.Volume                                                3.12e-13
salesdata.df$TownClassREST OF URBAN                                        < 2e-16
salesdata.df$TownClassSilver                                               < 2e-16
salesdata.df$TownClassSILVER                                              0.000203
salesdata.df$TownClassTITANIUM                                             < 2e-16
salesdata.df$RE_newFOOD STORE                                              < 2e-16
salesdata.df$RE_newHIGH END GROCER                                         < 2e-16
salesdata.df$RE_newLOW END GROCER                                          < 2e-16
salesdata.df$RE_newOTHERS                                                  < 2e-16
salesdata.df$StateGujarat                                                 0.087731
salesdata.df$StateMadhya Pradesh                                           < 2e-16
salesdata.df$StateMaharashtra                                             0.490941
salesdata.df$Has.Fridge                                                    < 2e-16
salesdata.df$TownClassREST OF URBAN:salesdata.df$Has.Fridge                < 2e-16
salesdata.df$TownClassSilver:salesdata.df$Has.Fridge                       < 2e-16
salesdata.df$TownClassSILVER:salesdata.df$Has.Fridge                      2.27e-05
salesdata.df$TownClassTITANIUM:salesdata.df$Has.Fridge                    0.000505
salesdata.df$RE_newFOOD STORE                    :salesdata.df$Has.Fridge 1.13e-06
salesdata.df$RE_newHIGH END GROCER               :salesdata.df$Has.Fridge  < 2e-16
salesdata.df$RE_newLOW END GROCER                :salesdata.df$Has.Fridge  < 2e-16
salesdata.df$RE_newOTHERS:salesdata.df$Has.Fridge                         2.39e-06

(Intercept)                                                               ***
salesdata.df$Fridge.Volume                                                ***
salesdata.df$TownClassREST OF URBAN                                       ***
salesdata.df$TownClassSilver                                              ***
salesdata.df$TownClassSILVER                                              ***
salesdata.df$TownClassTITANIUM                                            ***
salesdata.df$RE_newFOOD STORE                                             ***
salesdata.df$RE_newHIGH END GROCER                                        ***
salesdata.df$RE_newLOW END GROCER                                         ***
salesdata.df$RE_newOTHERS                                                 ***
salesdata.df$StateGujarat                                                 .  
salesdata.df$StateMadhya Pradesh                                          ***
salesdata.df$StateMaharashtra                                                
salesdata.df$Has.Fridge                                                   ***
salesdata.df$TownClassREST OF URBAN:salesdata.df$Has.Fridge               ***
salesdata.df$TownClassSilver:salesdata.df$Has.Fridge                      ***
salesdata.df$TownClassSILVER:salesdata.df$Has.Fridge                      ***
salesdata.df$TownClassTITANIUM:salesdata.df$Has.Fridge                    ***
salesdata.df$RE_newFOOD STORE                    :salesdata.df$Has.Fridge ***
salesdata.df$RE_newHIGH END GROCER               :salesdata.df$Has.Fridge ***
salesdata.df$RE_newLOW END GROCER                :salesdata.df$Has.Fridge ***
salesdata.df$RE_newOTHERS:salesdata.df$Has.Fridge                         ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9753 on 232123 degrees of freedom
Multiple R-squared:  0.4749,    Adjusted R-squared:  0.4748 
F-statistic:  9996 on 21 and 232123 DF,  p-value: < 2.2e-16

Model 4

setwd("C:/Users/Anshumaan/Desktop/2018 DAM/PROJECT")
numerical_data<- read.csv("DAM_Numerical_data.csv")
numerical_data= numerical_data[c(numerical_data$Sales.New>=1000),]
m <- lm(log(Sales.New) ~ Has.Fridge +RE_new +State + TownClass +Has.Fridge*TownClass+Has.Fridge*RE_new, data = numerical_data)
summary(m)

Call:
lm(formula = log(Sales.New) ~ Has.Fridge + RE_new + State + TownClass + 
    Has.Fridge * TownClass + Has.Fridge * RE_new, data = numerical_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.5831 -0.6505  0.0296  0.6691  4.0785 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)           8.948786   0.010538 849.177   <2e-16 ***
Has.Fridge            0.888639   0.014389  61.758   <2e-16 ***
RE_new                0.052954   0.003972  13.330   <2e-16 ***
State                 0.067595   0.002576  26.242   <2e-16 ***
TownClass            -0.026170   0.002717  -9.633   <2e-16 ***
Has.Fridge:TownClass  0.121734   0.004440  27.418   <2e-16 ***
Has.Fridge:RE_new     0.240614   0.004985  48.264   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.016 on 232138 degrees of freedom
Multiple R-squared:  0.4297,    Adjusted R-squared:  0.4297 
F-statistic: 2.915e+04 on 6 and 232138 DF,  p-value: < 2.2e-16