Email - abhirana101@gmail.com

College - Goa Institute of Management

Objective: Our objective is to analyse and predict sales at Big Mart using variables such as product visibility, fat content, store location and outlet type. We will use several statistical models to support this analysis and to suggest how sales at Big Mart can be improved. See the project report for further analysis.

# Load the Big Mart sales data and preview the first rows.
# NOTE(review): setwd() ties the script to one machine; it is kept so the
# working directory stays the same for anything else that relies on it.
setwd("C:/Users/Abhi/Desktop/Data Analytics/Project")
# The file name is passed directly: the previous paste(..., sep = "TRUE")
# wrapper received a single string, so its separator was never used.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 too,
# matching the factor columns shown in the str()/summary() output below.
BigMart <- read.csv("Big Mart Sales Project.csv", stringsAsFactors = TRUE)
head(BigMart)
##   Item_Identifier Item_Weight Item_Fat_Content Item_Visibility
## 1           FDA15       9.300          Low Fat      0.01604730
## 2           DRC01       5.920          Regular      0.01927822
## 3           FDN15      17.500          Low Fat      0.01676007
## 4           FDX07      19.200          Regular      0.00000000
## 5           NCD19       8.930          Low Fat      0.00000000
## 6           FDP36      10.395          Regular      0.00000000
##               Item_Type Item_MRP Outlet_Identifier
## 1                 Dairy 249.8092            OUT049
## 2           Soft Drinks  48.2692            OUT018
## 3                  Meat 141.6180            OUT049
## 4 Fruits and Vegetables 182.0950            OUT010
## 5             Household  53.8614            OUT013
## 6          Baking Goods  51.4008            OUT018
##   Outlet_Establishment_Year Outlet_Size Outlet_Location_Type
## 1                      1999      Medium               Tier 1
## 2                      2009      Medium               Tier 3
## 3                      1999      Medium               Tier 1
## 4                      1998                           Tier 3
## 5                      1987        High               Tier 3
## 6                      2009      Medium               Tier 3
##         Outlet_Type Item_Outlet_Sales
## 1 Supermarket Type1         3735.1380
## 2 Supermarket Type2          443.4228
## 3 Supermarket Type1         2097.2700
## 4     Grocery Store          732.3800
## 5 Supermarket Type1          994.7052
## 6 Supermarket Type2          556.6088
# Print the structure of the loaded data frame: one line per column with
# its storage type and leading values.
str(BigMart)
## 'data.frame':    8523 obs. of  12 variables:
##  $ Item_Identifier          : Factor w/ 1559 levels "DRA12","DRA24",..: 157 9 663 1122 1298 759 697 739 441 991 ...
##  $ Item_Weight              : num  9.3 5.92 17.5 19.2 8.93 ...
##  $ Item_Fat_Content         : Factor w/ 5 levels "LF","low fat",..: 3 5 3 5 3 5 5 3 5 5 ...
##  $ Item_Visibility          : num  0.016 0.0193 0.0168 0 0 ...
##  $ Item_Type                : Factor w/ 16 levels "Baking Goods",..: 5 15 11 7 10 1 14 14 6 6 ...
##  $ Item_MRP                 : num  249.8 48.3 141.6 182.1 53.9 ...
##  $ Outlet_Identifier        : Factor w/ 10 levels "OUT010","OUT013",..: 10 4 10 1 2 4 2 6 8 3 ...
##  $ Outlet_Establishment_Year: int  1999 2009 1999 1998 1987 2009 1987 1985 2002 2007 ...
##  $ Outlet_Size              : Factor w/ 4 levels "","High","Medium",..: 3 3 3 1 2 3 2 3 1 1 ...
##  $ Outlet_Location_Type     : Factor w/ 3 levels "Tier 1","Tier 2",..: 1 3 1 3 3 3 3 3 2 2 ...
##  $ Outlet_Type              : Factor w/ 4 levels "Grocery Store",..: 2 3 2 1 2 3 2 4 2 2 ...
##  $ Item_Outlet_Sales        : num  3735 443 2097 732 995 ...
# Per-column summary: quantiles and NA counts for numeric columns, level
# counts for factor columns.
summary(BigMart)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  FDG33  :  10    Min.   : 4.555   LF     : 316     Min.   :0.00000  
##  FDW13  :  10    1st Qu.: 8.774   low fat: 112     1st Qu.:0.02699  
##  DRE49  :   9    Median :12.600   Low Fat:5089     Median :0.05393  
##  DRN47  :   9    Mean   :12.858   reg    : 117     Mean   :0.06613  
##  FDD38  :   9    3rd Qu.:16.850   Regular:2889     3rd Qu.:0.09459  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.32839  
##  (Other):8467    NA's   :1463                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :2410   Tier 1:2388         
##  1st Qu.:1987              High  : 932   Tier 2:2785         
##  Median :1999              Medium:2793   Tier 3:3350         
##  Mean   :1998              Small :2388                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
## 
# Sort all rows from highest to lowest sales, then show outlet location,
# outlet type and sales for the ten best-selling rows.
lp <- BigMart[order(-BigMart[["Item_Outlet_Sales"]]), ]
lp[seq_len(10), c("Outlet_Location_Type", "Outlet_Type", "Item_Outlet_Sales")]
##      Outlet_Location_Type       Outlet_Type Item_Outlet_Sales
## 7189               Tier 3 Supermarket Type3         13086.965
## 5224               Tier 3 Supermarket Type3         12117.560
## 1451               Tier 3 Supermarket Type3         11445.102
## 4350               Tier 3 Supermarket Type3         10993.690
## 6542               Tier 3 Supermarket Type3         10306.584
## 4889               Tier 3 Supermarket Type1         10256.649
## 1010               Tier 3 Supermarket Type3         10236.675
## 7192               Tier 3 Supermarket Type3         10072.888
## 4290               Tier 1 Supermarket Type1          9779.936
## 7753               Tier 3 Supermarket Type3          9678.069
# Sort all rows from lowest to highest sales, then show outlet location,
# outlet type and sales for the ten worst-selling rows.
lp <- BigMart[order(BigMart[["Item_Outlet_Sales"]]), ]
lp[seq_len(10), c("Outlet_Location_Type", "Outlet_Type", "Item_Outlet_Sales")]
##      Outlet_Location_Type   Outlet_Type Item_Outlet_Sales
## 907                Tier 3 Grocery Store           33.2900
## 6951               Tier 3 Grocery Store           33.2900
## 2572               Tier 1 Grocery Store           33.9558
## 3054               Tier 1 Grocery Store           34.6216
## 7389               Tier 3 Grocery Store           35.2874
## 4266               Tier 3 Grocery Store           36.6190
## 7613               Tier 1 Grocery Store           36.6190
## 2056               Tier 1 Grocery Store           37.2848
## 490                Tier 1 Grocery Store           37.9506
## 575                Tier 1 Grocery Store           37.9506
library(car)
# Box plots of item sales grouped by outlet characteristics.
#
# The data frame is supplied through `data =` instead of attach(): attaching
# it twice put two copies on the search path and triggered the "objects are
# masked" message, and attached columns silently go stale if BigMart changes.
#
# With horizontal = TRUE the measured values run along the x axis, so
# "Sales" is the x label and the grouping variable is the y label.

# Sales by outlet location tier.
boxplot(Item_Outlet_Sales ~ Outlet_Location_Type, data = BigMart,
        horizontal = TRUE, las = 1,
        xlab = "Sales", ylab = "Outlet location",
        main = "Comparison of sales at different outlet locations",
        col = c("yellow", "pink")
        )

# Sales by outlet type (labels now name the variable actually plotted).
boxplot(Item_Outlet_Sales ~ Outlet_Type, data = BigMart,
        horizontal = TRUE, las = 1,
        xlab = "Sales", ylab = "Outlet type",
        main = "Comparison of sales at different outlet types",
        col = c("yellow", "pink")
        )

# Scatter-plot matrix of fat content, item type, sales and visibility.
# NOTE(review): `col` has two entries and is not tied to any grouping
# variable - confirm the intended colouring.
pairs(
  ~ Item_Fat_Content + Item_Type + Item_Outlet_Sales + Item_Visibility,
  data = BigMart,
  main = "Scatter plot matrix",
  col = c("green", "red"),
  cex = 0.6
)

# Row count for each item type.
ttable <- table(BigMart[["Item_Type"]])
print(ttable)
## 
##          Baking Goods                Breads             Breakfast 
##                   648                   251                   110 
##                Canned                 Dairy          Frozen Foods 
##                   649                   682                   856 
## Fruits and Vegetables           Hard Drinks    Health and Hygiene 
##                  1232                   214                   520 
##             Household                  Meat                Others 
##                   910                   425                   169 
##               Seafood           Snack Foods           Soft Drinks 
##                    64                  1200                   445 
##         Starchy Foods 
##                   148
# Row count for each outlet location tier.
ttable1 <- table(BigMart[["Outlet_Location_Type"]])
print(ttable1)
## 
## Tier 1 Tier 2 Tier 3 
##   2388   2785   3350
# Row count for each outlet type.
ttable3 <- table(BigMart[["Outlet_Type"]])
print(ttable3)
## 
##     Grocery Store Supermarket Type1 Supermarket Type2 Supermarket Type3 
##              1083              5577               928               935
# Row count for each outlet size. The unlabelled first column counts the
# rows whose Outlet_Size is the empty string.
ttable4 <- table(BigMart[["Outlet_Size"]])
print(ttable4)
## 
##          High Medium  Small 
##   2410    932   2793   2388
# Row count for each outlet establishment year.
ttable5 <- table(BigMart[["Outlet_Establishment_Year"]])
print(ttable5)
## 
## 1985 1987 1997 1998 1999 2002 2004 2007 2009 
## 1463  932  930  555  930  929  930  926  928
# Row count for each fat-content label.
# NOTE(review): "LF", "low fat", "Low Fat" and "reg", "Regular" look like
# spelling variants of two categories - confirm whether they should be
# merged before analysis.
ttable6 <- table(BigMart[["Item_Fat_Content"]])
print(ttable6)
## 
##      LF low fat Low Fat     reg Regular 
##     316     112    5089     117    2889
# Row count for each outlet identifier.
ttable7 <- table(BigMart[["Outlet_Identifier"]])
print(ttable7)
## 
## OUT010 OUT013 OUT017 OUT018 OUT019 OUT027 OUT035 OUT045 OUT046 OUT049 
##    555    932    926    928    528    935    930    929    930    930
# Item type (rows) cross-tabulated against fat-content label (columns).
ttable8 <- xtabs(
  formula = ~ Item_Type + Item_Fat_Content,
  data = BigMart
)
print(ttable8)
##                        Item_Fat_Content
## Item_Type                LF low fat Low Fat reg Regular
##   Baking Goods           20       8     301  13     306
##   Breads                  8       6     126   3     108
##   Breakfast               2       0      39   2      67
##   Canned                 17      10     314  11     297
##   Dairy                  24      12     382   6     258
##   Frozen Foods           22       4     424  19     387
##   Fruits and Vegetables  39      11     580  25     577
##   Hard Drinks             8       7     199   0       0
##   Health and Hygiene     29      10     481   0       0
##   Household              54      16     840   0       0
##   Meat                   10       1     159   7     248
##   Others                 11       2     156   0       0
##   Seafood                 4       1      32   0      27
##   Snack Foods            31      16     645  23     485
##   Soft Drinks            28       7     339   1      70
##   Starchy Foods           9       1      72   7      59
# Fat-content label (rows) cross-tabulated against outlet type (columns).
ttable9 <- xtabs(
  formula = ~ Item_Fat_Content + Outlet_Type,
  data = BigMart
)
print(ttable9)
##                 Outlet_Type
## Item_Fat_Content Grocery Store Supermarket Type1 Supermarket Type2
##          LF                 45               192                42
##          low fat            18                70                 9
##          Low Fat           642              3347               547
##          reg                14                79                18
##          Regular           364              1889               312
##                 Outlet_Type
## Item_Fat_Content Supermarket Type3
##          LF                     37
##          low fat                15
##          Low Fat               553
##          reg                     6
##          Regular               324
# Cross-tabulate sales values (rows) against outlet type (columns) and show
# the first rows of the result.
# NOTE(review): Item_Outlet_Sales is numeric, so the table has one row per
# distinct sales figure - confirm that a binned or aggregated summary was
# not what was intended.
ttable10 <- xtabs(
  formula = ~ Item_Outlet_Sales + Outlet_Type,
  data = BigMart
)
print(head(ttable10))
##                  Outlet_Type
## Item_Outlet_Sales Grocery Store Supermarket Type1 Supermarket Type2
##           33.29               2                 0                 0
##           33.9558             1                 0                 0
##           34.6216             1                 0                 0
##           35.2874             1                 0                 0
##           36.619              2                 0                 0
##           37.2848             1                 0                 0
##                  Outlet_Type
## Item_Outlet_Sales Supermarket Type3
##           33.29                   0
##           33.9558                 0
##           34.6216                 0
##           35.2874                 0
##           36.619                  0
##           37.2848                 0
# Linear model of sales on outlet-level factors (size, location tier, type).
# The empty-string Outlet_Size level is the reference category, which is
# why High, Medium and Small all get their own coefficient.
lm1 <- lm(
  formula = Item_Outlet_Sales ~
    Outlet_Size + Outlet_Location_Type + Outlet_Type,
  data = BigMart
)
print(summary(lm1))
## 
## Call:
## lm(formula = Item_Outlet_Sales ~ Outlet_Size + Outlet_Location_Type + 
##     Outlet_Type, data = BigMart)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3452.4 -1064.0  -187.1   672.1  9392.9 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    167.90      88.13   1.905  0.05681 .  
## Outlet_SizeHigh                 22.13     113.75   0.195  0.84576    
## Outlet_SizeMedium              242.94      91.30   2.661  0.00781 ** 
## Outlet_SizeSmall               172.43      59.79   2.884  0.00393 ** 
## Outlet_Location_TypeTier 2     161.00      69.00   2.333  0.01966 *  
## Outlet_Location_TypeTier 3     171.45     108.43   1.581  0.11386    
## Outlet_TypeSupermarket Type1  1937.51      81.08  23.896  < 2e-16 ***
## Outlet_TypeSupermarket Type2  1413.20     121.29  11.652  < 2e-16 ***
## Outlet_TypeSupermarket Type3  3111.74     121.21  25.671  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1488 on 8514 degrees of freedom
## Multiple R-squared:  0.2404, Adjusted R-squared:  0.2397 
## F-statistic: 336.9 on 8 and 8514 DF,  p-value: < 2.2e-16
# Linear model of sales on item-level predictors (visibility, fat content,
# item type).
# NOTE(review): Item_Fat_Content enters with five levels that look like
# spelling variants of two categories - confirm whether they should be
# merged before fitting.
lm2 <- lm(
  formula = Item_Outlet_Sales ~
    Item_Visibility + Item_Fat_Content + Item_Type,
  data = BigMart
)
print(summary(lm2))
## 
## Call:
## lm(formula = Item_Outlet_Sales ~ Item_Visibility + Item_Fat_Content + 
##     Item_Type, data = BigMart)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2588.3 -1280.0  -395.1   892.6 10600.6 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     2150.4486   118.1314  18.204  < 2e-16 ***
## Item_Visibility                -4363.3529   356.2054 -12.250  < 2e-16 ***
## Item_Fat_Contentlow fat           -0.5135   185.9362  -0.003 0.997796    
## Item_Fat_ContentLow Fat           78.6273    98.0024   0.802 0.422402    
## Item_Fat_Contentreg             -145.7729   183.8594  -0.793 0.427887    
## Item_Fat_ContentRegular          149.8037   101.7608   1.472 0.141027    
## Item_TypeBreads                  240.6050   125.6100   1.915 0.055463 .  
## Item_TypeBreakfast               218.9292   174.3768   1.255 0.209334    
## Item_TypeCanned                  267.9014    93.8065   2.856 0.004302 ** 
## Item_TypeDairy                   298.6998    92.7822   3.219 0.001290 ** 
## Item_TypeFrozen Foods            165.4176    87.9758   1.880 0.060106 .  
## Item_TypeFruits and Vegetables   333.2940    81.9670   4.066 4.82e-05 ***
## Item_TypeHard Drinks             199.0411   134.8087   1.476 0.139855    
## Item_TypeHealth and Hygiene       27.7581   101.6905   0.273 0.784885    
## Item_TypeHousehold               303.3366    89.3042   3.397 0.000685 ***
## Item_TypeMeat                    165.8670   105.5726   1.571 0.116192    
## Item_TypeOthers                  -34.0289   147.3961  -0.231 0.817423    
## Item_TypeSeafood                 400.2604   221.3749   1.808 0.070632 .  
## Item_TypeSnack Foods             318.5573    82.4004   3.866 0.000111 ***
## Item_TypeSoft Drinks              52.0696   104.9553   0.496 0.619828    
## Item_TypeStarchy Foods           427.6159   153.9879   2.777 0.005499 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1689 on 8502 degrees of freedom
## Multiple R-squared:  0.02266,    Adjusted R-squared:  0.02036 
## F-statistic: 9.854 on 20 and 8502 DF,  p-value: < 2.2e-16
# Correlation matrix of the numeric item columns and sales.
# Columns are selected by name rather than by position so the selection
# survives a change in column order.
cm1 <- BigMart[, c("Item_Weight", "Item_Visibility", "Item_MRP",
                   "Item_Outlet_Sales")]
# Item_Weight contains missing values (1463 NAs in summary(BigMart)), so
# cor() with its default `use` returns NA for every pair involving it.
# Pairwise-complete observations give a usable Item_Weight row/column and
# leave the correlations between the fully observed columns unchanged.
cor(cm1, use = "pairwise.complete.obs")
##                   Item_Weight Item_Visibility     Item_MRP
## Item_Weight                 1              NA           NA
## Item_Visibility            NA     1.000000000 -0.001314848
## Item_MRP                   NA    -0.001314848  1.000000000
## Item_Outlet_Sales          NA    -0.128624612  0.567574447
##                   Item_Outlet_Sales
## Item_Weight                      NA
## Item_Visibility          -0.1286246
## Item_MRP                  0.5675744
## Item_Outlet_Sales         1.0000000
library(corrgram)
# Correlogram of the numeric columns in cm1: variable reordering enabled,
# shaded cells in the lower panels, pies in the upper panels.
corrgram(
  cm1,
  main = "Corrgram",
  order = TRUE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt
)

library(lattice)
# Count histograms of sales, one panel per fat-content label and item type
# combination, laid out 3 x 3 per page.
histogram(
  ~ Item_Outlet_Sales | Item_Fat_Content + Item_Type,
  data = BigMart,
  type = "count",
  layout = c(3, 3),
  col = c(
    "burlywood", "darkolivegreen", "red", "yellow",
    "peachpuff", "blue", "Green"
  )
)