#Project Title:  Big Market Sales Analysis
#NAME: Sree Lakshmi Addepalli
#EMAIL: lakshi.ana@gmail.com
#COLLEGE / COMPANY: VESIT
#Cleaning the training data
# Load the raw training data and print a per-column summary.
# NOTE(review): setwd() ties the script to one machine; it is kept so the
# relative file names used below keep resolving to the same directory.
setwd("C:/Users/Lakshmi/Desktop/LakshmiCapstoneProject/bigmart-sales-data")
# `header` is spelled out instead of relying on partial argument matching.
# stringsAsFactors is set explicitly because read.csv() stopped converting
# strings to factors by default in R 4.0.0, while the recorded summary below
# shows per-level counts (i.e. factor columns).
BigMartSalesData <- read.csv(
  file = "Train.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
summary(BigMartSalesData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  FDG33  :  10    Min.   : 4.555   LF     : 316     Min.   :0.00000  
##  FDW13  :  10    1st Qu.: 8.774   low fat: 112     1st Qu.:0.02699  
##  DRE49  :   9    Median :12.600   Low Fat:5089     Median :0.05393  
##  DRN47  :   9    Mean   :12.858   reg    : 117     Mean   :0.06613  
##  FDD38  :   9    3rd Qu.:16.850   Regular:2889     3rd Qu.:0.09459  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.32839  
##  (Other):8467    NA's   :1463                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :2410   Tier 1:2388         
##  1st Qu.:1987              High  : 932   Tier 2:2785         
##  Median :1999              Medium:2793   Tier 3:3350         
##  Mean   :1998              Small :2388                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
## 
#Filling missing Item_Weight values with the weight recorded for the same Item_Identifier; items with no recorded weight get the overall mean weight
# Largest value in `x`, with NA entries counted as 0.
#
# x: numeric vector (may contain NA).
# Returns: a single number, max(x) after every NA has been replaced by 0;
#          an input made only of NAs therefore gives 0.
test <- function(x) {
  x[is.na(x)] <- 0
  # Return the maximum directly: the original assigned it to an unused local,
  # which made the function's result invisible.
  max(x)
}
# One row per item identifier, holding the result of test() (the largest
# weight) over that item's rows. The formula is kept verbatim because the
# column names of `y` are derived from it and are used further down.
y <- aggregate(
  BigMartSalesData$Item_Weight ~ BigMartSalesData$Item_Identifier,
  data = BigMartSalesData,
  FUN = test
)
head(y)
##   BigMartSalesData$Item_Identifier BigMartSalesData$Item_Weight
## 1                            DRA12                       11.600
## 2                            DRA24                       19.350
## 3                            DRA59                        8.270
## 4                            DRB01                        7.390
## 5                            DRB13                        6.115
## 6                            DRB24                        8.785
# Lookup from item identifier to the weight aggregated in `y`.
# Keys are converted to character first: indexing a list with a factor uses
# the factor's integer codes rather than its labels, which can leave the
# list's names and values misaligned.
item_ids <- as.character(y$`BigMartSalesData$Item_Identifier`)
item_weights <- y$`BigMartSalesData$Item_Weight`
items_weight_identifier <- setNames(as.list(item_weights), item_ids)
length(items_weight_identifier)
## [1] 1559
## NOTE(review): 1559 was recorded from the original run; the length now equals nrow(y) - re-run to refresh.

# Fill every missing Item_Weight in one vectorised step (no 1:length() loop):
# use the weight known for the same item identifier, and fall back to the
# overall mean when the identifier has no entry in the lookup.
fallback_weight <- 12.858 # Mean of Item_Weight in the summary recorded before imputation
weight_missing <- which(is.na(BigMartSalesData$Item_Weight))
missing_ids <- as.character(BigMartSalesData$Item_Identifier[weight_missing])
filled_weights <- item_weights[match(missing_ids, item_ids)]
# match() yields NA for identifiers absent from the lookup -> use the mean.
filled_weights[is.na(filled_weights)] <- fallback_weight
BigMartSalesData$Item_Weight[weight_missing] <- filled_weights
summary(BigMartSalesData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  FDG33  :  10    Min.   : 4.555   LF     : 316     Min.   :0.00000  
##  FDW13  :  10    1st Qu.: 8.785   low fat: 112     1st Qu.:0.02699  
##  DRE49  :   9    Median :12.650   Low Fat:5089     Median :0.05393  
##  DRN47  :   9    Mean   :12.875   reg    : 117     Mean   :0.06613  
##  FDD38  :   9    3rd Qu.:16.850   Regular:2889     3rd Qu.:0.09459  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.32839  
##  (Other):8467                                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :2410   Tier 1:2388         
##  1st Qu.:1987              High  : 932   Tier 2:2785         
##  Median :1999              Medium:2793   Tier 3:3350         
##  Mean   :1998              Small :2388                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
## 
#Reducing Item_Fat_Content to two categories, namely "Low Fat" and "Regular"
# Collapse the spelling variants of the fat-content labels and store the
# result back as a factor.
BigMartSalesData$Item_Fat_Content <- local({
  fat_labels <- as.character(BigMartSalesData$Item_Fat_Content)
  fat_labels[fat_labels %in% c("LF", "low Fat", "low fat")] <- "Low Fat"
  fat_labels[fat_labels %in% "reg"] <- "Regular"
  as.factor(fat_labels)
})
summary(BigMartSalesData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  FDG33  :  10    Min.   : 4.555   Low Fat:5517     Min.   :0.00000  
##  FDW13  :  10    1st Qu.: 8.785   Regular:3006     1st Qu.:0.02699  
##  DRE49  :   9    Median :12.650                    Median :0.05393  
##  DRN47  :   9    Mean   :12.875                    Mean   :0.06613  
##  FDD38  :   9    3rd Qu.:16.850                    3rd Qu.:0.09459  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.32839  
##  (Other):8467                                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :2410   Tier 1:2388         
##  1st Qu.:1987              High  : 932   Tier 2:2785         
##  Median :1999              Medium:2793   Tier 3:3350         
##  Mean   :1998              Small :2388                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
## 
#Replacing Item_Visibility values of exactly 0 with the overall mean visibility
# Replace Item_Visibility values of exactly 0 with the overall mean
# (0.06613, the Mean of Item_Visibility in the summary recorded above).
# Done as one vectorised assignment: the previous 1:length() loop with a
# scalar `if` failed on an empty column and on NA entries; which() skips NAs.
mean_visibility <- 0.06613
zero_visibility <- which(BigMartSalesData$Item_Visibility == 0)
BigMartSalesData$Item_Visibility[zero_visibility] <- mean_visibility


#Predicting Outlet_Size ("High", "Medium" or "Small") with a random forest so that the blank (missing) values can be filled in
# Keep the rows whose outlet size is not blank and split them 80/20 into a
# fitting part (Train) and a hold-out part (Test). The seed is set before
# the split so the partition is reproducible.
library(caTools)
set.seed(100)
BigMartSalesData$Outlet_Size <- as.character(BigMartSalesData$Outlet_Size)
BigMartSubsetSalesData <- BigMartSalesData[
  which(BigMartSalesData$Outlet_Size != ""),
]
spl <- sample.split(BigMartSubsetSalesData$Outlet_Size, SplitRatio = 0.8)
Train <- BigMartSubsetSalesData[which(spl), ]
Test <- BigMartSubsetSalesData[which(!spl), ]
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Convert the response to a factor in both partitions, fit a random forest
# for Outlet_Size on every other column except the sales figure and the item
# identifier, then cross-tabulate actual vs predicted sizes on the hold-out rows.
Train$Outlet_Size <- as.factor(Train$Outlet_Size)
Test$Outlet_Size <- as.factor(Test$Outlet_Size)
SizeForest <- randomForest(
  Outlet_Size ~ . - Item_Outlet_Sales - Item_Identifier,
  data = Train,
  ntree = 100,
  nodesize = 25
)
PredictForest <- predict(SizeForest, newdata = Test)
table(Test$Outlet_Size, PredictForest)
##         PredictForest
##          High Medium Small
##   High    186      0     0
##   Medium    0    559     0
##   Small     0      0   478
# Fill in Outlet_Size only where it is blank (or NA). The previous version
# replaced the whole column with model predictions, which would also
# overwrite observed sizes whenever the model disagreed with them.
outlet_size <- as.character(BigMartSalesData$Outlet_Size)
size_missing <- which(is.na(outlet_size) | outlet_size == "")
if (length(size_missing) > 0) {
  imputed_size <- predict(
    SizeForest,
    newdata = BigMartSalesData[size_missing, ]
  )
  outlet_size[size_missing] <- as.character(imputed_size)
}
# Store as a factor with the same class levels the model predicts.
BigMartSalesData$Outlet_Size <- factor(
  outlet_size,
  levels = levels(SizeForest$predicted)
)
summary(BigMartSalesData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility   
##  FDG33  :  10    Min.   : 4.555   Low Fat:5517     Min.   :0.003575  
##  FDW13  :  10    1st Qu.: 8.785   Regular:3006     1st Qu.:0.033085  
##  DRE49  :   9    Median :12.650                    Median :0.062517  
##  DRN47  :   9    Mean   :12.875                    Mean   :0.070213  
##  FDD38  :   9    3rd Qu.:16.850                    3rd Qu.:0.094585  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.328391  
##  (Other):8467                                                        
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985              High  : 932   Tier 1:2388         
##  1st Qu.:1987              Medium:5203   Tier 2:2785         
##  Median :1999              Small :2388   Tier 3:3350         
##  Mean   :1998                                                
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
## 
# Save the cleaned training data. Row names are written too (the default),
# which is where the leading `X` column comes from when the file is read back.
write.csv(BigMartSalesData, file = "BigMartSalesCleanData.csv")

#similarly doing it for the test set
#Cleaning the test data
# Load the raw test data and print a per-column summary.
# NOTE(review): setwd() ties the script to one machine; it is kept so the
# relative file names used below keep resolving to the same directory.
setwd("C:/Users/Lakshmi/Desktop/LakshmiCapstoneProject/bigmart-sales-data")
# `header` is spelled out instead of relying on partial argument matching.
# stringsAsFactors is set explicitly because read.csv() stopped converting
# strings to factors by default in R 4.0.0, while the recorded summary below
# shows per-level counts (i.e. factor columns).
BigMartSalesTestData <- read.csv(
  file = "Test.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
summary(BigMartSalesTestData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  DRF48  :   8    Min.   : 4.555   LF     : 206     Min.   :0.00000  
##  FDK57  :   8    1st Qu.: 8.645   low fat:  66     1st Qu.:0.02705  
##  FDN52  :   8    Median :12.500   Low Fat:3396     Median :0.05415  
##  FDP15  :   8    Mean   :12.696   reg    :  78     Mean   :0.06568  
##  FDQ60  :   8    3rd Qu.:16.700   Regular:1935     3rd Qu.:0.09346  
##  FDW10  :   8    Max.   :21.350                    Max.   :0.32364  
##  (Other):5633    NA's   :976                                        
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Snack Foods          : 789   Min.   : 31.99   OUT027 : 624     
##  Fruits and Vegetables: 781   1st Qu.: 94.41   OUT013 : 621     
##  Household            : 638   Median :141.42   OUT035 : 620     
##  Frozen Foods         : 570   Mean   :141.02   OUT046 : 620     
##  Dairy                : 454   3rd Qu.:186.03   OUT049 : 620     
##  Baking Goods         : 438   Max.   :266.59   OUT045 : 619     
##  (Other)              :2011                    (Other):1957     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :1606   Tier 1:1592         
##  1st Qu.:1987              High  : 621   Tier 2:1856         
##  Median :1999              Medium:1862   Tier 3:2233         
##  Mean   :1998              Small :1592                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type  
##  Grocery Store    : 722  
##  Supermarket Type1:3717  
##  Supermarket Type2: 618  
##  Supermarket Type3: 624  
##                          
##                          
## 
#Filling missing Item_Weight values with the weight recorded for the same Item_Identifier; items with no recorded weight get the overall mean weight
# Largest value in `x`, with NA entries counted as 0.
#
# x: numeric vector (may contain NA).
# Returns: a single number, max(x) after every NA has been replaced by 0;
#          an input made only of NAs therefore gives 0.
test <- function(x) {
  x[is.na(x)] <- 0
  # Return the maximum directly: the original assigned it to an unused local,
  # which made the function's result invisible.
  max(x)
}
# One row per item identifier, holding the result of test() (the largest
# weight) over that item's rows. The formula is kept verbatim because the
# column names of `y` are derived from it and are used further down.
y <- aggregate(
  BigMartSalesTestData$Item_Weight ~ BigMartSalesTestData$Item_Identifier,
  data = BigMartSalesTestData,
  FUN = test
)
head(y)
##   BigMartSalesTestData$Item_Identifier BigMartSalesTestData$Item_Weight
## 1                                DRA12                           11.600
## 2                                DRA24                           19.350
## 3                                DRA59                            8.270
## 4                                DRB01                            7.390
## 5                                DRB13                            6.115
## 6                                DRB24                            8.785
# Lookup from item identifier to the weight aggregated in `y`.
# Keys are converted to character first: indexing a list with a factor uses
# the factor's integer codes rather than its labels, which can leave the
# list's names and values misaligned.
item_ids_test <- as.character(y$`BigMartSalesTestData$Item_Identifier`)
item_weights_test <- y$`BigMartSalesTestData$Item_Weight`
items_weight_identifier_test <- setNames(
  as.list(item_weights_test),
  item_ids_test
)
length(items_weight_identifier_test)
## [1] 1543
## NOTE(review): 1543 was recorded from the original run; the length now equals nrow(y) - re-run to refresh.

# Fill every missing Item_Weight in one vectorised step (no 1:length() loop):
# use the weight known for the same item identifier, and fall back to the
# overall mean when the identifier has no entry in the lookup.
fallback_weight_test <- 12.696 # Mean of Item_Weight in the summary recorded before imputation
weight_missing_test <- which(is.na(BigMartSalesTestData$Item_Weight))
missing_ids_test <- as.character(
  BigMartSalesTestData$Item_Identifier[weight_missing_test]
)
filled_weights_test <- item_weights_test[match(missing_ids_test, item_ids_test)]
# match() yields NA for identifiers absent from the lookup -> use the mean.
filled_weights_test[is.na(filled_weights_test)] <- fallback_weight_test
BigMartSalesTestData$Item_Weight[weight_missing_test] <- filled_weights_test
summary(BigMartSalesTestData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  DRF48  :   8    Min.   : 4.555   LF     : 206     Min.   :0.00000  
##  FDK57  :   8    1st Qu.: 8.630   low fat:  66     1st Qu.:0.02705  
##  FDN52  :   8    Median :12.350   Low Fat:3396     Median :0.05415  
##  FDP15  :   8    Mean   :12.676   reg    :  78     Mean   :0.06568  
##  FDQ60  :   8    3rd Qu.:16.700   Regular:1935     3rd Qu.:0.09346  
##  FDW10  :   8    Max.   :21.350                    Max.   :0.32364  
##  (Other):5633                                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Snack Foods          : 789   Min.   : 31.99   OUT027 : 624     
##  Fruits and Vegetables: 781   1st Qu.: 94.41   OUT013 : 621     
##  Household            : 638   Median :141.42   OUT035 : 620     
##  Frozen Foods         : 570   Mean   :141.02   OUT046 : 620     
##  Dairy                : 454   3rd Qu.:186.03   OUT049 : 620     
##  Baking Goods         : 438   Max.   :266.59   OUT045 : 619     
##  (Other)              :2011                    (Other):1957     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :1606   Tier 1:1592         
##  1st Qu.:1987              High  : 621   Tier 2:1856         
##  Median :1999              Medium:1862   Tier 3:2233         
##  Mean   :1998              Small :1592                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type  
##  Grocery Store    : 722  
##  Supermarket Type1:3717  
##  Supermarket Type2: 618  
##  Supermarket Type3: 624  
##                          
##                          
## 
#Reducing Item_Fat_Content to two categories, namely "Low Fat" and "Regular"
# Collapse the spelling variants of the fat-content labels and store the
# result back as a factor.
BigMartSalesTestData$Item_Fat_Content <- local({
  fat_labels <- as.character(BigMartSalesTestData$Item_Fat_Content)
  fat_labels[fat_labels %in% c("LF", "low Fat", "low fat")] <- "Low Fat"
  fat_labels[fat_labels %in% "reg"] <- "Regular"
  as.factor(fat_labels)
})
summary(BigMartSalesTestData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility  
##  DRF48  :   8    Min.   : 4.555   Low Fat:3668     Min.   :0.00000  
##  FDK57  :   8    1st Qu.: 8.630   Regular:2013     1st Qu.:0.02705  
##  FDN52  :   8    Median :12.350                    Median :0.05415  
##  FDP15  :   8    Mean   :12.676                    Mean   :0.06568  
##  FDQ60  :   8    3rd Qu.:16.700                    3rd Qu.:0.09346  
##  FDW10  :   8    Max.   :21.350                    Max.   :0.32364  
##  (Other):5633                                                       
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Snack Foods          : 789   Min.   : 31.99   OUT027 : 624     
##  Fruits and Vegetables: 781   1st Qu.: 94.41   OUT013 : 621     
##  Household            : 638   Median :141.42   OUT035 : 620     
##  Frozen Foods         : 570   Mean   :141.02   OUT046 : 620     
##  Dairy                : 454   3rd Qu.:186.03   OUT049 : 620     
##  Baking Goods         : 438   Max.   :266.59   OUT045 : 619     
##  (Other)              :2011                    (Other):1957     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :1606   Tier 1:1592         
##  1st Qu.:1987              High  : 621   Tier 2:1856         
##  Median :1999              Medium:1862   Tier 3:2233         
##  Mean   :1998              Small :1592                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type  
##  Grocery Store    : 722  
##  Supermarket Type1:3717  
##  Supermarket Type2: 618  
##  Supermarket Type3: 624  
##                          
##                          
## 
#Replacing Item_Visibility values of exactly 0 with the overall mean visibility
# Replace Item_Visibility values of exactly 0 with the overall mean
# (0.06568, the Mean of Item_Visibility in the summary recorded above).
# Done as one vectorised assignment: the previous 1:length() loop with a
# scalar `if` failed on an empty column and on NA entries; which() skips NAs.
mean_visibility_test <- 0.06568
zero_visibility_test <- which(BigMartSalesTestData$Item_Visibility == 0)
BigMartSalesTestData$Item_Visibility[zero_visibility_test] <- mean_visibility_test
summary(BigMartSalesTestData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility   
##  DRF48  :   8    Min.   : 4.555   Low Fat:3668     Min.   :0.003591  
##  FDK57  :   8    1st Qu.: 8.630   Regular:2013     1st Qu.:0.033208  
##  FDN52  :   8    Median :12.350                    Median :0.062137  
##  FDP15  :   8    Mean   :12.676                    Mean   :0.069765  
##  FDQ60  :   8    3rd Qu.:16.700                    3rd Qu.:0.093463  
##  FDW10  :   8    Max.   :21.350                    Max.   :0.323637  
##  (Other):5633                                                        
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Snack Foods          : 789   Min.   : 31.99   OUT027 : 624     
##  Fruits and Vegetables: 781   1st Qu.: 94.41   OUT013 : 621     
##  Household            : 638   Median :141.42   OUT035 : 620     
##  Frozen Foods         : 570   Mean   :141.02   OUT046 : 620     
##  Dairy                : 454   3rd Qu.:186.03   OUT049 : 620     
##  Baking Goods         : 438   Max.   :266.59   OUT045 : 619     
##  (Other)              :2011                    (Other):1957     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985                    :1606   Tier 1:1592         
##  1st Qu.:1987              High  : 621   Tier 2:1856         
##  Median :1999              Medium:1862   Tier 3:2233         
##  Mean   :1998              Small :1592                       
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type  
##  Grocery Store    : 722  
##  Supermarket Type1:3717  
##  Supermarket Type2: 618  
##  Supermarket Type3: 624  
##                          
##                          
## 
#Predicting Outlet_Size ("High", "Medium" or "Small") with a random forest so that the blank (missing) values can be filled in
# Keep the rows whose outlet size is not blank, split them 80/20 into a
# fitting part (Train_t) and a hold-out part (Test_t), fit a random forest
# for Outlet_Size on every other column except the item identifier, and
# cross-tabulate actual vs predicted sizes on the hold-out rows.
# The seed is set before the split so the partition is reproducible.
library(caTools)
set.seed(100)
BigMartSalesTestData$Outlet_Size <- as.character(BigMartSalesTestData$Outlet_Size)
BigMartSubsetSalesTestData <- BigMartSalesTestData[
  which(BigMartSalesTestData$Outlet_Size != ""),
]
spl <- sample.split(BigMartSubsetSalesTestData$Outlet_Size, SplitRatio = 0.8)
Train_t <- BigMartSubsetSalesTestData[which(spl), ]
Test_t <- BigMartSubsetSalesTestData[which(!spl), ]
library(randomForest)
Train_t$Outlet_Size <- as.factor(Train_t$Outlet_Size)
Test_t$Outlet_Size <- as.factor(Test_t$Outlet_Size)
SizeForest_t <- randomForest(
  Outlet_Size ~ . - Item_Identifier,
  data = Train_t,
  ntree = 100,
  nodesize = 25
)
PredictForest_t <- predict(SizeForest_t, newdata = Test_t)
table(Test_t$Outlet_Size, PredictForest_t)
##         PredictForest_t
##          High Medium Small
##   High    124      0     0
##   Medium    0    372     0
##   Small     0      0   318
# Fill in Outlet_Size only where it is blank (or NA). The previous version
# replaced the whole column with model predictions, which would also
# overwrite observed sizes whenever the model disagreed with them.
outlet_size_test <- as.character(BigMartSalesTestData$Outlet_Size)
size_missing_test <- which(is.na(outlet_size_test) | outlet_size_test == "")
if (length(size_missing_test) > 0) {
  imputed_size_test <- predict(
    SizeForest_t,
    newdata = BigMartSalesTestData[size_missing_test, ]
  )
  outlet_size_test[size_missing_test] <- as.character(imputed_size_test)
}
# Store as a factor with the same class levels the model predicts.
BigMartSalesTestData$Outlet_Size <- factor(
  outlet_size_test,
  levels = levels(SizeForest_t$predicted)
)
summary(BigMartSalesTestData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility   
##  DRF48  :   8    Min.   : 4.555   Low Fat:3668     Min.   :0.003591  
##  FDK57  :   8    1st Qu.: 8.630   Regular:2013     1st Qu.:0.033208  
##  FDN52  :   8    Median :12.350                    Median :0.062137  
##  FDP15  :   8    Mean   :12.676                    Mean   :0.069765  
##  FDQ60  :   8    3rd Qu.:16.700                    3rd Qu.:0.093463  
##  FDW10  :   8    Max.   :21.350                    Max.   :0.323637  
##  (Other):5633                                                        
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Snack Foods          : 789   Min.   : 31.99   OUT027 : 624     
##  Fruits and Vegetables: 781   1st Qu.: 94.41   OUT013 : 621     
##  Household            : 638   Median :141.42   OUT035 : 620     
##  Frozen Foods         : 570   Mean   :141.02   OUT046 : 620     
##  Dairy                : 454   3rd Qu.:186.03   OUT049 : 620     
##  Baking Goods         : 438   Max.   :266.59   OUT045 : 619     
##  (Other)              :2011                    (Other):1957     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985              High  : 621   Tier 1:1592         
##  1st Qu.:1987              Medium:3468   Tier 2:1856         
##  Median :1999              Small :1592   Tier 3:2233         
##  Mean   :1998                                                
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type  
##  Grocery Store    : 722  
##  Supermarket Type1:3717  
##  Supermarket Type2: 618  
##  Supermarket Type3: 624  
##                          
##                          
## 
# Save the cleaned test data (row names are written as well, the default).
write.csv(BigMartSalesTestData, file = "BigMartSalesCleanTestData.csv")
#Doing all analysis on Training  Data

#Task 1: Read your dataset in R and visualize the length and breadth of your dataset.
# Read the cleaned training data back in and show its first rows.
# `header` is spelled out instead of relying on partial argument matching.
# stringsAsFactors is set explicitly because read.csv() stopped converting
# strings to factors by default in R 4.0.0, while the summary recorded
# further down shows per-level counts (i.e. factor columns).
BigMartsFinalData <- read.csv(
  file = "BigMartSalesCleanData.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(BigMartsFinalData)
##   X Item_Identifier Item_Weight Item_Fat_Content Item_Visibility
## 1 1           FDA15       9.300          Low Fat      0.01604730
## 2 2           DRC01       5.920          Regular      0.01927822
## 3 3           FDN15      17.500          Low Fat      0.01676007
## 4 4           FDX07      19.200          Regular      0.06613000
## 5 5           NCD19       8.930          Low Fat      0.06613000
## 6 6           FDP36      10.395          Regular      0.06613000
##               Item_Type Item_MRP Outlet_Identifier
## 1                 Dairy 249.8092            OUT049
## 2           Soft Drinks  48.2692            OUT018
## 3                  Meat 141.6180            OUT049
## 4 Fruits and Vegetables 182.0950            OUT010
## 5             Household  53.8614            OUT013
## 6          Baking Goods  51.4008            OUT018
##   Outlet_Establishment_Year Outlet_Size Outlet_Location_Type
## 1                      1999      Medium               Tier 1
## 2                      2009      Medium               Tier 3
## 3                      1999      Medium               Tier 1
## 4                      1998      Medium               Tier 3
## 5                      1987        High               Tier 3
## 6                      2009      Medium               Tier 3
##         Outlet_Type Item_Outlet_Sales
## 1 Supermarket Type1         3735.1380
## 2 Supermarket Type2          443.4228
## 3 Supermarket Type1         2097.2700
## 4     Grocery Store          732.3800
## 5 Supermarket Type1          994.7052
## 6 Supermarket Type2          556.6088
# Number of rows, then number of columns, of the cleaned data set.
NROW(BigMartsFinalData)
## [1] 8523
NCOL(BigMartsFinalData)
## [1] 13
#Task 2:Create a descriptive statistics (min, max, median etc) of each variable.
summary(BigMartsFinalData)
##        X        Item_Identifier  Item_Weight     Item_Fat_Content
##  Min.   :   1   FDG33  :  10    Min.   : 4.555   Low Fat:5517    
##  1st Qu.:2132   FDW13  :  10    1st Qu.: 8.785   Regular:3006    
##  Median :4262   DRE49  :   9    Median :12.650                   
##  Mean   :4262   DRN47  :   9    Mean   :12.875                   
##  3rd Qu.:6392   FDD38  :   9    3rd Qu.:16.850                   
##  Max.   :8523   FDF52  :   9    Max.   :21.350                   
##                 (Other):8467                                     
##  Item_Visibility                    Item_Type       Item_MRP     
##  Min.   :0.003575   Fruits and Vegetables:1232   Min.   : 31.29  
##  1st Qu.:0.033085   Snack Foods          :1200   1st Qu.: 93.83  
##  Median :0.062517   Household            : 910   Median :143.01  
##  Mean   :0.070213   Frozen Foods         : 856   Mean   :140.99  
##  3rd Qu.:0.094585   Dairy                : 682   3rd Qu.:185.64  
##  Max.   :0.328391   Canned               : 649   Max.   :266.89  
##                     (Other)              :2994                   
##  Outlet_Identifier Outlet_Establishment_Year Outlet_Size  
##  OUT027 : 935      Min.   :1985              High  : 932  
##  OUT013 : 932      1st Qu.:1987              Medium:5203  
##  OUT035 : 930      Median :1999              Small :2388  
##  OUT046 : 930      Mean   :1998                           
##  OUT049 : 930      3rd Qu.:2004                           
##  OUT045 : 929      Max.   :2009                           
##  (Other):2937                                             
##  Outlet_Location_Type            Outlet_Type   Item_Outlet_Sales 
##  Tier 1:2388          Grocery Store    :1083   Min.   :   33.29  
##  Tier 2:2785          Supermarket Type1:5577   1st Qu.:  834.25  
##  Tier 3:3350          Supermarket Type2: 928   Median : 1794.33  
##                       Supermarket Type3: 935   Mean   : 2181.29  
##                                                3rd Qu.: 3101.30  
##                                                Max.   :13086.97  
## 
#Task 3: Create one-way contingency tables for the categorical variables in your dataset.
# Category 1: Item Fat Content
# Counts per fat-content level, followed by the same table as percentages.
my_item_fat_content_table <- table(BigMartsFinalData[["Item_Fat_Content"]])
my_item_fat_content_table
## 
## Low Fat Regular 
##    5517    3006
100 * prop.table(my_item_fat_content_table)
## 
##  Low Fat  Regular 
## 64.73073 35.26927
#Category 2:Item Type
# Counts per item type, followed by the same table as percentages.
my_item_type_table <- table(BigMartsFinalData[["Item_Type"]])
my_item_type_table
## 
##          Baking Goods                Breads             Breakfast 
##                   648                   251                   110 
##                Canned                 Dairy          Frozen Foods 
##                   649                   682                   856 
## Fruits and Vegetables           Hard Drinks    Health and Hygiene 
##                  1232                   214                   520 
##             Household                  Meat                Others 
##                   910                   425                   169 
##               Seafood           Snack Foods           Soft Drinks 
##                    64                  1200                   445 
##         Starchy Foods 
##                   148
100 * prop.table(my_item_type_table)
## 
##          Baking Goods                Breads             Breakfast 
##             7.6029567             2.9449724             1.2906254 
##                Canned                 Dairy          Frozen Foods 
##             7.6146897             8.0018773            10.0434119 
## Fruits and Vegetables           Hard Drinks    Health and Hygiene 
##            14.4550041             2.5108530             6.1011381 
##             Household                  Meat                Others 
##            10.6769917             4.9865071             1.9828699 
##               Seafood           Snack Foods           Soft Drinks 
##             0.7509093            14.0795495             5.2211663 
##         Starchy Foods 
##             1.7364778
#Category 3:Outlet Identifier
# Counts per outlet identifier, followed by the same table as percentages.
my_outlet_identifier_table <- table(BigMartsFinalData[["Outlet_Identifier"]])
my_outlet_identifier_table
## 
## OUT010 OUT013 OUT017 OUT018 OUT019 OUT027 OUT035 OUT045 OUT046 OUT049 
##    555    932    926    928    528    935    930    929    930    930
100 * prop.table(my_outlet_identifier_table)
## 
##    OUT010    OUT013    OUT017    OUT018    OUT019    OUT027    OUT035 
##  6.511792 10.935117 10.864719 10.888185  6.195002 10.970316 10.911651 
##    OUT045    OUT046    OUT049 
## 10.899918 10.911651 10.911651
#Category 4:Outlet Establishment Year
# Counts per establishment year, followed by the same table as percentages.
my_outlet_establishment_year_table <- table(
  BigMartsFinalData[["Outlet_Establishment_Year"]]
)
my_outlet_establishment_year_table
## 
## 1985 1987 1997 1998 1999 2002 2004 2007 2009 
## 1463  932  930  555  930  929  930  926  928
100 * prop.table(my_outlet_establishment_year_table)
## 
##      1985      1987      1997      1998      1999      2002      2004 
## 17.165317 10.935117 10.911651  6.511792 10.911651 10.899918 10.911651 
##      2007      2009 
## 10.864719 10.888185
# Category 5: Outlet Size
# One-way frequency table: number of rows of BigMartsFinalData per outlet
# size level. The column is passed as a `$` expression, so table() leaves
# the dimension unnamed in the printout.
my_outlet_size_table <- table(BigMartsFinalData$Outlet_Size)
my_outlet_size_table
## 
##   High Medium  Small 
##    932   5203   2388
# Share of each outlet size as a percentage of all tabulated rows
# (proportions of the table total, scaled by 100).
100 * prop.table(my_outlet_size_table)
## 
##     High   Medium    Small 
## 10.93512 61.04658 28.01830
# Category 6: Outlet_Location_Type
# One-way frequency table: number of rows of BigMartsFinalData per location
# tier. The column is passed as a `$` expression, so table() leaves the
# dimension unnamed in the printout.
my_outlet_location_type_table <- table(BigMartsFinalData$Outlet_Location_Type)
my_outlet_location_type_table
## 
## Tier 1 Tier 2 Tier 3 
##   2388   2785   3350
# Share of each location tier as a percentage of all tabulated rows
# (proportions of the table total, scaled by 100).
100 * prop.table(my_outlet_location_type_table)
## 
##   Tier 1   Tier 2   Tier 3 
## 28.01830 32.67629 39.30541
# Category 7: Outlet_Type
# One-way frequency table: number of rows of BigMartsFinalData per outlet
# type. The column is passed as a `$` expression, so table() leaves the
# dimension unnamed in the printout.
my_outlet_type_table <- table(BigMartsFinalData$Outlet_Type)
my_outlet_type_table
## 
##     Grocery Store Supermarket Type1 Supermarket Type2 Supermarket Type3 
##              1083              5577               928               935
# Share of each outlet type as a percentage of all tabulated rows
# (proportions of the table total, scaled by 100).
100 * prop.table(my_outlet_type_table)
## 
##     Grocery Store Supermarket Type1 Supermarket Type2 Supermarket Type3 
##          12.70679          65.43471          10.88818          10.97032
#Task 4: Create two-way contingency tables for the categorical variables in your dataset.

# 4.1 Item Fat Content vs Item Type
# Two-way contingency table: Item_Fat_Content in rows, Item_Type in columns;
# every cell is the number of rows of BigMartsFinalData with that pairing.
my_table_1 <- xtabs(
  ~ Item_Fat_Content + Item_Type,
  data = BigMartsFinalData
)
my_table_1
##                 Item_Type
## Item_Fat_Content Baking Goods Breads Breakfast Canned Dairy Frozen Foods
##          Low Fat          329    140        41    341   418          450
##          Regular          319    111        69    308   264          406
##                 Item_Type
## Item_Fat_Content Fruits and Vegetables Hard Drinks Health and Hygiene
##          Low Fat                   630         214                520
##          Regular                   602           0                  0
##                 Item_Type
## Item_Fat_Content Household Meat Others Seafood Snack Foods Soft Drinks
##          Low Fat       910  170    169      37         692         374
##          Regular         0  255      0      27         508          71
##                 Item_Type
## Item_Fat_Content Starchy Foods
##          Low Fat            82
##          Regular            66
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_1, margin = NULL)
##                 Item_Type
## Item_Fat_Content Baking Goods      Breads   Breakfast      Canned
##          Low Fat  0.038601431 0.016426141 0.004810513 0.040009386
##          Regular  0.037428136 0.013023583 0.008095741 0.036137510
##                 Item_Type
## Item_Fat_Content       Dairy Frozen Foods Fruits and Vegetables
##          Low Fat 0.049043764  0.052798310           0.073917635
##          Regular 0.030975009  0.047635809           0.070632406
##                 Item_Type
## Item_Fat_Content Hard Drinks Health and Hygiene   Household        Meat
##          Low Fat 0.025108530        0.061011381 0.106769917 0.019946028
##          Regular 0.000000000        0.000000000 0.000000000 0.029919043
##                 Item_Type
## Item_Fat_Content      Others     Seafood Snack Foods Soft Drinks
##          Low Fat 0.019828699 0.004341194 0.081192069 0.043881262
##          Regular 0.000000000 0.003167899 0.059603426 0.008330400
##                 Item_Type
## Item_Fat_Content Starchy Foods
##          Low Fat   0.009621025
##          Regular   0.007743752
# 4.2 Item Fat Content vs Outlet Identifier
# Two-way contingency table: Item_Fat_Content in rows, Outlet_Identifier in
# columns; every cell is the number of rows with that pairing.
my_table_2 <- xtabs(
  ~ Item_Fat_Content + Outlet_Identifier,
  data = BigMartsFinalData
)
my_table_2
##                 Outlet_Identifier
## Item_Fat_Content OUT010 OUT013 OUT017 OUT018 OUT019 OUT027 OUT035 OUT045
##          Low Fat    359    606    594    598    346    605    606    609
##          Regular    196    326    332    330    182    330    324    320
##                 Outlet_Identifier
## Item_Fat_Content OUT046 OUT049
##          Low Fat    598    596
##          Regular    332    334
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_2, margin = NULL)
##                 Outlet_Identifier
## Item_Fat_Content     OUT010     OUT013     OUT017     OUT018     OUT019
##          Low Fat 0.04212132 0.07110172 0.06969377 0.07016309 0.04059603
##          Regular 0.02299660 0.03824944 0.03895342 0.03871876 0.02135398
##                 Outlet_Identifier
## Item_Fat_Content     OUT027     OUT035     OUT045     OUT046     OUT049
##          Low Fat 0.07098440 0.07110172 0.07145371 0.07016309 0.06992843
##          Regular 0.03871876 0.03801478 0.03754547 0.03895342 0.03918808
# 4.3 Item Fat Content vs Outlet Establishment Year
# Two-way contingency table: Item_Fat_Content in rows,
# Outlet_Establishment_Year in columns; every cell is the number of rows
# with that pairing.
my_table_3 <- xtabs(
  ~ Item_Fat_Content + Outlet_Establishment_Year,
  data = BigMartsFinalData
)
my_table_3
##                 Outlet_Establishment_Year
## Item_Fat_Content 1985 1987 1997 1998 1999 2002 2004 2007 2009
##          Low Fat  951  606  598  359  596  609  606  594  598
##          Regular  512  326  332  196  334  320  324  332  330
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_3, margin = NULL)
##                 Outlet_Establishment_Year
## Item_Fat_Content       1985       1987       1997       1998       1999
##          Low Fat 0.11158043 0.07110172 0.07016309 0.04212132 0.06992843
##          Regular 0.06007274 0.03824944 0.03895342 0.02299660 0.03918808
##                 Outlet_Establishment_Year
## Item_Fat_Content       2002       2004       2007       2009
##          Low Fat 0.07145371 0.07110172 0.06969377 0.07016309
##          Regular 0.03754547 0.03801478 0.03895342 0.03871876
# 4.4 Item Fat Content vs Outlet Size
# Two-way contingency table: Item_Fat_Content in rows, Outlet_Size in
# columns; every cell is the number of rows with that pairing.
my_table_4 <- xtabs(
  ~ Item_Fat_Content + Outlet_Size,
  data = BigMartsFinalData
)
my_table_4
##                 Outlet_Size
## Item_Fat_Content High Medium Small
##          Low Fat  606   3361  1550
##          Regular  326   1842   838
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_4, margin = NULL)
##                 Outlet_Size
## Item_Fat_Content       High     Medium      Small
##          Low Fat 0.07110172 0.39434471 0.18186085
##          Regular 0.03824944 0.21612108 0.09832219
# 4.5 Item Fat Content vs Outlet_Location_Type
# Two-way contingency table: Item_Fat_Content in rows, Outlet_Location_Type
# in columns; every cell is the number of rows with that pairing.
my_table_5 <- xtabs(
  ~ Item_Fat_Content + Outlet_Location_Type,
  data = BigMartsFinalData
)
my_table_5
##                 Outlet_Location_Type
## Item_Fat_Content Tier 1 Tier 2 Tier 3
##          Low Fat   1540   1809   2168
##          Regular    848    976   1182
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_5, margin = NULL)
##                 Outlet_Location_Type
## Item_Fat_Content     Tier 1     Tier 2     Tier 3
##          Low Fat 0.18068755 0.21224921 0.25437053
##          Regular 0.09949548 0.11451367 0.13868356
# 4.6 Item Fat Content vs Outlet_Type
# Two-way contingency table: Item_Fat_Content in rows, Outlet_Type in
# columns; every cell is the number of rows with that pairing.
my_table_6 <- xtabs(
  ~ Item_Fat_Content + Outlet_Type,
  data = BigMartsFinalData
)
my_table_6
##                 Outlet_Type
## Item_Fat_Content Grocery Store Supermarket Type1 Supermarket Type2
##          Low Fat           705              3609               598
##          Regular           378              1968               330
##                 Outlet_Type
## Item_Fat_Content Supermarket Type3
##          Low Fat               605
##          Regular               330
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_6, margin = NULL)
##                 Outlet_Type
## Item_Fat_Content Grocery Store Supermarket Type1 Supermarket Type2
##          Low Fat    0.08271735        0.42344245        0.07016309
##          Regular    0.04435058        0.23090461        0.03871876
##                 Outlet_Type
## Item_Fat_Content Supermarket Type3
##          Low Fat        0.07098440
##          Regular        0.03871876
# 4.7 Item Type vs Outlet Identifier
# Two-way contingency table: Item_Type in rows, Outlet_Identifier in
# columns; every cell is the number of rows with that pairing.
my_table_7 <- xtabs(
  ~ Item_Type + Outlet_Identifier,
  data = BigMartsFinalData
)
my_table_7
##                        Outlet_Identifier
## Item_Type               OUT010 OUT013 OUT017 OUT018 OUT019 OUT027 OUT035
##   Baking Goods              42     73     73     68     43     69     68
##   Breads                    17     25     22     27     16     31     29
##   Breakfast                  9     13     12     12     10     11     10
##   Canned                    35     65     69     78     38     72     79
##   Dairy                     43     80     74     73     49     67     71
##   Frozen Foods              54     92    106     92     49     89     92
##   Fruits and Vegetables     79    142    127    135     73    140    129
##   Hard Drinks               16     23     22     22      8     23     22
##   Health and Hygiene        37     61     61     58     30     60     50
##   Household                 67    103     95     95     52     99    102
##   Meat                      34     41     44     46     32     56     43
##   Others                    10     16     16     20     17     15     16
##   Seafood                    4      5      5      7      6      7      8
##   Snack Foods               71    125    128    132     75    137    140
##   Soft Drinks               28     49     54     46     26     45     52
##   Starchy Foods              9     19     18     17      4     14     19
##                        Outlet_Identifier
## Item_Type               OUT045 OUT046 OUT049
##   Baking Goods              70     76     66
##   Breads                    33     26     25
##   Breakfast                 10     10     13
##   Canned                    74     72     67
##   Dairy                     69     78     78
##   Frozen Foods              81    108     93
##   Fruits and Vegetables    143    126    138
##   Hard Drinks               28     20     30
##   Health and Hygiene        55     56     52
##   Household                 99    103     95
##   Meat                      38     44     47
##   Others                    20     22     17
##   Seafood                    9      6      7
##   Snack Foods              133    120    139
##   Soft Drinks               51     48     46
##   Starchy Foods             16     15     17
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_7, margin = NULL)
##                        Outlet_Identifier
## Item_Type                     OUT010       OUT013       OUT017
##   Baking Goods          0.0049278423 0.0085650593 0.0085650593
##   Breads                0.0019946028 0.0029332395 0.0025812507
##   Breakfast             0.0010559662 0.0015252845 0.0014079549
##   Canned                0.0041065353 0.0076264226 0.0080957409
##   Dairy                 0.0050451719 0.0093863663 0.0086823888
##   Frozen Foods          0.0063357973 0.0107943212 0.0124369354
##   Fruits and Vegetables 0.0092690367 0.0166608002 0.0149008565
##   Hard Drinks           0.0018772733 0.0026985803 0.0025812507
##   Health and Hygiene    0.0043411944 0.0071571043 0.0071571043
##   Household             0.0078610818 0.0120849466 0.0111463100
##   Meat                  0.0039892057 0.0048105127 0.0051625015
##   Others                0.0011732958 0.0018772733 0.0018772733
##   Seafood               0.0004693183 0.0005866479 0.0005866479
##   Snack Foods           0.0083304001 0.0146661973 0.0150181861
##   Soft Drinks           0.0032852282 0.0057491494 0.0063357973
##   Starchy Foods         0.0010559662 0.0022292620 0.0021119324
##                        Outlet_Identifier
## Item_Type                     OUT018       OUT019       OUT027
##   Baking Goods          0.0079784114 0.0050451719 0.0080957409
##   Breads                0.0031678986 0.0018772733 0.0036372169
##   Breakfast             0.0014079549 0.0011732958 0.0012906254
##   Canned                0.0091517071 0.0044585240 0.0084477297
##   Dairy                 0.0085650593 0.0057491494 0.0078610818
##   Frozen Foods          0.0107943212 0.0057491494 0.0104423325
##   Fruits and Vegetables 0.0158394931 0.0085650593 0.0164261410
##   Hard Drinks           0.0025812507 0.0009386366 0.0026985803
##   Health and Hygiene    0.0068051156 0.0035198874 0.0070397747
##   Household             0.0111463100 0.0061011381 0.0116156283
##   Meat                  0.0053971606 0.0037545465 0.0065704564
##   Others                0.0023465916 0.0019946028 0.0017599437
##   Seafood               0.0008213071 0.0007039775 0.0008213071
##   Snack Foods           0.0154875044 0.0087997184 0.0160741523
##   Soft Drinks           0.0053971606 0.0030505690 0.0052798310
##   Starchy Foods         0.0019946028 0.0004693183 0.0016426141
##                        Outlet_Identifier
## Item_Type                     OUT035       OUT045       OUT046
##   Baking Goods          0.0079784114 0.0082130705 0.0089170480
##   Breads                0.0034025578 0.0038718761 0.0030505690
##   Breakfast             0.0011732958 0.0011732958 0.0011732958
##   Canned                0.0092690367 0.0086823888 0.0084477297
##   Dairy                 0.0083304001 0.0080957409 0.0091517071
##   Frozen Foods          0.0107943212 0.0095036959 0.0126715945
##   Fruits and Vegetables 0.0151355157 0.0167781298 0.0147835269
##   Hard Drinks           0.0025812507 0.0032852282 0.0023465916
##   Health and Hygiene    0.0058664789 0.0064531268 0.0065704564
##   Household             0.0119676170 0.0116156283 0.0120849466
##   Meat                  0.0050451719 0.0044585240 0.0051625015
##   Others                0.0018772733 0.0023465916 0.0025812507
##   Seafood               0.0009386366 0.0010559662 0.0007039775
##   Snack Foods           0.0164261410 0.0156048340 0.0140795495
##   Soft Drinks           0.0061011381 0.0059838085 0.0056318198
##   Starchy Foods         0.0022292620 0.0018772733 0.0017599437
##                        Outlet_Identifier
## Item_Type                     OUT049
##   Baking Goods          0.0077437522
##   Breads                0.0029332395
##   Breakfast             0.0015252845
##   Canned                0.0078610818
##   Dairy                 0.0091517071
##   Frozen Foods          0.0109116508
##   Fruits and Vegetables 0.0161914819
##   Hard Drinks           0.0035198874
##   Health and Hygiene    0.0061011381
##   Household             0.0111463100
##   Meat                  0.0055144902
##   Others                0.0019946028
##   Seafood               0.0008213071
##   Snack Foods           0.0163088115
##   Soft Drinks           0.0053971606
##   Starchy Foods         0.0019946028
# 4.8 Item Type vs Outlet Establishment Year
# Two-way contingency table: Item_Type in rows, Outlet_Establishment_Year in
# columns; every cell is the number of rows with that pairing.
my_table_8 <- xtabs(
  ~ Item_Type + Outlet_Establishment_Year,
  data = BigMartsFinalData
)
my_table_8
##                        Outlet_Establishment_Year
## Item_Type               1985 1987 1997 1998 1999 2002 2004 2007 2009
##   Baking Goods           112   73   76   42   66   70   68   73   68
##   Breads                  47   25   26   17   25   33   29   22   27
##   Breakfast               21   13   10    9   13   10   10   12   12
##   Canned                 110   65   72   35   67   74   79   69   78
##   Dairy                  116   80   78   43   78   69   71   74   73
##   Frozen Foods           138   92  108   54   93   81   92  106   92
##   Fruits and Vegetables  213  142  126   79  138  143  129  127  135
##   Hard Drinks             31   23   20   16   30   28   22   22   22
##   Health and Hygiene      90   61   56   37   52   55   50   61   58
##   Household              151  103  103   67   95   99  102   95   95
##   Meat                    88   41   44   34   47   38   43   44   46
##   Others                  32   16   22   10   17   20   16   16   20
##   Seafood                 13    5    6    4    7    9    8    5    7
##   Snack Foods            212  125  120   71  139  133  140  128  132
##   Soft Drinks             71   49   48   28   46   51   52   54   46
##   Starchy Foods           18   19   15    9   17   16   19   18   17
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_8, margin = NULL)
##                        Outlet_Establishment_Year
## Item_Type                       1985         1987         1997
##   Baking Goods          0.0131409128 0.0085650593 0.0089170480
##   Breads                0.0055144902 0.0029332395 0.0030505690
##   Breakfast             0.0024639212 0.0015252845 0.0011732958
##   Canned                0.0129062537 0.0076264226 0.0084477297
##   Dairy                 0.0136102311 0.0093863663 0.0091517071
##   Frozen Foods          0.0161914819 0.0107943212 0.0126715945
##   Fruits and Vegetables 0.0249912003 0.0166608002 0.0147835269
##   Hard Drinks           0.0036372169 0.0026985803 0.0023465916
##   Health and Hygiene    0.0105596621 0.0071571043 0.0065704564
##   Household             0.0177167664 0.0120849466 0.0120849466
##   Meat                  0.0103250029 0.0048105127 0.0051625015
##   Others                0.0037545465 0.0018772733 0.0025812507
##   Seafood               0.0015252845 0.0005866479 0.0007039775
##   Snack Foods           0.0248738707 0.0146661973 0.0140795495
##   Soft Drinks           0.0083304001 0.0057491494 0.0056318198
##   Starchy Foods         0.0021119324 0.0022292620 0.0017599437
##                        Outlet_Establishment_Year
## Item_Type                       1998         1999         2002
##   Baking Goods          0.0049278423 0.0077437522 0.0082130705
##   Breads                0.0019946028 0.0029332395 0.0038718761
##   Breakfast             0.0010559662 0.0015252845 0.0011732958
##   Canned                0.0041065353 0.0078610818 0.0086823888
##   Dairy                 0.0050451719 0.0091517071 0.0080957409
##   Frozen Foods          0.0063357973 0.0109116508 0.0095036959
##   Fruits and Vegetables 0.0092690367 0.0161914819 0.0167781298
##   Hard Drinks           0.0018772733 0.0035198874 0.0032852282
##   Health and Hygiene    0.0043411944 0.0061011381 0.0064531268
##   Household             0.0078610818 0.0111463100 0.0116156283
##   Meat                  0.0039892057 0.0055144902 0.0044585240
##   Others                0.0011732958 0.0019946028 0.0023465916
##   Seafood               0.0004693183 0.0008213071 0.0010559662
##   Snack Foods           0.0083304001 0.0163088115 0.0156048340
##   Soft Drinks           0.0032852282 0.0053971606 0.0059838085
##   Starchy Foods         0.0010559662 0.0019946028 0.0018772733
##                        Outlet_Establishment_Year
## Item_Type                       2004         2007         2009
##   Baking Goods          0.0079784114 0.0085650593 0.0079784114
##   Breads                0.0034025578 0.0025812507 0.0031678986
##   Breakfast             0.0011732958 0.0014079549 0.0014079549
##   Canned                0.0092690367 0.0080957409 0.0091517071
##   Dairy                 0.0083304001 0.0086823888 0.0085650593
##   Frozen Foods          0.0107943212 0.0124369354 0.0107943212
##   Fruits and Vegetables 0.0151355157 0.0149008565 0.0158394931
##   Hard Drinks           0.0025812507 0.0025812507 0.0025812507
##   Health and Hygiene    0.0058664789 0.0071571043 0.0068051156
##   Household             0.0119676170 0.0111463100 0.0111463100
##   Meat                  0.0050451719 0.0051625015 0.0053971606
##   Others                0.0018772733 0.0018772733 0.0023465916
##   Seafood               0.0009386366 0.0005866479 0.0008213071
##   Snack Foods           0.0164261410 0.0150181861 0.0154875044
##   Soft Drinks           0.0061011381 0.0063357973 0.0053971606
##   Starchy Foods         0.0022292620 0.0021119324 0.0019946028
# 4.9 Item Type vs Outlet Size
# Two-way contingency table: Item_Type in rows, Outlet_Size in columns;
# every cell is the number of rows with that pairing.
my_table_9 <- xtabs(
  ~ Item_Type + Outlet_Size,
  data = BigMartsFinalData
)
my_table_9
##                        Outlet_Size
## Item_Type               High Medium Small
##   Baking Goods            73    388   187
##   Breads                  25    155    71
##   Breakfast               13     67    30
##   Canned                  65    395   189
##   Dairy                   80    404   198
##   Frozen Foods            92    515   249
##   Fruits and Vegetables  142    762   328
##   Hard Drinks             23    141    50
##   Health and Hygiene      61    323   136
##   Household              103    550   257
##   Meat                    41    265   119
##   Others                  16     98    55
##   Seafood                  5     39    20
##   Snack Foods            125    740   335
##   Soft Drinks             49    270   126
##   Starchy Foods           19     91    38
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_9, margin = NULL)
##                        Outlet_Size
## Item_Type                       High       Medium        Small
##   Baking Goods          0.0085650593 0.0455238766 0.0219406312
##   Breads                0.0029332395 0.0181860847 0.0083304001
##   Breakfast             0.0015252845 0.0078610818 0.0035198874
##   Canned                0.0076264226 0.0463451836 0.0221752904
##   Dairy                 0.0093863663 0.0474011498 0.0232312566
##   Frozen Foods          0.0107943212 0.0604247331 0.0292150651
##   Fruits and Vegetables 0.0166608002 0.0894051390 0.0384841018
##   Hard Drinks           0.0026985803 0.0165434706 0.0058664789
##   Health and Hygiene    0.0071571043 0.0378974539 0.0159568227
##   Household             0.0120849466 0.0645312683 0.0301537017
##   Meat                  0.0048105127 0.0310923384 0.0139622199
##   Others                0.0018772733 0.0114982987 0.0064531268
##   Seafood               0.0005866479 0.0045758536 0.0023465916
##   Snack Foods           0.0146661973 0.0868238883 0.0393054089
##   Soft Drinks           0.0057491494 0.0316789863 0.0147835269
##   Starchy Foods         0.0022292620 0.0106769917 0.0044585240
# 4.10 Item Type vs Outlet_Location_Type
# Two-way contingency table: Item_Type in rows, Outlet_Location_Type in
# columns; every cell is the number of rows with that pairing.
my_table_10 <- xtabs(
  ~ Item_Type + Outlet_Location_Type,
  data = BigMartsFinalData
)
my_table_10
##                        Outlet_Location_Type
## Item_Type               Tier 1 Tier 2 Tier 3
##   Baking Goods             185    211    252
##   Breads                    67     84    100
##   Breakfast                 33     32     45
##   Canned                   177    222    250
##   Dairy                    205    214    263
##   Frozen Foods             250    279    327
##   Fruits and Vegetables    337    399    496
##   Hard Drinks               58     72     84
##   Health and Hygiene       138    166    216
##   Household                250    296    364
##   Meat                     123    125    177
##   Others                    56     52     61
##   Seafood                   19     22     23
##   Snack Foods              334    401    465
##   Soft Drinks              120    157    168
##   Starchy Foods             36     53     59
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_10, margin = NULL)
##                        Outlet_Location_Type
## Item_Type                    Tier 1      Tier 2      Tier 3
##   Baking Goods          0.021705972 0.024756541 0.029567054
##   Breads                0.007861082 0.009855685 0.011732958
##   Breakfast             0.003871876 0.003754547 0.005279831
##   Canned                0.020767335 0.026047166 0.029332395
##   Dairy                 0.024052564 0.025108530 0.030857679
##   Frozen Foods          0.029332395 0.032734952 0.038366772
##   Fruits and Vegetables 0.039540068 0.046814502 0.058195471
##   Hard Drinks           0.006805116 0.008447730 0.009855685
##   Health and Hygiene    0.016191482 0.019476710 0.025343189
##   Household             0.029332395 0.034729555 0.042707967
##   Meat                  0.014431538 0.014666197 0.020767335
##   Others                0.006570456 0.006101138 0.007157104
##   Seafood               0.002229262 0.002581251 0.002698580
##   Snack Foods           0.039188079 0.047049161 0.054558254
##   Soft Drinks           0.014079549 0.018420744 0.019711369
##   Starchy Foods         0.004223865 0.006218468 0.006922445
# 4.11 Item Type vs Outlet_Type
# Two-way contingency table: Item_Type in rows, Outlet_Type in columns;
# every cell is the number of rows with that pairing.
my_table_11 <- xtabs(
  ~ Item_Type + Outlet_Type,
  data = BigMartsFinalData
)
my_table_11
##                        Outlet_Type
## Item_Type               Grocery Store Supermarket Type1 Supermarket Type2
##   Baking Goods                     85               426                68
##   Breads                           33               160                27
##   Breakfast                        19                68                12
##   Canned                           73               426                78
##   Dairy                            92               450                73
##   Frozen Foods                    103               572                92
##   Fruits and Vegetables           152               805               135
##   Hard Drinks                      24               145                22
##   Health and Hygiene               67               335                58
##   Household                       119               597                95
##   Meat                             66               257                46
##   Others                           27               107                20
##   Seafood                          10                40                 7
##   Snack Foods                     146               785               132
##   Soft Drinks                      54               300                46
##   Starchy Foods                    13               104                17
##                        Outlet_Type
## Item_Type               Supermarket Type3
##   Baking Goods                         69
##   Breads                               31
##   Breakfast                            11
##   Canned                               72
##   Dairy                                67
##   Frozen Foods                         89
##   Fruits and Vegetables               140
##   Hard Drinks                          23
##   Health and Hygiene                   60
##   Household                            99
##   Meat                                 56
##   Others                               15
##   Seafood                               7
##   Snack Foods                         137
##   Soft Drinks                          45
##   Starchy Foods                        14
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_11, margin = NULL)
##                        Outlet_Type
## Item_Type               Grocery Store Supermarket Type1 Supermarket Type2
##   Baking Goods           0.0099730142      0.0499824006      0.0079784114
##   Breads                 0.0038718761      0.0187727326      0.0031678986
##   Breakfast              0.0022292620      0.0079784114      0.0014079549
##   Canned                 0.0085650593      0.0499824006      0.0091517071
##   Dairy                  0.0107943212      0.0527983105      0.0085650593
##   Frozen Foods           0.0120849466      0.0671125191      0.0107943212
##   Fruits and Vegetables  0.0178340960      0.0944503109      0.0158394931
##   Hard Drinks            0.0028159099      0.0170127889      0.0025812507
##   Health and Hygiene     0.0078610818      0.0393054089      0.0068051156
##   Household              0.0139622199      0.0700457585      0.0111463100
##   Meat                   0.0077437522      0.0301537017      0.0053971606
##   Others                 0.0031678986      0.0125542649      0.0023465916
##   Seafood                0.0011732958      0.0046931832      0.0008213071
##   Snack Foods            0.0171301185      0.0921037193      0.0154875044
##   Soft Drinks            0.0063357973      0.0351988736      0.0053971606
##   Starchy Foods          0.0015252845      0.0122022762      0.0019946028
##                        Outlet_Type
## Item_Type               Supermarket Type3
##   Baking Goods               0.0080957409
##   Breads                     0.0036372169
##   Breakfast                  0.0012906254
##   Canned                     0.0084477297
##   Dairy                      0.0078610818
##   Frozen Foods               0.0104423325
##   Fruits and Vegetables      0.0164261410
##   Hard Drinks                0.0026985803
##   Health and Hygiene         0.0070397747
##   Household                  0.0116156283
##   Meat                       0.0065704564
##   Others                     0.0017599437
##   Seafood                    0.0008213071
##   Snack Foods                0.0160741523
##   Soft Drinks                0.0052798310
##   Starchy Foods              0.0016426141
# 4.12 Outlet Identifier vs Outlet Establishment Year
# Two-way contingency table: Outlet_Identifier in rows,
# Outlet_Establishment_Year in columns; every cell is the number of rows
# with that pairing. In the recorded output each outlet has a non-zero
# count in exactly one year column.
my_table_12 <- xtabs(
  ~ Outlet_Identifier + Outlet_Establishment_Year,
  data = BigMartsFinalData
)
my_table_12
##                  Outlet_Establishment_Year
## Outlet_Identifier 1985 1987 1997 1998 1999 2002 2004 2007 2009
##            OUT010    0    0    0  555    0    0    0    0    0
##            OUT013    0  932    0    0    0    0    0    0    0
##            OUT017    0    0    0    0    0    0    0  926    0
##            OUT018    0    0    0    0    0    0    0    0  928
##            OUT019  528    0    0    0    0    0    0    0    0
##            OUT027  935    0    0    0    0    0    0    0    0
##            OUT035    0    0    0    0    0    0  930    0    0
##            OUT045    0    0    0    0    0  929    0    0    0
##            OUT046    0    0  930    0    0    0    0    0    0
##            OUT049    0    0    0    0  930    0    0    0    0
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_12, margin = NULL)
##                  Outlet_Establishment_Year
## Outlet_Identifier       1985       1987       1997       1998       1999
##            OUT010 0.00000000 0.00000000 0.00000000 0.06511792 0.00000000
##            OUT013 0.00000000 0.10935117 0.00000000 0.00000000 0.00000000
##            OUT017 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT018 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT019 0.06195002 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT027 0.10970316 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT035 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT045 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT046 0.00000000 0.00000000 0.10911651 0.00000000 0.00000000
##            OUT049 0.00000000 0.00000000 0.00000000 0.00000000 0.10911651
##                  Outlet_Establishment_Year
## Outlet_Identifier       2002       2004       2007       2009
##            OUT010 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT013 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT017 0.00000000 0.00000000 0.10864719 0.00000000
##            OUT018 0.00000000 0.00000000 0.00000000 0.10888185
##            OUT019 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT027 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT035 0.00000000 0.10911651 0.00000000 0.00000000
##            OUT045 0.10899918 0.00000000 0.00000000 0.00000000
##            OUT046 0.00000000 0.00000000 0.00000000 0.00000000
##            OUT049 0.00000000 0.00000000 0.00000000 0.00000000
# 4.13 Outlet Identifier vs Outlet Size
# Two-way contingency table: Outlet_Identifier in rows, Outlet_Size in
# columns; every cell is the number of rows with that pairing. In the
# recorded output each outlet has a non-zero count in exactly one size
# column.
my_table_13 <- xtabs(
  ~ Outlet_Identifier + Outlet_Size,
  data = BigMartsFinalData
)
my_table_13
##                  Outlet_Size
## Outlet_Identifier High Medium Small
##            OUT010    0    555     0
##            OUT013  932      0     0
##            OUT017    0    926     0
##            OUT018    0    928     0
##            OUT019    0      0   528
##            OUT027    0    935     0
##            OUT035    0      0   930
##            OUT045    0    929     0
##            OUT046    0      0   930
##            OUT049    0    930     0
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_13, margin = NULL)
##                  Outlet_Size
## Outlet_Identifier       High     Medium      Small
##            OUT010 0.00000000 0.06511792 0.00000000
##            OUT013 0.10935117 0.00000000 0.00000000
##            OUT017 0.00000000 0.10864719 0.00000000
##            OUT018 0.00000000 0.10888185 0.00000000
##            OUT019 0.00000000 0.00000000 0.06195002
##            OUT027 0.00000000 0.10970316 0.00000000
##            OUT035 0.00000000 0.00000000 0.10911651
##            OUT045 0.00000000 0.10899918 0.00000000
##            OUT046 0.00000000 0.00000000 0.10911651
##            OUT049 0.00000000 0.10911651 0.00000000
# 4.14 Outlet Identifier vs Outlet_Location_Type
# Two-way contingency table: Outlet_Identifier in rows,
# Outlet_Location_Type in columns; every cell is the number of rows with
# that pairing. In the recorded output each outlet has a non-zero count in
# exactly one tier column.
my_table_14 <- xtabs(
  ~ Outlet_Identifier + Outlet_Location_Type,
  data = BigMartsFinalData
)
my_table_14
##                  Outlet_Location_Type
## Outlet_Identifier Tier 1 Tier 2 Tier 3
##            OUT010      0      0    555
##            OUT013      0      0    932
##            OUT017      0    926      0
##            OUT018      0      0    928
##            OUT019    528      0      0
##            OUT027      0      0    935
##            OUT035      0    930      0
##            OUT045      0    929      0
##            OUT046    930      0      0
##            OUT049    930      0      0
# Cell proportions relative to the grand total (no margin), so all cells
# together sum to 1; these are fractions, not percentages.
prop.table(my_table_14, margin = NULL)
##                  Outlet_Location_Type
## Outlet_Identifier     Tier 1     Tier 2     Tier 3
##            OUT010 0.00000000 0.00000000 0.06511792
##            OUT013 0.00000000 0.00000000 0.10935117
##            OUT017 0.00000000 0.10864719 0.00000000
##            OUT018 0.00000000 0.00000000 0.10888185
##            OUT019 0.06195002 0.00000000 0.00000000
##            OUT027 0.00000000 0.00000000 0.10970316
##            OUT035 0.00000000 0.10911651 0.00000000
##            OUT045 0.00000000 0.10899918 0.00000000
##            OUT046 0.10911651 0.00000000 0.00000000
##            OUT049 0.10911651 0.00000000 0.00000000
#4.15 Outlet Identifier vs Outlet_Type
# Two-way count table: outlet identifier (rows) by outlet type (columns).
my_table_15 <- xtabs(
  formula = ~ Outlet_Identifier + Outlet_Type,
  data = BigMartsFinalData
)
print(my_table_15)
##                  Outlet_Type
## Outlet_Identifier Grocery Store Supermarket Type1 Supermarket Type2
##            OUT010           555                 0                 0
##            OUT013             0               932                 0
##            OUT017             0               926                 0
##            OUT018             0                 0               928
##            OUT019           528                 0                 0
##            OUT027             0                 0                 0
##            OUT035             0               930                 0
##            OUT045             0               929                 0
##            OUT046             0               930                 0
##            OUT049             0               930                 0
##                  Outlet_Type
## Outlet_Identifier Supermarket Type3
##            OUT010                 0
##            OUT013                 0
##            OUT017                 0
##            OUT018                 0
##            OUT019                 0
##            OUT027               935
##            OUT035                 0
##            OUT045                 0
##            OUT046                 0
##            OUT049                 0
# Cell proportions of my_table_15 relative to the grand total (no margin given).
prop.table(my_table_15)
##                  Outlet_Type
## Outlet_Identifier Grocery Store Supermarket Type1 Supermarket Type2
##            OUT010    0.06511792        0.00000000        0.00000000
##            OUT013    0.00000000        0.10935117        0.00000000
##            OUT017    0.00000000        0.10864719        0.00000000
##            OUT018    0.00000000        0.00000000        0.10888185
##            OUT019    0.06195002        0.00000000        0.00000000
##            OUT027    0.00000000        0.00000000        0.00000000
##            OUT035    0.00000000        0.10911651        0.00000000
##            OUT045    0.00000000        0.10899918        0.00000000
##            OUT046    0.00000000        0.10911651        0.00000000
##            OUT049    0.00000000        0.10911651        0.00000000
##                  Outlet_Type
## Outlet_Identifier Supermarket Type3
##            OUT010        0.00000000
##            OUT013        0.00000000
##            OUT017        0.00000000
##            OUT018        0.00000000
##            OUT019        0.00000000
##            OUT027        0.10970316
##            OUT035        0.00000000
##            OUT045        0.00000000
##            OUT046        0.00000000
##            OUT049        0.00000000
#4.16 Outlet Establishment Year vs Outlet Size
# Two-way count table: establishment year (rows) by outlet size (columns).
my_table_16 <- xtabs(
  formula = ~ Outlet_Establishment_Year + Outlet_Size,
  data = BigMartsFinalData
)
print(my_table_16)
##                          Outlet_Size
## Outlet_Establishment_Year High Medium Small
##                      1985    0    935   528
##                      1987  932      0     0
##                      1997    0      0   930
##                      1998    0    555     0
##                      1999    0    930     0
##                      2002    0    929     0
##                      2004    0      0   930
##                      2007    0    926     0
##                      2009    0    928     0
# Cell proportions of my_table_16 relative to the grand total (no margin given).
prop.table(my_table_16)
##                          Outlet_Size
## Outlet_Establishment_Year       High     Medium      Small
##                      1985 0.00000000 0.10970316 0.06195002
##                      1987 0.10935117 0.00000000 0.00000000
##                      1997 0.00000000 0.00000000 0.10911651
##                      1998 0.00000000 0.06511792 0.00000000
##                      1999 0.00000000 0.10911651 0.00000000
##                      2002 0.00000000 0.10899918 0.00000000
##                      2004 0.00000000 0.00000000 0.10911651
##                      2007 0.00000000 0.10864719 0.00000000
##                      2009 0.00000000 0.10888185 0.00000000
#4.17 Outlet Establishment Year vs Outlet_Location_Type
# Two-way count table: establishment year (rows) by outlet location tier (columns).
my_table_17 <- xtabs(
  formula = ~ Outlet_Establishment_Year + Outlet_Location_Type,
  data = BigMartsFinalData
)
print(my_table_17)
##                          Outlet_Location_Type
## Outlet_Establishment_Year Tier 1 Tier 2 Tier 3
##                      1985    528      0    935
##                      1987      0      0    932
##                      1997    930      0      0
##                      1998      0      0    555
##                      1999    930      0      0
##                      2002      0    929      0
##                      2004      0    930      0
##                      2007      0    926      0
##                      2009      0      0    928
# Cell proportions of my_table_17 relative to the grand total (no margin given).
prop.table(my_table_17)
##                          Outlet_Location_Type
## Outlet_Establishment_Year     Tier 1     Tier 2     Tier 3
##                      1985 0.06195002 0.00000000 0.10970316
##                      1987 0.00000000 0.00000000 0.10935117
##                      1997 0.10911651 0.00000000 0.00000000
##                      1998 0.00000000 0.00000000 0.06511792
##                      1999 0.10911651 0.00000000 0.00000000
##                      2002 0.00000000 0.10899918 0.00000000
##                      2004 0.00000000 0.10911651 0.00000000
##                      2007 0.00000000 0.10864719 0.00000000
##                      2009 0.00000000 0.00000000 0.10888185
#4.18 Outlet Establishment Year vs Outlet_Type
# Two-way count table: establishment year (rows) by outlet type (columns).
my_table_18 <- xtabs(
  formula = ~ Outlet_Establishment_Year + Outlet_Type,
  data = BigMartsFinalData
)
print(my_table_18)
##                          Outlet_Type
## Outlet_Establishment_Year Grocery Store Supermarket Type1
##                      1985           528                 0
##                      1987             0               932
##                      1997             0               930
##                      1998           555                 0
##                      1999             0               930
##                      2002             0               929
##                      2004             0               930
##                      2007             0               926
##                      2009             0                 0
##                          Outlet_Type
## Outlet_Establishment_Year Supermarket Type2 Supermarket Type3
##                      1985                 0               935
##                      1987                 0                 0
##                      1997                 0                 0
##                      1998                 0                 0
##                      1999                 0                 0
##                      2002                 0                 0
##                      2004                 0                 0
##                      2007                 0                 0
##                      2009               928                 0
# Cell proportions of my_table_18 relative to the grand total (no margin given).
prop.table(my_table_18)
##                          Outlet_Type
## Outlet_Establishment_Year Grocery Store Supermarket Type1
##                      1985    0.06195002        0.00000000
##                      1987    0.00000000        0.10935117
##                      1997    0.00000000        0.10911651
##                      1998    0.06511792        0.00000000
##                      1999    0.00000000        0.10911651
##                      2002    0.00000000        0.10899918
##                      2004    0.00000000        0.10911651
##                      2007    0.00000000        0.10864719
##                      2009    0.00000000        0.00000000
##                          Outlet_Type
## Outlet_Establishment_Year Supermarket Type2 Supermarket Type3
##                      1985        0.00000000        0.10970316
##                      1987        0.00000000        0.00000000
##                      1997        0.00000000        0.00000000
##                      1998        0.00000000        0.00000000
##                      1999        0.00000000        0.00000000
##                      2002        0.00000000        0.00000000
##                      2004        0.00000000        0.00000000
##                      2007        0.00000000        0.00000000
##                      2009        0.10888185        0.00000000
#4.19 Outlet Size vs Outlet_Location_Type
# Two-way count table: outlet size (rows) by outlet location tier (columns).
my_table_19 <- xtabs(
  formula = ~ Outlet_Size + Outlet_Location_Type,
  data = BigMartsFinalData
)
print(my_table_19)
##            Outlet_Location_Type
## Outlet_Size Tier 1 Tier 2 Tier 3
##      High        0      0    932
##      Medium    930   1855   2418
##      Small    1458    930      0
# Cell proportions of my_table_19 relative to the grand total (no margin given).
prop.table(my_table_19)
##            Outlet_Location_Type
## Outlet_Size    Tier 1    Tier 2    Tier 3
##      High   0.0000000 0.0000000 0.1093512
##      Medium 0.1091165 0.2176464 0.2837029
##      Small  0.1710665 0.1091165 0.0000000
#4.20 Outlet Size vs Outlet_Type
# Two-way count table: outlet size (rows) by outlet type (columns).
my_table_20 <- xtabs(
  formula = ~ Outlet_Size + Outlet_Type,
  data = BigMartsFinalData
)
print(my_table_20)
##            Outlet_Type
## Outlet_Size Grocery Store Supermarket Type1 Supermarket Type2
##      High               0               932                 0
##      Medium           555              2785               928
##      Small            528              1860                 0
##            Outlet_Type
## Outlet_Size Supermarket Type3
##      High                   0
##      Medium               935
##      Small                  0
# Cell proportions of my_table_20 relative to the grand total (no margin given).
prop.table(my_table_20)
##            Outlet_Type
## Outlet_Size Grocery Store Supermarket Type1 Supermarket Type2
##      High      0.00000000        0.10935117        0.00000000
##      Medium    0.06511792        0.32676288        0.10888185
##      Small     0.06195002        0.21823302        0.00000000
##            Outlet_Type
## Outlet_Size Supermarket Type3
##      High          0.00000000
##      Medium        0.10970316
##      Small         0.00000000
#4.21 Outlet_Location_Type vs Outlet_Type
# Two-way count table: outlet location tier (rows) by outlet type (columns).
my_table_21 <- xtabs(
  formula = ~ Outlet_Location_Type + Outlet_Type,
  data = BigMartsFinalData
)
print(my_table_21)
##                     Outlet_Type
## Outlet_Location_Type Grocery Store Supermarket Type1 Supermarket Type2
##               Tier 1           528              1860                 0
##               Tier 2             0              2785                 0
##               Tier 3           555               932               928
##                     Outlet_Type
## Outlet_Location_Type Supermarket Type3
##               Tier 1                 0
##               Tier 2                 0
##               Tier 3               935
# Cell proportions of my_table_21 relative to the grand total (no margin given).
prop.table(my_table_21)
##                     Outlet_Type
## Outlet_Location_Type Grocery Store Supermarket Type1 Supermarket Type2
##               Tier 1    0.06195002        0.21823302        0.00000000
##               Tier 2    0.00000000        0.32676288        0.00000000
##               Tier 3    0.06511792        0.10935117        0.10888185
##                     Outlet_Type
## Outlet_Location_Type Supermarket Type3
##               Tier 1        0.00000000
##               Tier 2        0.00000000
##               Tier 3        0.10970316
#task 5:Draw a  boxplot of the variables that belong to your study.
library(lattice)
# Univariate boxplots of the three continuous item attributes.
# Each call passes the numeric vector itself, which dispatches to boxplot()'s
# default method. That method has no `data` parameter (only the formula method
# does), so the stray `data =` argument has been dropped.
boxplot(BigMartsFinalData$Item_Weight, main="Distribution of Weights of items", xlab="Items Weight", ylab="Weight in Kg",  varwidth=TRUE)

boxplot(BigMartsFinalData$Item_Visibility, main="Distribution of visibility of items", xlab="Items Visibility", ylab="Range of Visibility",  varwidth=TRUE)

boxplot(BigMartsFinalData$Item_MRP, main="Distribution of Retail Price of items", xlab="Items Retail Price", ylab="Range of Retail Price",  varwidth=TRUE)

#task 6:Draw Histograms for your suitable data fields.
library(lattice)
# One lattice histogram per categorical field. Every call stays at top level so
# the returned trellis object is auto-printed, and each variable is coerced with
# as.factor() so the bars correspond to category levels.
histogram(~ as.factor(Item_Identifier), data = BigMartsFinalData,
          xlab = "Item Identifier")

histogram(~ as.factor(Item_Fat_Content), data = BigMartsFinalData,
          xlab = "Fat contents")

histogram(~ as.factor(Item_Type), data = BigMartsFinalData,
          xlab = "Item Type", varwidth = TRUE)

histogram(~ as.factor(Outlet_Identifier), data = BigMartsFinalData,
          xlab = "Outlet Identifier", varwidth = TRUE)

histogram(~ as.factor(Outlet_Establishment_Year), data = BigMartsFinalData,
          xlab = "Outlet establishment year", varwidth = TRUE)

histogram(~ as.factor(Outlet_Size), data = BigMartsFinalData,
          xlab = "Outlet Size", varwidth = TRUE)

histogram(~ as.factor(Outlet_Location_Type), data = BigMartsFinalData,
          xlab = "Outlet Location Type", varwidth = TRUE)

histogram(~ as.factor(Outlet_Type), data = BigMartsFinalData,
          xlab = "Outlet Type", varwidth = TRUE)

#task 7: Draw suitable plot for your data fields.
library(car)
#7.1 Scatterplot between Item_Weight and Its Outlet_Type
# car::scatterplot of item weight (y) against outlet type (x).
scatterplot(
  BigMartsFinalData$Item_Weight ~ BigMartsFinalData$Outlet_Type,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of Items Weight vs.Outlet Type ",
  xlab = "Outlet Type",
  ylab = "Items Weight"
)

#7.2 Boxplot of Items Weights vs Items Type
# One box of item weights per item type.
boxplot(
  Item_Weight ~ Item_Type,
  data = BigMartsFinalData,
  main = "Items Weight vs Items Type",
  xlab = "Item Type",
  ylab = "Items Weight"
)

#7.3 Scatterplot between Items Visibility and Its Retail Price
# car::scatterplot of item visibility (y) against item MRP (x).
scatterplot(
  BigMartsFinalData$Item_Visibility ~ BigMartsFinalData$Item_MRP,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of Items Visibility vs. Item MRP ",
  xlab = "MRP",
  ylab = "Visibility"
)

#7.4 Boxplot between Items Outlet Sales and Its Outlets location Type
# One box of item outlet sales per outlet location type.
# In the formula `sales ~ location type` the grouping factor is drawn on the
# x axis and the sales values on the y axis, so the axis labels are assigned
# accordingly (they were previously swapped).
boxplot(BigMartsFinalData$Item_Outlet_Sales ~BigMartsFinalData$Outlet_Location_Type , xlab = "Outlet Location Type", ylab ="Item Outlet Sales", main = "Sales vs location type")

#7.5 Boxplot between Items Outlet Sales and Its Outlet_Identifier
# One box of item outlet sales per outlet identifier.
boxplot(
  Item_Outlet_Sales ~ Outlet_Identifier,
  data = BigMartsFinalData,
  main = "Items Outlet Sales vs Outlet Identifier",
  xlab = "Outlet Identifier",
  ylab = "Outlet Sales"
)

#task 8:Create a correlation matrix.
# cor() needs an all-numeric data frame, so the categorical columns are coerced
# to numeric in place before the correlation matrix is computed and printed.
# NOTE(review): the coercion overwrites BigMartsFinalData itself, so factor
# columns are left holding integer level codes instead of their labels (the
# later contingency table prints 1, 2, 3 rather than High/Medium/Small).
# Correlations on such codes of nominal variables should be read with care.
columns_to_numeric <- c(
  "Item_Identifier",
  "Item_Fat_Content",
  "Item_Type",
  "Outlet_Identifier",
  "Outlet_Establishment_Year",
  "Outlet_Size",
  "Outlet_Location_Type",
  "Outlet_Type"
)
BigMartsFinalData[columns_to_numeric] <- lapply(
  BigMartsFinalData[columns_to_numeric],
  as.numeric
)
CorrelationForBigMartsData <- cor(BigMartsFinalData)
print(CorrelationForBigMartsData)
##                                       X Item_Identifier   Item_Weight
## X                          1.0000000000     0.019648276 -0.0244558710
## Item_Identifier            0.0196482757     1.000000000  0.0482819395
## Item_Weight               -0.0244558710     0.048281939  1.0000000000
## Item_Fat_Content           0.0078955710    -0.114660488 -0.0267895290
## Item_Visibility            0.0012152670    -0.029188698 -0.0174774811
## Item_Type                  0.0001094730    -0.017973341  0.0356887486
## Item_MRP                   0.0025008782     0.012852955  0.0259732486
## Outlet_Identifier         -0.0065009850    -0.008601773 -0.0075913830
## Outlet_Establishment_Year  0.0003367819    -0.012771776 -0.0134156596
## Outlet_Size                0.0050191419     0.001388956 -0.0043051576
## Outlet_Location_Type      -0.0018663490     0.003655932  0.0029358518
## Outlet_Type                0.0021580047    -0.001177750  0.0005343981
## Item_Outlet_Sales         -0.0053861796     0.002868828  0.0131643568
##                           Item_Fat_Content Item_Visibility    Item_Type
## X                             0.0078955710     0.001215267  0.000109473
## Item_Identifier              -0.1146604881    -0.029188698 -0.017973341
## Item_Weight                  -0.0267895290    -0.017477481  0.035688749
## Item_Fat_Content              1.0000000000     0.049914978 -0.139434246
## Item_Visibility               0.0499149777     1.000000000 -0.035999729
## Item_Type                    -0.1394342456    -0.035999729  1.000000000
## Item_MRP                      0.0060628994    -0.005258788  0.032650737
## Outlet_Identifier             0.0007637264    -0.106376550  0.001655864
## Outlet_Establishment_Year     0.0031506634    -0.078354718  0.004970179
## Outlet_Size                  -0.0006220193     0.072347257 -0.001859350
## Outlet_Location_Type         -0.0015984765    -0.027859509  0.003084154
## Outlet_Type                   0.0021990092    -0.179603892  0.003053107
## Item_Outlet_Sales             0.0187185336    -0.134137692  0.017047670
##                                Item_MRP Outlet_Identifier
## X                          0.0025008782     -0.0065009850
## Item_Identifier            0.0128529549     -0.0086017730
## Item_Weight                0.0259732486     -0.0075913830
## Item_Fat_Content           0.0060628994      0.0007637264
## Item_Visibility           -0.0052587878     -0.1063765503
## Item_Type                  0.0326507373      0.0016558637
## Item_MRP                   1.0000000000      0.0033193595
## Outlet_Identifier          0.0033193595      1.0000000000
## Outlet_Establishment_Year  0.0050199162      0.0790347340
## Outlet_Size                0.0060588872      0.5046029624
## Outlet_Location_Type       0.0002322058     -0.7161760042
## Outlet_Type               -0.0019746190      0.0998732477
## Item_Outlet_Sales          0.5675744467      0.1623248975
##                           Outlet_Establishment_Year   Outlet_Size
## X                                      0.0003367819  0.0050191419
## Item_Identifier                       -0.0127717759  0.0013889555
## Item_Weight                           -0.0134156596 -0.0043051576
## Item_Fat_Content                       0.0031506634 -0.0006220193
## Item_Visibility                       -0.0783547179  0.0723472567
## Item_Type                              0.0049701787 -0.0018593496
## Item_MRP                               0.0050199162  0.0060588872
## Outlet_Identifier                      0.0790347340  0.5046029624
## Outlet_Establishment_Year              1.0000000000  0.1933885750
## Outlet_Size                            0.1933885750  1.0000000000
## Outlet_Location_Type                  -0.0892163898 -0.6143107047
## Outlet_Type                           -0.1223041428 -0.2014826222
## Item_Outlet_Sales                     -0.0491349704 -0.0861821954
##                           Outlet_Location_Type   Outlet_Type
## X                                -0.0018663490  0.0021580047
## Item_Identifier                   0.0036559317 -0.0011777502
## Item_Weight                       0.0029358518  0.0005343981
## Item_Fat_Content                 -0.0015984765  0.0021990092
## Item_Visibility                  -0.0278595089 -0.1796038921
## Item_Type                         0.0030841544  0.0030531075
## Item_MRP                          0.0002322058 -0.0019746190
## Outlet_Identifier                -0.7161760042  0.0998732477
## Outlet_Establishment_Year        -0.0892163898 -0.1223041428
## Outlet_Size                      -0.6143107047 -0.2014826222
## Outlet_Location_Type              1.0000000000  0.4672186616
## Outlet_Type                       0.4672186616  1.0000000000
## Item_Outlet_Sales                 0.0893667468  0.4015225000
##                           Item_Outlet_Sales
## X                              -0.005386180
## Item_Identifier                 0.002868828
## Item_Weight                     0.013164357
## Item_Fat_Content                0.018718534
## Item_Visibility                -0.134137692
## Item_Type                       0.017047670
## Item_MRP                        0.567574447
## Outlet_Identifier               0.162324898
## Outlet_Establishment_Year      -0.049134970
## Outlet_Size                    -0.086182195
## Outlet_Location_Type            0.089366747
## Outlet_Type                     0.401522500
## Item_Outlet_Sales               1.000000000
#task 9: Visualize the correlation matrix.
library('corrplot')
## corrplot 0.84 loaded
# Draw the correlation matrix with one circle per variable pair.
corrplot(corr = CorrelationForBigMartsData, method = "circle")

#task 10: Create a scatter plot matrix for your data set.
# Turn the categorical columns back into factors, then draw a scatter plot
# matrix of the continuous columns.
columns_to_factor <- c(
  "Item_Identifier",
  "Item_Fat_Content",
  "Item_Type",
  "Outlet_Identifier",
  "Outlet_Establishment_Year",
  "Outlet_Size",
  "Outlet_Location_Type",
  "Outlet_Type"
)
BigMartsFinalData[columns_to_factor] <- lapply(
  BigMartsFinalData[columns_to_factor],
  as.factor
)
# Going by the column order shown in the correlation matrix, positions 3, 5, 7
# and 13 are Item_Weight, Item_Visibility, Item_MRP and Item_Outlet_Sales.
continuous_column_positions <- c(3, 5, 7, 13)
pairs(BigMartsFinalData[, continuous_column_positions], pch = 19)

#task 11: Run a suitable test to check your hypothesis for your suitable assumptions.
#Using Chi square Test
# Building the two-way contingency table of outlet size versus outlet type
# Rebuild the outlet size by outlet type count table used as chi-square input.
my_table_20 <- xtabs(
  formula = ~ Outlet_Size + Outlet_Type,
  data = BigMartsFinalData
)
print(my_table_20)
##            Outlet_Type
## Outlet_Size    1    2    3    4
##           1    0  932    0    0
##           2  555 2785  928  935
##           3  528 1860    0    0
# Null Hypothesis: The row and the column variables of the contingency table are independent.
# Pearson chi-squared test of independence on the contingency table; the
# result is stored and then printed.
chisq <- chisq.test(x = my_table_20)
print(chisq)
## 
##  Pearson's Chi-squared test
## 
## data:  my_table_20
## X-squared = 1830, df = 6, p-value < 2.2e-16
# as the p value is less than 0.05 the null hypothesis is rejected and there is a dependence between Outlet size and Outlet Type
#task 12:Run a t-test to analyse your hypothesis.
# Null Hypothesis: There is no difference in mean Item_Weight between the Item_Fat_Content groups.
# Two-sample t-test of item weight across the fat-content groups; only the
# p-value is extracted and printed.
t.test(Item_Weight ~ Item_Fat_Content, data = BigMartsFinalData)[["p.value"]]
## [1] 0.01324973
#As the p-value (0.013) is below 0.05, the mean Item_Weight differs significantly between the Item_Fat_Content groups, so we reject the null hypothesis.
#Task 13

#Formulate a Regression Model:  
#y = b0 + b1*x1 + b2*x2 + ..
#Think about what should 'y' be?
#Think about what could x = {x1, x2, ..} be?
#Fit Linear Regression Models using lm()
#Use the lm() model outputs to test your Hypotheses and draw inferences
#Prepare a list of insights based on your Regression Analysis

#Let Y be Item_Outlet_Sales, as it is the dependent variable.
# List every pair of variables with a positive correlation; the pairs that include Item_Outlet_Sales point to the candidate "X" variables.

# Print each distinct pair of variables whose correlation lies strictly between
# 0 and 1, one "row-variable column-variable" pair per line.
# Only the upper triangle (j >= i) is visited, so each pair is reported once;
# the diagonal is excluded by the `< 1` condition. seq_len() keeps the loops
# safe for an empty matrix, and the is.na() guard stops an NA correlation from
# aborting the `if` with an error.
variable_count <- NROW(CorrelationForBigMartsData)
for (i in seq_len(variable_count)) {
  for (j in seq.int(i, variable_count)) {
    pair_correlation <- CorrelationForBigMartsData[i, j]
    if (!is.na(pair_correlation) && pair_correlation > 0 && pair_correlation < 1) {
      cat(colnames(CorrelationForBigMartsData)[[i]], rownames(CorrelationForBigMartsData)[[j]], "\n")
    }
  }
}
## X Item_Identifier 
## X Item_Fat_Content 
## X Item_Visibility 
## X Item_Type 
## X Item_MRP 
## X Outlet_Establishment_Year 
## X Outlet_Size 
## X Outlet_Type 
## Item_Identifier Item_Weight 
## Item_Identifier Item_MRP 
## Item_Identifier Outlet_Size 
## Item_Identifier Outlet_Location_Type 
## Item_Identifier Item_Outlet_Sales 
## Item_Weight Item_Type 
## Item_Weight Item_MRP 
## Item_Weight Outlet_Location_Type 
## Item_Weight Outlet_Type 
## Item_Weight Item_Outlet_Sales 
## Item_Fat_Content Item_Visibility 
## Item_Fat_Content Item_MRP 
## Item_Fat_Content Outlet_Identifier 
## Item_Fat_Content Outlet_Establishment_Year 
## Item_Fat_Content Outlet_Type 
## Item_Fat_Content Item_Outlet_Sales 
## Item_Visibility Outlet_Size 
## Item_Type Item_MRP 
## Item_Type Outlet_Identifier 
## Item_Type Outlet_Establishment_Year 
## Item_Type Outlet_Location_Type 
## Item_Type Outlet_Type 
## Item_Type Item_Outlet_Sales 
## Item_MRP Outlet_Identifier 
## Item_MRP Outlet_Establishment_Year 
## Item_MRP Outlet_Size 
## Item_MRP Outlet_Location_Type 
## Item_MRP Item_Outlet_Sales 
## Outlet_Identifier Outlet_Establishment_Year 
## Outlet_Identifier Outlet_Size 
## Outlet_Identifier Outlet_Type 
## Outlet_Identifier Item_Outlet_Sales 
## Outlet_Establishment_Year Outlet_Size 
## Outlet_Location_Type Outlet_Type 
## Outlet_Location_Type Item_Outlet_Sales 
## Outlet_Type Item_Outlet_Sales
#Following are the "X" variables affecting the Item_Outlet_Sales
#1.Item_MRP
#2.Outlet_Identifier
#3.Outlet_Type
#4.Item_Weight
#5.Item_Fat_Content
#6.Outlet_Size
#7.Item_Type
# Print per-column summary statistics of BigMartSalesData before fitting the model.
summary(BigMartSalesData)
##  Item_Identifier  Item_Weight     Item_Fat_Content Item_Visibility   
##  FDG33  :  10    Min.   : 4.555   Low Fat:5517     Min.   :0.003575  
##  FDW13  :  10    1st Qu.: 8.785   Regular:3006     1st Qu.:0.033085  
##  DRE49  :   9    Median :12.650                    Median :0.062517  
##  DRN47  :   9    Mean   :12.875                    Mean   :0.070213  
##  FDD38  :   9    3rd Qu.:16.850                    3rd Qu.:0.094585  
##  FDF52  :   9    Max.   :21.350                    Max.   :0.328391  
##  (Other):8467                                                        
##                  Item_Type       Item_MRP      Outlet_Identifier
##  Fruits and Vegetables:1232   Min.   : 31.29   OUT027 : 935     
##  Snack Foods          :1200   1st Qu.: 93.83   OUT013 : 932     
##  Household            : 910   Median :143.01   OUT035 : 930     
##  Frozen Foods         : 856   Mean   :140.99   OUT046 : 930     
##  Dairy                : 682   3rd Qu.:185.64   OUT049 : 930     
##  Canned               : 649   Max.   :266.89   OUT045 : 929     
##  (Other)              :2994                    (Other):2937     
##  Outlet_Establishment_Year Outlet_Size   Outlet_Location_Type
##  Min.   :1985              High  : 932   Tier 1:2388         
##  1st Qu.:1987              Medium:5203   Tier 2:2785         
##  Median :1999              Small :2388   Tier 3:3350         
##  Mean   :1998                                                
##  3rd Qu.:2004                                                
##  Max.   :2009                                                
##                                                              
##             Outlet_Type   Item_Outlet_Sales 
##  Grocery Store    :1083   Min.   :   33.29  
##  Supermarket Type1:5577   1st Qu.:  834.25  
##  Supermarket Type2: 928   Median : 1794.33  
##  Supermarket Type3: 935   Mean   : 2181.29  
##                           3rd Qu.: 3101.30  
##                           Max.   :13086.97  
## 
# Re-encode the categorical predictors as factors of their integer codes, then
# regress Item_Outlet_Sales on the seven chosen predictors and print the fit.
# NOTE(review): the formula names its variables as BigMartsFinalData$..., which
# appears to be why the later predict() call warns that 'newdata' had a
# different number of rows; bare column names with `data =` would let predict()
# use new data. The summary also reports coefficients "not defined because of
# singularities" for Outlet_Type and Outlet_Size. Confirm before changing.
categorical_predictors <- c(
  "Outlet_Identifier",
  "Outlet_Type",
  "Item_Fat_Content",
  "Outlet_Size",
  "Item_Type"
)
for (predictor_name in categorical_predictors) {
  BigMartsFinalData[[predictor_name]] <-
    as.factor(as.numeric(BigMartsFinalData[[predictor_name]]))
}

modellinearregression <- lm(
  BigMartsFinalData$Item_Outlet_Sales ~
    BigMartsFinalData$Item_MRP +
    BigMartsFinalData$Outlet_Identifier +
    BigMartsFinalData$Outlet_Type +
    BigMartsFinalData$Item_Weight +
    BigMartsFinalData$Item_Fat_Content +
    BigMartsFinalData$Outlet_Size +
    BigMartsFinalData$Item_Type,
  data = BigMartsFinalData
)
summary(modellinearregression)
## 
## Call:
## lm(formula = BigMartsFinalData$Item_Outlet_Sales ~ BigMartsFinalData$Item_MRP + 
##     BigMartsFinalData$Outlet_Identifier + BigMartsFinalData$Outlet_Type + 
##     BigMartsFinalData$Item_Weight + BigMartsFinalData$Item_Fat_Content + 
##     BigMartsFinalData$Outlet_Size + BigMartsFinalData$Item_Type, 
##     data = BigMartsFinalData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4355.8  -680.0   -89.5   568.5  7957.2 
## 
## Coefficients: (5 not defined because of singularities)
##                                         Estimate Std. Error t value
## (Intercept)                           -1861.2888    77.4211 -24.041
## BigMartsFinalData$Item_MRP               15.5651     0.1977  78.736
## BigMartsFinalData$Outlet_Identifier2   1949.6316    60.5499  32.199
## BigMartsFinalData$Outlet_Identifier3   2022.6309    60.6301  33.360
## BigMartsFinalData$Outlet_Identifier4   1640.9231    60.6031  27.077
## BigMartsFinalData$Outlet_Identifier5     16.0509    68.6789   0.234
## BigMartsFinalData$Outlet_Identifier6   3368.5088    60.5119  55.667
## BigMartsFinalData$Outlet_Identifier7   2062.3074    60.5902  34.037
## BigMartsFinalData$Outlet_Identifier8   1848.8596    60.6033  30.508
## BigMartsFinalData$Outlet_Identifier9   1918.9800    60.5827  31.675
## BigMartsFinalData$Outlet_Identifier10  2015.5570    60.5759  33.273
## BigMartsFinalData$Outlet_Type2                NA         NA      NA
## BigMartsFinalData$Outlet_Type3                NA         NA      NA
## BigMartsFinalData$Outlet_Type4                NA         NA      NA
## BigMartsFinalData$Item_Weight             0.1291     2.6561   0.049
## BigMartsFinalData$Item_Fat_Content2      39.8469    28.2222   1.412
## BigMartsFinalData$Outlet_Size2                NA         NA      NA
## BigMartsFinalData$Outlet_Size3                NA         NA      NA
## BigMartsFinalData$Item_Type2              6.4785    84.0569   0.077
## BigMartsFinalData$Item_Type3              2.9086   116.5525   0.025
## BigMartsFinalData$Item_Type4             25.5585    62.7683   0.407
## BigMartsFinalData$Item_Type5            -42.3478    62.2271  -0.681
## BigMartsFinalData$Item_Type6            -27.4226    58.8607  -0.466
## BigMartsFinalData$Item_Type7             29.3690    54.9688   0.534
## BigMartsFinalData$Item_Type8              0.7781    90.1866   0.009
## BigMartsFinalData$Item_Type9             -7.2753    67.9414  -0.107
## BigMartsFinalData$Item_Type10           -37.9153    59.9276  -0.633
## BigMartsFinalData$Item_Type11             1.7483    70.6198   0.025
## BigMartsFinalData$Item_Type12           -19.6856    98.6220  -0.200
## BigMartsFinalData$Item_Type13           183.2622   148.0030   1.238
## BigMartsFinalData$Item_Type14           -11.2740    55.2557  -0.204
## BigMartsFinalData$Item_Type15           -26.3248    70.1642  -0.375
## BigMartsFinalData$Item_Type16            20.4637   103.0600   0.199
##                                       Pr(>|t|)    
## (Intercept)                             <2e-16 ***
## BigMartsFinalData$Item_MRP              <2e-16 ***
## BigMartsFinalData$Outlet_Identifier2    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier3    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier4    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier5     0.815    
## BigMartsFinalData$Outlet_Identifier6    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier7    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier8    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier9    <2e-16 ***
## BigMartsFinalData$Outlet_Identifier10   <2e-16 ***
## BigMartsFinalData$Outlet_Type2              NA    
## BigMartsFinalData$Outlet_Type3              NA    
## BigMartsFinalData$Outlet_Type4              NA    
## BigMartsFinalData$Item_Weight            0.961    
## BigMartsFinalData$Item_Fat_Content2      0.158    
## BigMartsFinalData$Outlet_Size2              NA    
## BigMartsFinalData$Outlet_Size3              NA    
## BigMartsFinalData$Item_Type2             0.939    
## BigMartsFinalData$Item_Type3             0.980    
## BigMartsFinalData$Item_Type4             0.684    
## BigMartsFinalData$Item_Type5             0.496    
## BigMartsFinalData$Item_Type6             0.641    
## BigMartsFinalData$Item_Type7             0.593    
## BigMartsFinalData$Item_Type8             0.993    
## BigMartsFinalData$Item_Type9             0.915    
## BigMartsFinalData$Item_Type10            0.527    
## BigMartsFinalData$Item_Type11            0.980    
## BigMartsFinalData$Item_Type12            0.842    
## BigMartsFinalData$Item_Type13            0.216    
## BigMartsFinalData$Item_Type14            0.838    
## BigMartsFinalData$Item_Type15            0.708    
## BigMartsFinalData$Item_Type16            0.843    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1129 on 8495 degrees of freedom
## Multiple R-squared:  0.5637, Adjusted R-squared:  0.5623 
## F-statistic: 406.5 on 27 and 8495 DF,  p-value: < 2.2e-16
# Print the named vector of fitted coefficients (aliased terms appear as NA).
coef(modellinearregression)
##                           (Intercept) 
##                         -1861.2888112 
##            BigMartsFinalData$Item_MRP 
##                            15.5651024 
##  BigMartsFinalData$Outlet_Identifier2 
##                          1949.6316047 
##  BigMartsFinalData$Outlet_Identifier3 
##                          2022.6308929 
##  BigMartsFinalData$Outlet_Identifier4 
##                          1640.9231062 
##  BigMartsFinalData$Outlet_Identifier5 
##                            16.0509280 
##  BigMartsFinalData$Outlet_Identifier6 
##                          3368.5087650 
##  BigMartsFinalData$Outlet_Identifier7 
##                          2062.3074106 
##  BigMartsFinalData$Outlet_Identifier8 
##                          1848.8596243 
##  BigMartsFinalData$Outlet_Identifier9 
##                          1918.9800048 
## BigMartsFinalData$Outlet_Identifier10 
##                          2015.5569710 
##        BigMartsFinalData$Outlet_Type2 
##                                    NA 
##        BigMartsFinalData$Outlet_Type3 
##                                    NA 
##        BigMartsFinalData$Outlet_Type4 
##                                    NA 
##         BigMartsFinalData$Item_Weight 
##                             0.1290995 
##   BigMartsFinalData$Item_Fat_Content2 
##                            39.8469111 
##        BigMartsFinalData$Outlet_Size2 
##                                    NA 
##        BigMartsFinalData$Outlet_Size3 
##                                    NA 
##          BigMartsFinalData$Item_Type2 
##                             6.4784563 
##          BigMartsFinalData$Item_Type3 
##                             2.9085821 
##          BigMartsFinalData$Item_Type4 
##                            25.5584539 
##          BigMartsFinalData$Item_Type5 
##                           -42.3478266 
##          BigMartsFinalData$Item_Type6 
##                           -27.4226453 
##          BigMartsFinalData$Item_Type7 
##                            29.3690084 
##          BigMartsFinalData$Item_Type8 
##                             0.7781132 
##          BigMartsFinalData$Item_Type9 
##                            -7.2752954 
##         BigMartsFinalData$Item_Type10 
##                           -37.9153032 
##         BigMartsFinalData$Item_Type11 
##                             1.7483115 
##         BigMartsFinalData$Item_Type12 
##                           -19.6856429 
##         BigMartsFinalData$Item_Type13 
##                           183.2622413 
##         BigMartsFinalData$Item_Type14 
##                           -11.2739514 
##         BigMartsFinalData$Item_Type15 
##                           -26.3247854 
##         BigMartsFinalData$Item_Type16 
##                            20.4637022
#Hence the model is
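#Item_Outlet_Sales = -1861.288 + 15.565*Item_MRP + a*Outlet_Identifier + b*Outlet_Type + 0.129*Item_Weight + c*Item_Fat_Content + d*Outlet_Size + e*Item_Type
#(a, c and e stand for the level-specific coefficients listed above; b and d are NA because Outlet_Type and Outlet_Size are aliased with Outlet_Identifier)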
# Ask the fitted model for predictions on BigMartSalesTestData and show the
# first few values.
# NOTE(review): the first warning below reports that 'newdata' had 5681 rows
# while the variables found have 8523 rows. The model formula refers to
# BigMartsFinalData$... columns directly, so predict() appears to return values
# for the 8523 training rows rather than for the test rows. Verify before
# relying on these numbers.
ItemOutletSalesTestPrediction <- predict(modellinearregression, BigMartSalesTestData)
## Warning: 'newdata' had 5681 rows but variables found have 8523 rows
## Warning in predict.lm(modellinearregression, BigMartSalesTestData):
## prediction from a rank-deficient fit may be misleading
head(ItemOutletSalesTestPrediction)
##         1         2         3         4         5         6 
## 4001.4267  545.2357 2362.5744 1044.7331  889.9386  620.8819
# Diagnostics of prediction.
# Pair the actual test-set sales with the predicted values and correlate them.
# An actual-vs-predicted correlation only means something when the test set
# really has an Item_Outlet_Sales column. Without it, cbind() silently drops the
# NULL and cor() of the lone `predicted` column is trivially 1, so that case is
# reported with a warning and an NA accuracy instead.
TestActualSales <- BigMartSalesTestData[["Item_Outlet_Sales"]]
ActualTestPrediction <- data.frame(cbind(actuals=TestActualSales,predicted=ItemOutletSalesTestPrediction))
if (is.null(TestActualSales)) {
  warning(
    "BigMartSalesTestData has no Item_Outlet_Sales column; ",
    "prediction accuracy cannot be computed.",
    call. = FALSE
  )
  correlation_accuracy <- NA_real_
} else {
  correlation_accuracy <- cor(ActualTestPrediction)
}
head(ActualTestPrediction)
##   predicted
## 1 4001.4267
## 2  545.2357
## 3 2362.5744
## 4 1044.7331
## 5  889.9386
## 6  620.8819
#The list of analysis are

#A. Item Outlet Sales depends on the following seven parameters
#1.Item_MRP
#2.Outlet_Identifier
#3.Outlet_Type
#4.Item_Weight
#5.Item_Fat_Content
#6.Outlet_Size
#7.Item_Type

#B. The two largest positive correlations with Item_Outlet_Sales are:
#Item_MRP (0.568) and Outlet_Type (0.402)

#C. The p-values for Item_MRP and for every Outlet_Identifier level except level 5 (p = 0.815) are below the 0.05 significance level, so these terms contribute significantly to Item_Outlet_Sales