Car Seats Analysis

Sameer Mathur

BASIC SUMMARY

Read the Car Seats Data

# Load the car-seat data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (ShelveLoc, Urban, US) as
# factors on R >= 4.0, where read.csv() no longer converts strings by default.
carSeats.df <- read.csv("CarSeatsDataV4.csv", stringsAsFactors = TRUE)
# attach() is kept so that code referring to bare column names (e.g. Sales)
# keeps working; prefer carSeats.df$<column> in new code.
attach(carSeats.df)
# Report the number of rows and columns.
dim(carSeats.df)
[1] 400  13

Check Data Types in the dataset

# Show the storage type and the first few values of every column.
str(object = carSeats.df)
'data.frame':   400 obs. of  13 variables:
 $ Sales      : num  9.5 4.15 10.81 9.01 10.14 ...
 $ CompPrice  : int  138 141 124 121 145 103 104 130 119 157 ...
 $ Income     : int  73 64 113 78 119 74 99 60 98 53 ...
 $ Advertising: int  110 30 130 90 160 0 150 0 0 0 ...
 $ Population : int  276 340 501 150 294 359 226 144 18 403 ...
 $ Price      : int  120 128 72 100 113 97 102 138 126 124 ...
 $ ShelveLoc  : Factor w/ 3 levels "0-Bad","1-Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Age        : int  42 38 78 26 42 55 58 38 73 58 ...
 $ Education  : int  17 13 16 10 12 11 17 10 17 16 ...
 $ Urban      : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 2 1 1 2 ...
 $ US         : Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 2 1 1 1 ...
 $ Revenue    : num  1140 531 778 901 1146 ...
 $ Profit     : num  228 106 156 180 229 ...

Summarize the Car Seats Data

# Descriptive statistics for every column of the data set.
library(psych)
# Select the statistics by name rather than by position so the table does not
# silently change if the column layout of describe() differs.
# The call is namespace-qualified because Hmisc also exports describe() and
# would mask psych's version once it is loaded.
psych::describe(carSeats.df)[, c("n", "mean", "sd", "median", "min", "max")]
              n   mean     sd median min     max
Sales       400   7.50   2.82   7.49   0   16.27
CompPrice   400 124.97  15.33 125.00  77  175.00
Income      400  68.66  27.99  69.00  21  120.00
Advertising 400  66.35  66.50  50.00   0  290.00
Population  400 264.84 147.38 272.00  10  509.00
Price       400 115.80  23.68 117.00  24  191.00
ShelveLoc*  400   1.97   0.67   2.00   1    3.00
Age         400  53.32  16.20  54.50  25   80.00
Education   400  13.90   2.62  14.00  10   18.00
Urban*      400   1.70   0.46   2.00   1    2.00
US*         400   1.64   0.48   2.00   1    2.00
Revenue     400 838.36 302.59 824.30   0 1794.26
Profit      400 167.67  60.52 164.86   0  358.85

SINGLE VARIABLE VISUALIZATION

Box-Plot of Sales

# Box plot of Sales; the column is taken from the data frame explicitly so the
# call does not depend on attach().
boxplot(carSeats.df$Sales, main = "Number of Car Seats Sold ('000)")

plot of chunk unnamed-chunk-4

Histogram of Sales

# Histogram of Sales; the column is taken from the data frame explicitly so the
# call does not depend on attach().
hist(carSeats.df$Sales,
     main = "Histogram of Number of CarSeats Sold",
     xlab = "Sales ('000)")

plot of chunk unnamed-chunk-5

Box-Plot of Profit

# Box plot of the Profit column.
boxplot(
  carSeats.df[["Profit"]],
  main = "Profit ('000 USD)"
)

plot of chunk unnamed-chunk-6

Histogram of Profit

# Histogram of the Profit column.
hist(
  carSeats.df[["Profit"]],
  main = "Histogram of Profit",
  xlab = "Profit ('000 USD)"
)

plot of chunk unnamed-chunk-7

Box-Plot of Advertising

# Box plot of Advertising; the column is taken from the data frame explicitly
# so the call does not depend on attach().
boxplot(carSeats.df$Advertising, main = "Advertising Budget ('000 USD)")

plot of chunk unnamed-chunk-8

Histogram of Advertising

# Histogram of Advertising; the column is taken from the data frame explicitly
# so the call does not depend on attach().
hist(carSeats.df$Advertising,
     main = "Histogram of Advertising",
     xlab = "Advertising Budget ('000 USD)")

plot of chunk unnamed-chunk-9

Find the mean Sales, Revenue and Profit depending on ShelveLoc

# Mean Sales within each shelf-location level.
aggregate(
  x = carSeats.df$Sales,
  by = list(ShelfLocation = carSeats.df$ShelveLoc),
  FUN = mean
)
  ShelfLocation         x
1         0-Bad  5.522917
2      1-Medium  7.306575
3        2-Good 10.214000
# Mean Revenue within each shelf-location level.
aggregate(
  x = carSeats.df$Revenue,
  by = list(ShelfLocation = carSeats.df$ShelveLoc),
  FUN = mean
)
  ShelfLocation         x
1         0-Bad  600.2085
2      1-Medium  816.7529
3        2-Good 1162.9985
# Mean Profit within each shelf-location level.
aggregate(
  x = carSeats.df$Profit,
  by = list(ShelfLocation = carSeats.df$ShelveLoc),
  FUN = mean
)
  ShelfLocation        x
1         0-Bad 120.0414
2      1-Medium 163.3505
3        2-Good 232.5998

Find the standard deviation of Sales, Revenue and Profit depending on ShelveLoc

# Standard deviation of Sales within each shelf-location level.
aggregate(
  x = carSeats.df$Sales,
  by = list(ShelfLocation = carSeats.df$ShelveLoc),
  FUN = sd
)
  ShelfLocation        x
1         0-Bad 2.356349
2      1-Medium 2.266373
3        2-Good 2.501243
# Standard deviation of Revenue within each shelf-location level.
aggregate(
  x = carSeats.df$Revenue,
  by = list(ShelfLocation = carSeats.df$ShelveLoc),
  FUN = sd
)
  ShelfLocation        x
1         0-Bad 224.8103
2      1-Medium 234.5795
3        2-Good 249.5196
# Standard deviation of Profit within each shelf-location level.
aggregate(
  x = carSeats.df$Profit,
  by = list(ShelfLocation = carSeats.df$ShelveLoc),
  FUN = sd
)
  ShelfLocation        x
1         0-Bad 44.96228
2      1-Medium 46.91608
3        2-Good 49.90406

BoxPlot of Sales broken down by ShelfLoc

# Side-by-side box plots of Sales, one box per ShelveLoc level.
boxplot(
  Sales ~ ShelveLoc,
  data = carSeats.df,
  main = "Sales broken down by ShelfLoc",
  xlab = "Shelf Location",
  ylab = "Sales ('000 units sold)"
)

plot of chunk unnamed-chunk-12

BoxPlot of Profit broken down by ShelfLoc

# Side-by-side box plots of Profit, one box per ShelveLoc level.
boxplot(
  Profit ~ ShelveLoc,
  data = carSeats.df,
  main = "Profit broken down by ShelfLoc",
  xlab = "Shelf Location",
  ylab = "Profit ('000 USD)"
)

plot of chunk unnamed-chunk-13

Scatterplot of Sales versus Advertising

# Scatterplot of Sales against Advertising using car::scatterplot().
# NOTE(review): `spread` and `smoother.args` appear to be arguments of older
# car releases; newer releases are understood to configure the smoother through
# `smooth = list(...)` instead - confirm against the installed car version.
library(car)
scatterplot(
  Sales ~ Advertising,
  data = carSeats.df,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of Sales of Car Seats vs.Advertising ",
  xlab = "Advertising",
  ylab = "Sales"
)

plot of chunk unnamed-chunk-15

Scatterplot of Profit versus Advertising

# Scatterplot of Profit against Advertising using car::scatterplot().
# NOTE(review): `spread` and `smoother.args` appear to be arguments of older
# car releases; newer releases are understood to configure the smoother through
# `smooth = list(...)` instead - confirm against the installed car version.
library(car)
scatterplot(
  Profit ~ Advertising,
  data = carSeats.df,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of Profit on Car Seats vs.Advertising ",
  xlab = "Advertising",
  ylab = "Profit"
)

plot of chunk unnamed-chunk-17

Correlation Matrix with Significance Levels (p-values)

# Pearson correlations and their p-values for six numeric columns.
library(Hmisc)
x <- carSeats.df[, c(
  "Sales", "Profit", "Advertising",
  "Age", "Income", "CompPrice"
)]
# rcorr() works on a matrix, so the data-frame subset is converted first.
rcorr(as.matrix(x), type = "pearson")
            Sales Profit Advertising   Age Income CompPrice
Sales        1.00   0.80        0.27 -0.23   0.15      0.06
Profit       0.80   1.00        0.34 -0.29   0.13      0.42
Advertising  0.27   0.34        1.00  0.00   0.06     -0.02
Age         -0.23  -0.29        0.00  1.00   0.00     -0.10
Income       0.15   0.13        0.06  0.00   1.00     -0.08
CompPrice    0.06   0.42       -0.02 -0.10  -0.08      1.00

n= 400 


P
            Sales  Profit Advertising Age    Income CompPrice
Sales              0.0000 0.0000      0.0000 0.0023 0.2009   
Profit      0.0000        0.0000      0.0000 0.0093 0.0000   
Advertising 0.0000 0.0000             0.9276 0.2391 0.6294   
Age         0.0000 0.0000 0.9276             0.9258 0.0451   
Income      0.0023 0.0093 0.2391      0.9258        0.1073   
CompPrice   0.2009 0.0000 0.6294      0.0451 0.1073          

Construct a Corrgram for 3 variables in the dataset

# Corrgram of Sales, Profit and Advertising, kept in the order given:
# shaded cells below the diagonal, pie glyphs above it, and the variable name
# with its min/max on the diagonal.
library(corrgram)
corrgram(
  carSeats.df[, c("Sales", "Profit", "Advertising")],
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram"
)

plot of chunk unnamed-chunk-20

Visualizing Correlations

# Scatter plot matrix for the following variables:
# {"Sales", "Profit", "Advertising", "ShelveLoc", "Income", "Revenue"}
# NOTE(review): `spread` and `smoother.args` appear to be arguments of older
# car releases; newer releases are understood to configure the smoother through
# `smooth = list(...)` instead - confirm against the installed car version.
library(car)
scatterplotMatrix(
  carSeats.df[, c("Sales", "Profit", "Advertising",
                  "ShelveLoc", "Income", "Revenue")],
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter Plot Matrix"
)

plot of chunk unnamed-chunk-22