mtcars Analysis

Sameer Mathur

mtcars dataset

head(mtcars)   # first six rows of the data frame (head() shows 6 rows by default)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Summary Statistics of the mtcars dataset using the summary() function

summary(mtcars)   # min, quartiles, mean and max of every column, using summary()
      mpg             cyl             disp             hp       
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.20   Median :6.000   Median :196.3   Median :123.0  
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
      drat             wt             qsec             vs        
 Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
 1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
 Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
 Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
 3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
       am              gear            carb      
 Min.   :0.0000   Min.   :3.000   Min.   :1.000  
 1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
 Median :0.0000   Median :4.000   Median :2.000  
 Mean   :0.4062   Mean   :3.688   Mean   :2.812  
 3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
 Max.   :1.0000   Max.   :5.000   Max.   :8.000  

Summary Statistics of the data using the describe() function in the psych package

# No attach(): every column is reached through the data frame itself
# (describe(mtcars), subset(mtcars, ...), df$col), which keeps the column
# names off the search path where they could be masked by other objects.
library(psych)   # provides describe()
describe(mtcars)   # n, mean, sd, median, skew, kurtosis, se, ... for each column
     vars  n   mean     sd median trimmed    mad   min    max  range  skew
mpg     1 32  20.09   6.03  19.20   19.70   5.41 10.40  33.90  23.50  0.61
cyl     2 32   6.19   1.79   6.00    6.23   2.97  4.00   8.00   4.00 -0.17
disp    3 32 230.72 123.94 196.30  222.52 140.48 71.10 472.00 400.90  0.38
hp      4 32 146.69  68.56 123.00  141.19  77.10 52.00 335.00 283.00  0.73
drat    5 32   3.60   0.53   3.70    3.58   0.70  2.76   4.93   2.17  0.27
wt      6 32   3.22   0.98   3.33    3.15   0.77  1.51   5.42   3.91  0.42
qsec    7 32  17.85   1.79  17.71   17.83   1.42 14.50  22.90   8.40  0.37
vs      8 32   0.44   0.50   0.00    0.42   0.00  0.00   1.00   1.00  0.24
am      9 32   0.41   0.50   0.00    0.38   0.00  0.00   1.00   1.00  0.36
gear   10 32   3.69   0.74   4.00    3.62   1.48  3.00   5.00   2.00  0.53
carb   11 32   2.81   1.62   2.00    2.65   1.48  1.00   8.00   7.00  1.05
     kurtosis    se
mpg     -0.37  1.07
cyl     -1.76  0.32
disp    -1.21 21.91
hp      -0.14 12.12
drat    -0.71  0.09
wt      -0.02  0.17
qsec     0.34  0.32
vs      -2.00  0.09
am      -1.92  0.09
gear    -1.07  0.13
carb     1.26  0.29

Sample means of mpg by transmission (automatic vs manual)

# split mtcars by transmission type (am: 0 = automatic, 1 = manual)
cars_auto <- subset(mtcars, am == 0)   # automatic transmission
cars_manu <- subset(mtcars, am == 1)   # manual transmission
mean(cars_auto$mpg)   # average mpg of the automatic-transmission cars
[1] 17.14737
mean(cars_manu$mpg)   # average mpg of the manual-transmission cars
[1] 24.39231

Histograms of mpg by transmission (automatic vs manual)

# Stack the two histograms vertically: 2 rows, 1 column.
par(mfrow = c(2, 1))

# Automatic transmission: mpg histogram, sample mean marked by a red line.
with(cars_auto, {
  hist(mpg, main = "Distribution mpg - automatic transmission", xlab = "mpg")
  abline(v = mean(mpg), col = "red")
})

# Manual transmission: same plot for the manual-transmission subset.
with(cars_manu, {
  hist(mpg, main = "Distribution mpg - manual transmission", xlab = "mpg")
  abline(v = mean(mpg), col = "red")
})

Histogram of mpg

plot of chunk unnamed-chunk-7

Sample means of mpg by number of cylinders (4, 6, 8)

# split mtcars by number of cylinders (4, 6 or 8)
cars_cyl4 <- subset(mtcars, cyl == 4)
cars_cyl6 <- subset(mtcars, cyl == 6)
cars_cyl8 <- subset(mtcars, cyl == 8)
mean(cars_cyl4$mpg)   # average mpg of the 4-cylinder cars
[1] 26.66364
mean(cars_cyl6$mpg)   # average mpg of the 6-cylinder cars
[1] 19.74286
mean(cars_cyl8$mpg)   # average mpg of the 8-cylinder cars
[1] 15.1

Histogram of mpg versus cylinders

# Stack the three histograms vertically: 3 rows, 1 column.
par(mfrow = c(3, 1))

# 4-cylinder cars: mpg histogram, sample mean marked by a red line.
with(cars_cyl4, {
  hist(mpg, main = "Distribution mpg - 4 cylinder", xlab = "mpg")
  abline(v = mean(mpg), col = "red")
})

# 6-cylinder cars.
with(cars_cyl6, {
  hist(mpg, main = "Distribution mpg - 6 cylinder", xlab = "mpg")
  abline(v = mean(mpg), col = "red")
})

# 8-cylinder cars.
with(cars_cyl8, {
  hist(mpg, main = "Distribution mpg - 8 cylinder", xlab = "mpg")
  abline(v = mean(mpg), col = "red")
})

Histogram of mpg versus cylinders

plot of chunk unnamed-chunk-11