Hands on Lab – ggplot2

Generating descriptive statistics and basic graphics.

Load package and dataset

library(ggplot2)
library(car)
## Loading required package: carData
attach(diamonds)

The dataset

str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame':    53940 obs. of  10 variables:
##  $ carat  : num  0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num  61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num  55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int  326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num  3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num  3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num  2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
summary(diamonds)
##      carat               cut        color        clarity     
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655  
##                                     J: 2808   (Other): 2531  
##      depth           table           price             x         
##  Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
##  1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
##  Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
##  Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
##  3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
##  Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
##                                                                  
##        y                z         
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 4.720   1st Qu.: 2.910  
##  Median : 5.710   Median : 3.530  
##  Mean   : 5.735   Mean   : 3.539  
##  3rd Qu.: 6.540   3rd Qu.: 4.040  
##  Max.   :58.900   Max.   :31.800  
## 
summary(diamonds$clarity)
##    I1   SI2   SI1   VS2   VS1  VVS2  VVS1    IF 
##   741  9194 13065 12258  8171  5066  3655  1790

Frequency of each diamond color

table(diamonds$color)
## 
##     D     E     F     G     H     I     J 
##  6775  9797  9542 11292  8304  5422  2808

Frequency of each diamond color, by cut

table(diamonds$cut,diamonds$color)
##            
##                D    E    F    G    H    I    J
##   Fair       163  224  312  314  303  175  119
##   Good       662  933  909  871  702  522  307
##   Very Good 1513 2400 2164 2299 1824 1204  678
##   Premium   1603 2337 2331 2924 2360 1428  808
##   Ideal     2834 3903 3826 4884 3115 2093  896

Histogram of diamonds prices

info <-hist(diamonds$price,xlab="Price", main="Distribution of Prices in Diamonds Dataset", col="lightblue", xlim=c(0,20000))

info
## $breaks
##  [1]     0  1000  2000  3000  4000  5000  6000  7000  8000  9000 10000
## [12] 11000 12000 13000 14000 15000 16000 17000 18000 19000
## 
## $counts
##  [1] 14524  9683  6129  4225  4665  3163  2278  1668  1307  1076   934
## [12]   825   701   603   504   513   425   405   312
## 
## $density
##  [1] 2.692621e-04 1.795143e-04 1.136263e-04 7.832777e-05 8.648498e-05
##  [6] 5.863923e-05 4.223211e-05 3.092325e-05 2.423063e-05 1.994809e-05
## [11] 1.731554e-05 1.529477e-05 1.299592e-05 1.117909e-05 9.343715e-06
## [16] 9.510567e-06 7.879125e-06 7.508343e-06 5.784205e-06
## 
## $mids
##  [1]   500  1500  2500  3500  4500  5500  6500  7500  8500  9500 10500
## [12] 11500 12500 13500 14500 15500 16500 17500 18500
## 
## $xname
## [1] "diamonds$price"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

Scatterplots

scatterplot(table ~ carat , data=diamonds, xlab="Carat", ylab="Table", main="Depth vs. Table Scatter Plot")

plot(diamonds$price ~ diamonds$carat)

qplot(carat, price, data=diamonds)