# Generating descriptive statistics and basic graphics.
# Load ggplot2 (which ships the `diamonds` dataset and the ggplot2 plotting
# functions) and car (which provides scatterplot()).
library(ggplot2)
library(car)
## Loading required package: carData
# NOTE: attach(diamonds) was dropped. Every call visible in this script reaches
# the columns through `diamonds$...` or a `data = diamonds` argument, so
# attaching only put column names such as x, y, z, table and cut on the search
# path, where they can shadow other objects.
# Inspect the structure: column names, types and a preview of the values.
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Per-column summary of the whole data set: min/quartiles/mean/max for the
# numeric columns and level counts for the factor columns.
summary(diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# Number of diamonds at each clarity level.
summary(diamonds[["clarity"]])
## I1 SI2 SI1 VS2 VS1 VVS2 VVS1 IF
## 741 9194 13065 12258 8171 5066 3655 1790
# One-way frequency table of diamond colour.
table(diamonds[["color"]])
##
## D E F G H I J
## 6775 9797 9542 11292 8304 5422 2808
# Two-way contingency table: cut (rows) by colour (columns).
table(diamonds[["cut"]], diamonds[["color"]])
##
## D E F G H I J
## Fair 163 224 312 314 303 175 119
## Good 662 933 909 871 702 522 307
## Very Good 1513 2400 2164 2299 1824 1204 678
## Premium 1603 2337 2331 2924 2360 1428 808
## Ideal 2834 3903 3826 4884 3115 2093 896
# Histogram of diamond prices. The object returned by hist() (breaks, counts,
# density, mids, ...) is kept in `info` and then printed.
info <- hist(
  diamonds$price,
  main = "Distribution of Prices in Diamonds Dataset",
  xlab = "Price",
  xlim = c(0, 20000),
  col = "lightblue"
)
info
## $breaks
## [1] 0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000
## [12] 11000 12000 13000 14000 15000 16000 17000 18000 19000
##
## $counts
## [1] 14524 9683 6129 4225 4665 3163 2278 1668 1307 1076 934
## [12] 825 701 603 504 513 425 405 312
##
## $density
## [1] 2.692621e-04 1.795143e-04 1.136263e-04 7.832777e-05 8.648498e-05
## [6] 5.863923e-05 4.223211e-05 3.092325e-05 2.423063e-05 1.994809e-05
## [11] 1.731554e-05 1.529477e-05 1.299592e-05 1.117909e-05 9.343715e-06
## [16] 9.510567e-06 7.879125e-06 7.508343e-06 5.784205e-06
##
## $mids
## [1] 500 1500 2500 3500 4500 5500 6500 7500 8500 9500 10500
## [12] 11500 12500 13500 14500 15500 16500 17500 18500
##
## $xname
## [1] "diamonds$price"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Scatter plot of table against carat using car::scatterplot().
# The title previously read "Depth vs. Table", which matched neither the
# formula nor the axis labels; it now names the variables actually plotted.
scatterplot(table ~ carat, data = diamonds, xlab = "Carat", ylab = "Table",
            main = "Table vs. Carat Scatter Plot")
# Base-graphics scatter plot of price against carat.
# Using the formula + `data =` form (as the neighbouring plotting calls do)
# labels the axes "carat" and "price" rather than "diamonds$carat" and
# "diamonds$price".
plot(price ~ carat, data = diamonds)
# ggplot2 scatter plot of price against carat.
# qplot() is deprecated in recent ggplot2 releases; this is the equivalent
# explicit ggplot() + geom_point() form.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()