Load diamonds data set

# ggplot2 has to be attached first: it provides the plotting functions used
# below and ships the diamonds data set.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.2
# Load the diamonds data set that comes with ggplot2.
data("diamonds", package = "ggplot2")

# Per-column overview of the data set.
summary(object = diamonds)
##      carat               cut        color        clarity     
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655  
##                                     J: 2808   (Other): 2531  
##      depth           table           price             x         
##  Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
##  1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
##  Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
##  Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
##  3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
##  Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
##                                                                  
##        y                z         
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 4.720   1st Qu.: 2.910  
##  Median : 5.710   Median : 3.530  
##  Mean   : 5.735   Mean   : 3.539  
##  3rd Qu.: 6.540   3rd Qu.: 4.040  
##  Max.   :58.900   Max.   :31.800  
## 
# Open the documentation page for the diamonds data set.
help("diamonds")
## starting httpd help server ... done

Plot histogram of price of all diamonds in data set.

# Histogram of price for every diamond in the data set.
# stat = "count" is dropped: it replaces the binning stat with one bar per
# distinct price and makes geom_histogram() ignore its bin parameters (the
# source of the "Ignoring unknown parameters: binwidth, bins, pad" warning).
# An explicit binwidth (in price units) bins the continuous variable properly.
ggplot(data = diamonds, aes(x = price)) +
  geom_histogram(binwidth = 500)

# Histogram of price, zoomed to the 0-5000 range.
# Only binwidth is given: geom_histogram() lets binwidth override bins, so the
# previous bins = 300 had no effect and only obscured the actual bin size.
# coord_cartesian() zooms the view without removing rows before binning.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 100) +
  coord_cartesian(xlim = c(0, 5000))

# Write the most recently displayed plot to img.jpg using ggsave()'s default
# dimensions.
ggsave(filename = "img.jpg", plot = last_plot())
## Saving 7 x 5 in image
# Count how many diamonds cost at least 15000 (TRUE) versus less (FALSE).
summary(with(diamonds, price >= 15000))
##    Mode   FALSE    TRUE 
## logical   52284    1656

Break out the histogram of diamond prices by cut.

# My ATTEMPT
# Base layer: price against carat, drawn with translucent points.
dia_plot <- ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(alpha = 0.2)

# One smooth trend over all diamonds, without the confidence ribbon.
dia_plot + geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# A separate smooth trend per clarity level, coloured by clarity.
dia_plot + geom_smooth(aes(colour = clarity), se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# ACTUAL ANSWER
# One price histogram per cut, drawn in separate facets.
# ggplot() + geom_histogram() is used instead of qplot(), which newer ggplot2
# releases deprecate. bins = 30 spells out the default that qplot() was
# silently using, so the "Pick better value with `binwidth`" message no longer
# appears.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(bins = 30) +
  facet_wrap(~cut)

Find which cut has the highest-priced diamond

# Highest-priced diamond per cut.
# The exact answer comes from a grouped maximum; the plot below is only a
# visual cross-check.
aggregate(price ~ cut, data = diamonds, FUN = max)

# Scatter of price against cut with translucent points. No geom_smooth()
# layer is added: `cut` is categorical, so a smooth curve over x is not
# meaningful here.
dia_cut <- ggplot(diamonds, aes(x = cut, y = price)) +
  geom_point(alpha = 0.2)

# Zoom (without dropping data) into the top of the price range so the most
# expensive points of each cut can be compared.
dia_cut + coord_cartesian(ylim = c(18500, 19000))

Find which cut has the lowest-priced diamond

# Lowest-priced diamond per cut.
# The exact answer comes from a grouped minimum; the plot below is only a
# visual cross-check.
aggregate(price ~ cut, data = diamonds, FUN = min)

# Scatter of price against cut. No geom_smooth() layer is added: `cut` is
# categorical, so a smooth curve over x is not meaningful here.
dia_cut <- ggplot(diamonds, aes(x = cut, y = price)) +
  geom_point()

# Zoom (without dropping data) into the bottom of the price range so the
# cheapest points of each cut can be compared.
dia_cut + coord_cartesian(ylim = c(300, 350))

Which cut has the lowest median price?

# Overall distribution of price (min, quartiles, mean, max) for reference.
with(diamonds, summary(price))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     326     950    2401    3933    5324   18823
# Median price for each cut, one row per cut level.
# The formula uses bare column names with data = diamonds so the result columns
# are labelled `cut` and `price` rather than `diamonds$cut` / `diamonds$price`.
aggregate(price ~ cut, data = diamonds, FUN = median)

Five price histograms, one per cut

# One price histogram per cut, drawn in separate facets.
# ggplot() + geom_histogram() is used instead of qplot(), which newer ggplot2
# releases deprecate. bins = 30 spells out the default that qplot() was
# silently using, so the "Pick better value with `binwidth`" message no longer
# appears.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(bins = 30) +
  facet_wrap(~cut)