Load the diamonds data set
library(ggplot2) #must load the ggplot package first
## Warning: package 'ggplot2' was built under R version 3.5.2
# The diamonds data set ships with ggplot2; copy it into the workspace.
data("diamonds", package = "ggplot2")
# Print per-column summary statistics.
summary(diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# Open the help page that documents the diamonds data set.
help("diamonds")
## starting httpd help server ... done
Plot histogram of price of all diamonds in data set.
# Histogram of price over the whole data set. geom_histogram() must keep its
# default binning stat: stat = "count" draws one bar per distinct price and
# makes ggplot2 warn that the binning parameters are ignored. An explicit
# binwidth also avoids the "using bins = 30" message.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 100)

# Price histogram zoomed to the 0-5000 range. Only binwidth is supplied: when
# both are given, binwidth overrides bins, so the former bins = 300 had no
# effect. coord_cartesian() changes the visible range without removing rows
# before binning.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 100) +
  coord_cartesian(xlim = c(0, 5000))

# Write the most recently displayed plot to img.jpg. No width or height is
# given, so ggsave() falls back to the size of the current graphics device.
ggsave(filename = "img.jpg", plot = last_plot())
## Saving 7 x 5 in image
# Tally diamonds priced at 15000 or more (TRUE) against the rest (FALSE).
summary(diamonds[["price"]] >= 15000)
## Mode FALSE TRUE
## logical 52284 1656
Break out the histogram of diamond prices by cut.
# My ATTEMPT
# Base plot: price against carat, drawn with semi-transparent points so that
# overplotted regions remain readable.
dia_plot <- ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(alpha = 0.2)
# Overlay a single smoothed trend line, without its confidence band.
dia_plot + geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Same scatter plot, with one smoothed trend line per clarity grade.
dia_plot +
  geom_smooth(mapping = aes(colour = clarity), se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# ACTUAL ANSWER
# One price histogram per cut. qplot() is deprecated in current ggplot2, so the
# plot is built with ggplot() + geom_histogram(); the default of 30 bins is
# kept, which is why stat_bin() still prints its binwidth hint.
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  facet_wrap(~cut)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Find which cut has the highest-priced diamond
# Scatter every diamond's price against its cut. `cut` is a factor, so a
# geom_smooth() layer sees only one distinct x value per group and draws
# nothing; the smoothing layers were therefore removed.
dia_cut <- ggplot(diamonds, aes(x = cut, y = price)) +
  geom_point(alpha = 0.2)
dia_cut

# Zoom in on the top of the price axis so that only the most expensive points
# stay visible; coord_cartesian() zooms without discarding any rows.
dia_cut + coord_cartesian(ylim = c(18500, 19000))

# Confirm numerically: the maximum price within each cut, then the cut(s) of
# the most expensive diamond(s) overall (ties are all reported).
tapply(diamonds$price, diamonds$cut, max)
diamonds$cut[diamonds$price == max(diamonds$price)]

Find which cut has the lowest-priced diamond
# Scatter every diamond's price against its cut. `cut` is a factor, so a
# geom_smooth() layer sees only one distinct x value per group and draws
# nothing; the smoothing layers were therefore removed.
dia_cut <- ggplot(diamonds, aes(x = cut, y = price)) +
  geom_point()
dia_cut

# Zoom in on the bottom of the price axis so that only the cheapest points stay
# visible; coord_cartesian() zooms without discarding any rows.
dia_cut + coord_cartesian(ylim = c(300, 350))

# Confirm numerically: the minimum price within each cut, then the cut(s) of
# the cheapest diamond(s) overall (ties are all reported).
tapply(diamonds$price, diamonds$cut, min)
diamonds$cut[diamonds$price == min(diamonds$price)]

Five price histograms, one per cut
# One price histogram per cut. qplot() is deprecated in current ggplot2, so the
# plot is built with ggplot() + geom_histogram(); the default of 30 bins is
# kept, which is why stat_bin() still prints its binwidth hint.
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  facet_wrap(~cut)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
