library(ggplot2)
# Histogram of log(price) with bars filled by cut, drawn in one panel per
# diamond color. Bin settings are left at the geom's defaults.
ggplot(data = diamonds, mapping = aes(x = log(price), fill = cut)) +
  geom_histogram() +
  facet_wrap(~ color) +
  scale_fill_brewer(type = "qual")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Scatter plot of price against table, with points coloured by cut using a
# qualitative Brewer palette.
ggplot(diamonds, aes(x = table, y = price, colour = cut)) +
  geom_point() +
  scale_colour_brewer(type = "qual")

# Volume proxy for each diamond: the product of its x, y and z columns.
volume <- with(diamonds, x * y * z)

# Price (log10 y-axis) against volume, coloured by clarity. The visible x-range
# runs from 0 to the 99th percentile of volume.
ggplot(diamonds, aes(x = volume, y = price, colour = clarity)) +
  geom_point() +
  scale_y_log10() +
  coord_cartesian(xlim = c(0, quantile(volume, probs = 0.99))) +
  scale_colour_brewer(type = "div")

# Load the tab-separated pseudo-Facebook data set.
pf <- read.csv("pseudo_facebook.tsv", sep = "\t")

# Proportion of a user's friendships that the user initiated.
pf$prop_initiated <- pf$friendships_initiated / pf$friend_count
# A 0 / 0 division yields NaN; treat those rows as a proportion of 0.
pf$prop_initiated[is.nan(pf$prop_initiated)] <- 0

# Year joined, derived from tenure.
# NOTE(review): assumes tenure is in days measured back from 2014 -- confirm.
pf$year_joined <- floor(2014 - pf$tenure / 365)
# Bucket the join year into the intervals (2004,2009], (2009,2011],
# (2011,2012] and (2012,2014].
pf$year_joined.bucket <- cut(pf$year_joined, c(2004, 2009, 2011, 2012, 2014))

# Median proportion of initiated friendships at each tenure value, one line
# per join-year bucket. `fun` replaces the `fun.y` argument deprecated in
# ggplot2 3.3.0.
ggplot(data = pf, aes(x = tenure, y = prop_initiated)) +
  geom_line(aes(color = year_joined.bucket), stat = "summary", fun = median)
## Warning: Removed 2 rows containing non-finite values (stat_summary).

# Median proportion of initiated friendships at each tenure value, one line
# per join-year bucket, overlaid with a single smoother fitted to all rows.
# The colour mapping stays on the line layer so the smoother is not split by
# bucket. `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
ggplot(data = pf, aes(x = tenure, y = prop_initiated)) +
  geom_line(aes(color = year_joined.bucket), stat = "summary", fun = median) +
  geom_smooth()
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## `geom_smooth()` using method = 'gam'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

# Five-number summary plus mean of prop_initiated within each join-year bucket.
by(
  data = pf$prop_initiated,
  INDICES = pf$year_joined.bucket,
  FUN = summary
)
## pf$year_joined.bucket: (2004,2009]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3415 0.4669 0.4665 0.5909 1.0000
## --------------------------------------------------------
## pf$year_joined.bucket: (2009,2011]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3894 0.5342 0.5273 0.6744 1.0000
## --------------------------------------------------------
## pf$year_joined.bucket: (2011,2012]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.4500 0.6154 0.5911 0.7600 1.0000
## --------------------------------------------------------
## pf$year_joined.bucket: (2012,2014]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.5000 0.6912 0.6430 0.8438 1.0000
#by(pf$prop_initiated, pf$year_joined.bucket, mean, na.rm=T)
#summary(subset(pf, year_joined.bucket=='(2012,2014]')$prop_initiated)
# Jittered points of price per carat for each cut, coloured by diamond color
# with a diverging Brewer palette, in one panel per clarity level.
ggplot(diamonds, aes(x = cut, y = price / carat, colour = color)) +
  geom_jitter() +
  facet_wrap(~ clarity) +
  scale_colour_brewer(type = "div")
