# Install ggplot2 only when it is not already available.
# `repos` must be the root URL of a CRAN-style repository: R appends the
# platform-specific index path (e.g. /src/contrib) to it. Passing the URL of a
# single package archive (.tgz) makes that index lookup fail with
# "unable to access index for repository" and nothing gets installed.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2", repos = "https://cran.rstudio.com")
}
library(ggplot2)
# Load the diamonds dataset into the workspace.
data("diamonds")

# Read the tab-separated pseudo-Facebook file from the working directory.
pf <- read.delim("pseudo_facebook.tsv")
# Histogram of diamond prices on a log10 x-axis, one panel per diamond colour,
# with the bars filled by cut.
# Bars take their interior colour from the `fill` aesthetic, and
# scale_fill_brewer() only acts on `fill`, so `cut` is mapped to `fill`.
# Mapping it to `color` would only tint the bar outlines and leave the Brewer
# palette unused.
ggplot(data = diamonds, aes(x = price, fill = cut)) +
  # bins = 30 is the stat_bin() default, stated explicitly so the
  # "Pick better value with `binwidth`" message is not emitted.
  geom_histogram(bins = 30) +
  scale_x_log10() +
  scale_fill_brewer(type = "qual") +
  facet_wrap(~color, ncol = 3)
# Scatter plot of price against table, coloured by cut.
# The x-axis is limited to 50-80 with a tick every 2 units; rows whose table
# value falls outside the limits are removed (ggplot2 warns about them).
ggplot(diamonds, aes(table, price, colour = cut)) +
  scale_x_continuous(
    breaks = seq(from = 50, to = 80, by = 2),
    limits = c(50, 80)
  ) +
  geom_point(alpha = 0.2)
## Warning: Removed 5 rows containing missing values (geom_point).
# Answer: 53 to 57
# Add a volume column: the product of the x, y and z columns.
diamonds$volume <- diamonds$x * diamonds$y * diamonds$z

# Scatter plot of price (log10 y-axis) against volume, coloured by clarity.
# The x-axis runs from 0 to the 99th percentile of volume, so rows above that
# limit are removed from the plot.
ggplot(diamonds, aes(volume, price, colour = clarity)) +
  geom_point() +
  scale_x_continuous(
    limits = c(0, quantile(diamonds$volume, probs = 0.99))
  ) +
  scale_y_log10() +
  scale_colour_brewer(type = "div")
## Warning: Removed 540 rows containing missing values (geom_point).
# Ratio of friendships_initiated to friend_count for every row of pf.
# NOTE(review): the NA's reported by summary() presumably come from rows where
# friend_count is 0 (0 / 0 is NaN) - confirm against the data.
pf$prop_initiated <- pf$friendships_initiated / pf$friend_count
summary(pf[["prop_initiated"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.4524 0.6250 0.6078 0.7838 1.0000 1962
# Estimate the year each user joined by counting tenure back from 2014.
# NOTE(review): dividing by 365 suggests tenure is measured in days - confirm.
pf$year_joined <- floor(2014 - pf$tenure / 365)

# Bin the join year into the half-open intervals (2004,2009], (2009,2011],
# (2011,2012] and (2012,2014], then count the users in each bin.
pf$year_joined.bucket <- cut(
  pf$year_joined,
  breaks = c(2004, 2009, 2011, 2012, 2014)
)
table(pf$year_joined.bucket)
##
## (2004,2009] (2009,2011] (2011,2012] (2012,2014]
## 6669 15308 33366 43658
# Median prop_initiated at each tenure value, one line per join-year bucket.
# Rows with a missing tenure or a missing prop_initiated are dropped first.
ggplot(
  pf[!(is.na(pf$tenure) | is.na(pf$prop_initiated)), ],
  aes(tenure, prop_initiated, colour = year_joined.bucket)
) +
  geom_line(stat = "summary", fun.y = median)
# Same median lines as a faint backdrop (alpha 0.2) with a smoother drawn on
# top for each join-year bucket. Rows with a missing tenure or a missing
# prop_initiated are dropped first.
ggplot(
  pf[!(is.na(pf$tenure) | is.na(pf$prop_initiated)), ],
  aes(tenure, prop_initiated, colour = year_joined.bucket)
) +
  geom_line(stat = "summary", fun.y = median, alpha = 0.2) +
  geom_smooth()
# Answer: People who joined after 2012 (the newest users).
# Summary statistics of prop_initiated within each join-year bucket.
by(data = pf$prop_initiated, INDICES = pf$year_joined.bucket, FUN = summary)
## pf$year_joined.bucket: (2004,2009]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.3418 0.4672 0.4668 0.5910 1.0000 5
## --------------------------------------------------------
## pf$year_joined.bucket: (2009,2011]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.3924 0.5357 0.5301 0.6750 1.0000 81
## --------------------------------------------------------
## pf$year_joined.bucket: (2011,2012]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.4576 0.6189 0.5985 0.7619 1.0000 408
## --------------------------------------------------------
## pf$year_joined.bucket: (2012,2014]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.5115 0.7018 0.6654 0.8490 1.0000 1468
# Answer: 0.647
# Answer: They are new to Facebook, so they are searching for and adding all
# the people they know. People who have been on Facebook for a while are added
# by the new users instead. It is harder for long-tenure users to find newer
# users than it is for new users to find long-tenure users.
# Price per carat for each cut, coloured by diamond colour, one panel per
# clarity level.
# Exactly one point layer is used. Adding geom_point() next to geom_jitter()
# would draw every diamond twice, and an opaque jitter layer on top would undo
# the transparency meant to reduce overplotting. The alpha is therefore set on
# the jittered layer itself.
ggplot(data = diamonds, aes(x = cut, y = price / carat, color = color)) +
  geom_jitter(alpha = 1 / 10) +
  facet_wrap(~clarity, ncol = 3) +
  scale_color_brewer(type = "div")