library(ggplot2)
# Load the tab-separated user data into `pf`.
# read.delim() is used rather than read.delim2(): the latter treats "," as the
# decimal mark, which is only appropriate for comma-decimal (European-style)
# exports. NOTE(review): assumes the file uses "." decimals or integers only.
pf <- read.delim("pseudo_facebook.tsv")
# Raw scatter plot: friend_count (y) against age (x), all rows of `pf`.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point()

# Quartiles, mean and range of the age column (used to pick the x limits below).
summary(pf[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.00 20.00 28.00 37.28 50.00 113.00
# Same scatter with the x axis limited to ages 13-90. Rows whose age falls
# outside the limits are dropped by ggplot2, which emits the warning below.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point() +
  xlim(13, 90)
## Warning: Removed 4906 rows containing missing values (geom_point).

# Age-limited scatter with mostly transparent points (alpha = 1/20) so that
# dense regions stand out instead of saturating into a solid mass.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 20) +
  xlim(13, 90)
## Warning: Removed 4906 rows containing missing values (geom_point).

# Transparent scatter with default jitter: geom_jitter() adds random noise in
# both x and y, breaking up the vertical stripes caused by integer ages.
# The jitter is random, so the number of rows pushed outside the x limits
# (see the warning below) varies from run to run.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_jitter(alpha = 1 / 20) +
  xlim(13, 90)
## Warning: Removed 5188 rows containing missing values (geom_point).

# Transparent, horizontally jittered scatter of friend_count against age,
# drawn on a square-root-transformed y axis and limited to ages 13-90.
ggplot(data = pf, mapping = aes(x = age, y = friend_count)) +
  geom_point(
    alpha = 1 / 20,
    # Jitter in x only. `height` is spelled out in full instead of relying on
    # partial argument matching (`h = 0`). Vertical jitter is disabled,
    # presumably so jittered counts cannot go negative under the sqrt
    # transform.
    position = position_jitter(height = 0)
  ) +
  xlim(13, 90) +
  coord_trans(y = "sqrt")
## Warning: Removed 5165 rows containing missing values (geom_point).

# friendships_initiated against age: very transparent points (alpha = 1/50),
# ages limited to 13-90, y axis drawn on a square-root scale.
ggplot(pf) +
  geom_point(aes(x = age, y = friendships_initiated), alpha = 1 / 50) +
  xlim(13, 90) +
  coord_trans(y = "sqrt")
## Warning: Removed 4906 rows containing missing values (geom_point).

# Transparent, horizontally jittered scatter of friendships_initiated against
# age, drawn on a square-root-transformed y axis and limited to ages 13-90.
ggplot(data = pf, mapping = aes(x = age, y = friendships_initiated)) +
  geom_point(
    alpha = 1 / 20,
    # Jitter in x only. `height` is spelled out in full instead of relying on
    # partial argument matching (`h = 0`). Vertical jitter is disabled,
    # presumably so jittered counts cannot go negative under the sqrt
    # transform.
    position = position_jitter(height = 0)
  ) +
  xlim(13, 90) +
  coord_trans(y = "sqrt")
## Warning: Removed 5214 rows containing missing values (geom_point).

library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Per-age summary of friend_count: mean, median and number of users at each
# age, ordered by age. `age_groups` is kept as a named intermediate.
age_groups <- pf %>% group_by(age)
pf.fc_by_age <- age_groups %>%
  summarise(
    friend_count_mean = mean(friend_count),
    friend_count_median = median(friend_count),
    n = n()
  ) %>%
  arrange(age)

# Show the first 20 age rows of the summary.
head(pf.fc_by_age, n = 20)
## # A tibble: 20 × 4
## age friend_count_mean friend_count_median n
## <int> <dbl> <dbl> <int>
## 1 13 164.7500 74.0 484
## 2 14 251.3901 132.0 1925
## 3 15 347.6921 161.0 2618
## 4 16 351.9371 171.5 3086
## 5 17 350.3006 156.0 3283
## 6 18 331.1663 162.0 5196
## 7 19 333.6921 157.0 4391
## 8 20 283.4991 135.0 3769
## 9 21 235.9412 121.0 3671
## 10 22 211.3948 106.0 3032
## 11 23 202.8426 93.0 4404
## 12 24 185.7121 92.0 2827
## 13 25 131.0211 62.0 3641
## 14 26 144.0082 75.0 2815
## 15 27 134.1473 72.0 2240
## 16 28 125.8354 66.0 2364
## 17 29 120.8182 66.0 1936
## 18 30 115.2080 67.5 1716
## 19 31 118.4599 63.0 1694
## 20 32 114.2800 63.0 1443
# Mean friend count per age as points, x axis limited to ages 30-90.
# Summary rows for ages outside that range are dropped (see warning below).
ggplot(pf.fc_by_age) +
  geom_point(aes(x = age, y = friend_count_mean)) +
  xlim(30, 90)
## Warning: Removed 40 rows containing missing values (geom_point).

# Mean friend count per age as a connected line, x axis limited to ages 30-90.
# Summary rows for ages outside that range are dropped (see warning below).
ggplot(pf.fc_by_age) +
  geom_line(aes(x = age, y = friend_count_mean)) +
  xlim(30, 90)
## Warning: Removed 40 rows containing missing values (geom_path).
