library(ggplot2)
# Load the pseudo-Facebook user data: a tab-separated file with a header row.
# read.delim() is used instead of read.delim2() because the latter defaults to
# dec = ",", which would cause any column written with "." decimals to be read
# as text rather than as numbers.
pf <- read.delim("pseudo_facebook.tsv")
# Baseline scatter plot: friend count against age for every row of pf.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point()

# Distribution summary of user ages (min, quartiles, mean, max).
summary(pf[["age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.00   20.00   28.00   37.28   50.00  113.00
# Same scatter plot with the x scale restricted to ages 13-90. Rows whose age
# lies outside the limits are dropped by the scale, which triggers a
# "Removed ... rows" warning.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point() +
  scale_x_continuous(limits = c(13, 90))
## Warning: Removed 4906 rows containing missing values (geom_point).

# Reduce overplotting: draw every point at 1/20 opacity, so it takes 20
# overlapping points to produce a fully opaque mark. Ages limited to 13-90.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 20) +
  scale_x_continuous(limits = c(13, 90))
## Warning: Removed 4906 rows containing missing values (geom_point).

# Jittered scatter plot of friend count against age for users aged 13-90.
# Age is recorded in whole years, so jitter spreads the points out of their
# vertical stripes.
#
# The age range is selected by filtering the data rather than with
# xlim(13, 90): fixed scale limits are re-applied after jittering, so points
# of 13- and 90-year-olds that were nudged past the limits were discarded
# together with the out-of-range ages (more rows were reported as removed than
# in the unjittered plot). The x axis now extends slightly beyond 13 and 90 to
# show every jittered point.
ggplot(data = subset(pf, age >= 13 & age <= 90)) +
  geom_jitter(mapping = aes(x = age, y = friend_count), alpha = 1 / 20)

# Friend count against age (13-90), jittered horizontally only, with the
# y axis on a square-root scale.
#
# - height = 0 (spelled out; the original relied on partial matching of `h`)
#   disables vertical jitter, so counts of 0 cannot be pushed below zero,
#   where the sqrt transformation is undefined.
# - The age range is selected by filtering the data rather than with
#   xlim(13, 90): fixed scale limits are re-applied after jittering, so points
#   of 13- and 90-year-olds nudged past the limits were discarded along with
#   the out-of-range ages. The x axis now extends slightly beyond 13 and 90.
ggplot(data = subset(pf, age >= 13 & age <= 90)) +
  geom_point(
    mapping = aes(x = age, y = friend_count),
    alpha = 1 / 20,
    position = position_jitter(height = 0)
  ) +
  coord_trans(y = "sqrt")

# Friendships initiated against age (x scale limited to 13-90), drawn at 1/50
# opacity with the y axis on a square-root scale.
ggplot(pf) +
  geom_point(aes(x = age, y = friendships_initiated), alpha = 1 / 50) +
  scale_x_continuous(limits = c(13, 90)) +
  coord_trans(y = "sqrt")
## Warning: Removed 4906 rows containing missing values (geom_point).

# Friendships initiated against age (13-90), jittered horizontally only, with
# the y axis on a square-root scale.
#
# - height = 0 (spelled out; the original relied on partial matching of `h`)
#   disables vertical jitter, so counts of 0 cannot be pushed below zero,
#   where the sqrt transformation is undefined.
# - The age range is selected by filtering the data rather than with
#   xlim(13, 90): fixed scale limits are re-applied after jittering, so points
#   of 13- and 90-year-olds nudged past the limits were discarded along with
#   the out-of-range ages. The x axis now extends slightly beyond 13 and 90.
ggplot(data = subset(pf, age >= 13 & age <= 90)) +
  geom_point(
    mapping = aes(x = age, y = friendships_initiated),
    alpha = 1 / 20,
    position = position_jitter(height = 0)
  ) +
  coord_trans(y = "sqrt")

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Per-age summary of friend counts: one row per distinct age holding the mean
# and median friend count and the number of users of that age.
age_groups <- pf %>% group_by(age)
pf.fc_by_age <- age_groups %>%
  summarise(
    friend_count_mean = mean(friend_count),
    friend_count_median = median(friend_count),
    n = n()
  ) %>%
  arrange(age)
# Show the 20 youngest age groups.
head(pf.fc_by_age, n = 20)
## # A tibble: 20 × 4
##      age friend_count_mean friend_count_median     n
##    <int>             <dbl>               <dbl> <int>
## 1     13          164.7500                74.0   484
## 2     14          251.3901               132.0  1925
## 3     15          347.6921               161.0  2618
## 4     16          351.9371               171.5  3086
## 5     17          350.3006               156.0  3283
## 6     18          331.1663               162.0  5196
## 7     19          333.6921               157.0  4391
## 8     20          283.4991               135.0  3769
## 9     21          235.9412               121.0  3671
## 10    22          211.3948               106.0  3032
## 11    23          202.8426                93.0  4404
## 12    24          185.7121                92.0  2827
## 13    25          131.0211                62.0  3641
## 14    26          144.0082                75.0  2815
## 15    27          134.1473                72.0  2240
## 16    28          125.8354                66.0  2364
## 17    29          120.8182                66.0  1936
## 18    30          115.2080                67.5  1716
## 19    31          118.4599                63.0  1694
## 20    32          114.2800                63.0  1443
# Mean friend count per age, one point per age, x scale limited to 30-90.
# NOTE(review): the lower limit is 30 here while the earlier plots start at
# 13 - confirm this is intended.
ggplot(pf.fc_by_age) +
  geom_point(aes(x = age, y = friend_count_mean)) +
  scale_x_continuous(limits = c(30, 90))
## Warning: Removed 40 rows containing missing values (geom_point).

# Mean friend count per age drawn as a connected line, x scale limited to
# 30-90.
# NOTE(review): the lower limit is 30 here while the earlier plots start at
# 13 - confirm this is intended.
ggplot(pf.fc_by_age) +
  geom_line(aes(x = age, y = friend_count_mean)) +
  scale_x_continuous(limits = c(30, 90))
## Warning: Removed 40 rows containing missing values (geom_path).