# Histogram of `freq` from `friendships.counts`; the x-axis is limited to
# 0-500 with a tick every 50. Rows outside that range are dropped by the scale.
ggplot(data = friendships.counts, mapping = aes(x = freq)) +
  geom_histogram(colour = "black", fill = "gray") +
  scale_x_continuous(limits = c(0, 500), breaks = seq(0, 500, by = 50)) +
  labs(
    x = "Degree",
    y = "Number of people",
    title = "Histogram showing degree distribution"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 198 rows containing non-finite values (stat_bin).
# Same histogram of `freq` as above, but with a log10 y-axis so that the
# sparsely populated high-degree bins remain visible.
ggplot(data = friendships.counts, mapping = aes(x = freq)) +
  geom_histogram(colour = "black", fill = "gray") +
  scale_x_continuous(limits = c(0, 500), breaks = seq(0, 500, by = 50)) +
  scale_y_log10(breaks = c(10, 1000, 50, 100, 500, 5000)) +
  labs(
    x = "Degree",
    y = "Number of people",
    title = "Histogram showing degree distribution (log y-axis)"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 198 rows containing non-finite values (stat_bin).
# Empirical CDF of `freq` on a log10 x-axis.
# stat_ecdf() plots the cumulative proportion of observations (0 to 1), not a
# count, so the y-axis is labelled as a proportion rather than a number of
# people.
ggplot(friendships.counts) +
  stat_ecdf(aes(freq)) +
  scale_x_log10(breaks = c(10, 25, 50, 100, 250, 500, 1000)) +
  ylab("Cumulative proportion of people") +
  xlab("Degree") +
  ggtitle("CDF of friendship frequency")
Notice that the CDF is almost linear on the above plot (which uses a logarithmic x-axis), and that the vast majority (90%+) of individuals have fewer than 250 friends.
Those with many friends have distributions of wall-post interactions almost indistinguishable from those with very few friends.
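The following plot shows the relationships (A,B) in the dataset, with the degree of A on the x-axis and the number of wall posts from A to B on the y-axis; relationships that fall outside the plotted axis ranges are dropped, as the warning below the plot reports.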
# Violin plots of `ab_post` against `a_deg`, one violin per 25-wide band of
# `a_deg`. The axis limits (x: 0-250, log10 y: 10-100) drop rows outside them.
ggplot(data = rels, mapping = aes(x = a_deg, y = ab_post)) +
  geom_violin(aes(group = floor(a_deg / 25))) +
  scale_x_continuous(limits = c(0, 250)) +
  scale_y_log10(limits = c(10, 100)) +
  labs(x = "Degree of A", y = "Posts from A to B")
## Warning: Removed 136474 rows containing non-finite values (stat_ydensity).
And we see the same lack of pattern for those who contact the focal user.
# Violin plots of posts received by A (`ba_post`) against `a_deg`, one violin
# per 25-wide band of `a_deg`.
# The y aesthetic must be `ba_post`: mapping `ab_post` here would simply
# redraw the previous "Posts from A to B" plot under a "B to A" label.
ggplot(rels) +
  geom_violin(aes(a_deg, ba_post, group = floor(a_deg / 25))) +
  scale_x_continuous(limits = c(0, 250)) +
  scale_y_log10(limits = c(10, 100)) +
  ylab("Posts from B to A") +
  xlab("Degree of A")
## Warning: Removed 136474 rows containing non-finite values (stat_ydensity).
# Sum `ab_post` over all rows sharing the same (`a`, `a_deg`) pair, giving one
# row per `a` with its total outgoing posts.
total_wp <- aggregate(ab_post ~ a + a_deg, data = rels, FUN = sum)
# Violin plots of the per-`a` totals, one violin per 60-wide band of `a_deg`.
# Columns are referenced by bare name inside aes(): `total_wp$a_deg` would
# bypass ggplot2's data masking and break if the layer data were ever
# filtered, faceted or replaced.
ggplot(total_wp) +
  # geom_point(aes(a_deg, ab_post), alpha=0.1) +
  geom_violin(aes(a_deg, ab_post, group = floor(a_deg / 60))) +
  scale_x_continuous(limits = c(0, 500)) +
  scale_y_log10(limits = c(10, 500)) +
  ylab("Total number of outgoing posts") +
  xlab("Degree")
## Warning: Removed 18860 rows containing non-finite values (stat_ydensity).
# Keep only relationships with more than 5 posts in total (both directions).
# which() drops NA comparisons; a bare logical index would instead insert
# all-NA rows for any relationship with a missing post count.
rels.post_sometimes <- rels[which(rels$ab_post + rels$ba_post > 5), ]
# Violin plots of `post_diff` against `deg_diff`, one violin per 15-wide band
# of `deg_diff`. Columns are referenced by bare name inside aes() rather than
# via `rels.post_sometimes$...`, so the grouping always follows the layer data.
ggplot(rels.post_sometimes) +
  # geom_point(aes(a_deg, ab_post), alpha=0.1) +
  geom_violin(aes(deg_diff, post_diff, group = floor(deg_diff / 15))) +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(-30, 30)) +
  ylab("Post difference frequency") +
  xlab("Difference in degree")
## Warning: Removed 35499 rows containing non-finite values (stat_ydensity).
# Build a frequency table of `post_diff`, restricted to |diff| < 100.
diffs <- rels$post_diff
diffs <- diffs[abs(diffs) < 100]
# Use hist() only for its binning; no plot is drawn.
pdh <- hist(diffs, plot = FALSE)
# `breaks` has one more element than `counts`: dropping the last element gives
# each bin's lower edge, and dropping the first gives its upper edge.
# (The previous `1:length(x)-1` parsed as `(1:length(x)) - 1`, i.e. 0:(n-1),
# and only worked because R silently ignores a 0 index.)
pdh.organize <- data.frame(
  RangeStart = pdh$breaks[-length(pdh$breaks)],
  RangeEnd = pdh$breaks[-1],
  Count = pdh$counts
)
kable(pdh.organize, caption = "Frequency table of difference in the number of posts A sends to B and the number B sends to A") %>% kable_styling()
| RangeStart | RangeEnd | Count |
|---|---|---|
| -100 | -90 | 1 |
| -90 | -80 | 12 |
| -80 | -70 | 10 |
| -70 | -60 | 14 |
| -60 | -50 | 33 |
| -50 | -40 | 41 |
| -40 | -30 | 89 |
| -30 | -20 | 223 |
| -20 | -10 | 1070 |
| -10 | 0 | 99469 |
| 0 | 10 | 42746 |
| 10 | 20 | 890 |
| 20 | 30 | 202 |
| 30 | 40 | 84 |
| 40 | 50 | 37 |
| 50 | 60 | 31 |
| 60 | 70 | 16 |
| 70 | 80 | 8 |
| 80 | 90 | 11 |
| 90 | 100 | 1 |