# Read the subreddit listing, passing explicit column names via `col_names`.
POPSIZE_PATH <- here("exploratory_analyses/01_reddit_pilot/data/subreddits_basic.csv")
popsize <- read_csv(
  POPSIZE_PATH,
  col_names = c("id", "reddit_id", "creation_time", "subreddit", "num_subscribers")
)

# Coerce the subscriber count to numeric, parse the creation time, and bin the
# counts into right-closed power-of-ten intervals; then keep only rows whose
# subscriber count is present and strictly positive.
popsize_tidy <- popsize %>%
  mutate(
    num_subscribers = as.numeric(num_subscribers),
    creation_time = anytime(creation_time),
    subscriber_num_bin = cut(
      num_subscribers,
      breaks = c(0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8)
    )
  ) %>%
  filter(!is.na(num_subscribers) & num_subscribers > 0)
There are 835786 communities with at least 1 subscriber; 102346 communities with at least 100 subscribers.
Number of subreddits by size:
# Histogram of subscriber counts on a log10 x-axis, with axis breaks placed at
# each power of ten from 1 to 10,000,000.
ggplot(data = popsize_tidy, mapping = aes(x = num_subscribers)) +
  geom_histogram() +
  scale_x_log10(breaks = c(1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7)) +
  labs(x = "Number of subscribers", y = "Number of subreddits")
# Tabulate how many subreddits fall into each subscriber-count bin.
kable(count(popsize_tidy, subscriber_num_bin))
| subscriber_num_bin | n |
|---|---|
| (0,10] | 567704 |
| (10,100] | 166158 |
| (100,1e+03] | 70282 |
| (1e+03,1e+04] | 24004 |
| (1e+04,1e+05] | 6444 |
| (1e+05,1e+06] | 1112 |
| (1e+06,1e+07] | 35 |
| (1e+07,1e+08] | 47 |
Sample subreddits by size:
# Keep the display columns, then draw five random subreddits from every
# subscriber-count bin and render them as a table.
popsize_tidy %>%
  select(subscriber_num_bin, num_subscribers, subreddit, creation_time) %>%
  group_by(subscriber_num_bin) %>%
  sample_n(size = 5) %>%
  kable()
| subscriber_num_bin | num_subscribers | subreddit | creation_time |
|---|---|---|---|
| (0,10] | 1 | NewsGamingNews | 2015-08-06 17:22:07 |
| (0,10] | 6 | TopOnYoutube | 2017-05-29 15:20:11 |
| (0,10] | 5 | kingdomrushpc | 2014-01-29 22:02:21 |
| (0,10] | 3 | teencirclejerk | 2012-08-11 19:37:48 |
| (0,10] | 10 | orthodoxbahai | 2017-06-10 13:35:28 |
| (10,100] | 17 | lazyfuck | 2015-12-10 19:52:14 |
| (10,100] | 20 | Eagle | 2011-02-07 00:10:02 |
| (10,100] | 33 | BEANGRY | 2011-06-29 23:40:23 |
| (10,100] | 19 | OntarioPics | 2015-10-28 19:46:18 |
| (10,100] | 82 | ChristianPsychonauts | 2014-06-14 22:05:46 |
| (100,1e+03] | 506 | MouthCum | 2014-09-25 16:56:57 |
| (100,1e+03] | 103 | FunnyThingsKidsSay | 2013-12-30 23:42:27 |
| (100,1e+03] | 133 | NSFWAsianAmericans | 2014-08-31 02:43:39 |
| (100,1e+03] | 260 | climaxquotes | 2014-07-25 17:06:17 |
| (100,1e+03] | 117 | pkgobaltimore | 2016-07-07 22:46:59 |
| (1e+03,1e+04] | 9855 | NSFWebms | 2014-09-21 09:20:54 |
| (1e+03,1e+04] | 2266 | PornNetwork | 2014-07-13 18:28:24 |
| (1e+03,1e+04] | 1111 | ImaginaryGlaciers | 2013-11-27 06:57:56 |
| (1e+03,1e+04] | 1510 | PaladinsAcademy | 2017-07-09 01:25:03 |
| (1e+03,1e+04] | 1436 | B4CGI | 2016-05-16 09:20:14 |
| (1e+04,1e+05] | 23310 | gis | 2008-09-20 20:25:31 |
| (1e+04,1e+05] | 22706 | Splitview | 2015-04-27 12:08:54 |
| (1e+04,1e+05] | 36718 | brisbane | 2009-03-13 01:23:33 |
| (1e+04,1e+05] | 18749 | selfpix | 2010-01-14 22:11:07 |
| (1e+04,1e+05] | 17699 | laptops | 2008-11-13 01:27:29 |
| (1e+05,1e+06] | 214929 | nutrition | 2008-11-16 21:35:46 |
| (1e+05,1e+06] | 171075 | medicine | 2008-03-13 17:18:04 |
| (1e+05,1e+06] | 103951 | Memes_Of_The_Dank | 2016-11-03 19:01:37 |
| (1e+05,1e+06] | 633329 | BikiniBottomTwitter | 2016-03-31 19:52:59 |
| (1e+05,1e+06] | 237136 | trashyboners | 2014-09-29 22:39:57 |
| (1e+06,1e+07] | 1029949 | tattoos | 2008-06-23 21:01:06 |
| (1e+06,1e+07] | 1094291 | Whatcouldgowrong | 2013-04-27 18:06:14 |
| (1e+06,1e+07] | 1753074 | woahdude | 2009-10-04 09:32:40 |
| (1e+06,1e+07] | 1133871 | trees | 2009-10-15 11:51:10 |
| (1e+06,1e+07] | 1188361 | lifehacks | 2008-06-20 21:20:34 |
| (1e+07,1e+08] | 13123701 | InternetIsBeautiful | 2012-07-24 23:02:51 |
| (1e+07,1e+08] | 13238571 | tifu | 2012-03-03 21:45:54 |
| (1e+07,1e+08] | 17523468 | movies | 2008-01-25 01:52:30 |
| (1e+07,1e+08] | 18755662 | todayilearned | 2008-12-28 01:46:59 |
| (1e+07,1e+08] | 13007534 | dataisbeautiful | 2012-02-14 18:45:58 |