# Path to the basic subreddit metadata CSV, resolved via here().
POPSIZE_PATH <- here(
  "exploratory_analyses/01_reddit_pilot/data/subreddits_basic.csv"
)

# Column names are supplied explicitly instead of being read from the file.
popsize <- read_csv(
  POPSIZE_PATH,
  col_names = c(
    "id", "reddit_id", "creation_time", "subreddit", "num_subscribers"
  )
)

# Tidy the raw table:
#   * coerce the subscriber count to numeric,
#   * convert the creation time with anytime(),
#   * bin the subscriber count into order-of-magnitude intervals,
#   * keep only rows with a known, strictly positive subscriber count.
popsize_tidy <- popsize %>%
  mutate(
    num_subscribers = as.numeric(num_subscribers),
    creation_time = anytime(creation_time),
    subscriber_num_bin = cut(
      num_subscribers,
      breaks = c(0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8)
    )
  ) %>%
  filter(!is.na(num_subscribers) & num_subscribers > 0)

There are 835786 communities with at least 1 subscriber; 102346 communities with at least 100 subscribers.

Number of subreddits by size:

# Histogram of subreddit sizes on a log10 x-axis, with one axis break per
# power of ten from 1 to 10,000,000.
ggplot(data = popsize_tidy, mapping = aes(x = num_subscribers)) +
  geom_histogram() +
  scale_x_log10(breaks = c(1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7)) +
  labs(
    x = "Number of subscribers",
    y = "Number of subreddits"
  )

 # Number of subreddits falling into each subscriber-count bin, as a table.
 kable(
   count(popsize_tidy, subscriber_num_bin)
 )
subscriber_num_bin n
(0,10] 567704
(10,100] 166158
(100,1e+03] 70282
(1e+03,1e+04] 24004
(1e+04,1e+05] 6444
(1e+05,1e+06] 1112
(1e+06,1e+07] 35
(1e+07,1e+08] 47

Sample subreddits by size:

 # Draw five random subreddits from every subscriber-count bin and show them
 # as a table. No seed is set here, so the rows differ from run to run.
 popsize_tidy %>%
   select(subscriber_num_bin, num_subscribers, subreddit, creation_time) %>%
   group_by(subscriber_num_bin) %>%
   sample_n(size = 5) %>%
   kable()
subscriber_num_bin num_subscribers subreddit creation_time
(0,10] 1 NewsGamingNews 2015-08-06 17:22:07
(0,10] 6 TopOnYoutube 2017-05-29 15:20:11
(0,10] 5 kingdomrushpc 2014-01-29 22:02:21
(0,10] 3 teencirclejerk 2012-08-11 19:37:48
(0,10] 10 orthodoxbahai 2017-06-10 13:35:28
(10,100] 17 lazyfuck 2015-12-10 19:52:14
(10,100] 20 Eagle 2011-02-07 00:10:02
(10,100] 33 BEANGRY 2011-06-29 23:40:23
(10,100] 19 OntarioPics 2015-10-28 19:46:18
(10,100] 82 ChristianPsychonauts 2014-06-14 22:05:46
(100,1e+03] 506 MouthCum 2014-09-25 16:56:57
(100,1e+03] 103 FunnyThingsKidsSay 2013-12-30 23:42:27
(100,1e+03] 133 NSFWAsianAmericans 2014-08-31 02:43:39
(100,1e+03] 260 climaxquotes 2014-07-25 17:06:17
(100,1e+03] 117 pkgobaltimore 2016-07-07 22:46:59
(1e+03,1e+04] 9855 NSFWebms 2014-09-21 09:20:54
(1e+03,1e+04] 2266 PornNetwork 2014-07-13 18:28:24
(1e+03,1e+04] 1111 ImaginaryGlaciers 2013-11-27 06:57:56
(1e+03,1e+04] 1510 PaladinsAcademy 2017-07-09 01:25:03
(1e+03,1e+04] 1436 B4CGI 2016-05-16 09:20:14
(1e+04,1e+05] 23310 gis 2008-09-20 20:25:31
(1e+04,1e+05] 22706 Splitview 2015-04-27 12:08:54
(1e+04,1e+05] 36718 brisbane 2009-03-13 01:23:33
(1e+04,1e+05] 18749 selfpix 2010-01-14 22:11:07
(1e+04,1e+05] 17699 laptops 2008-11-13 01:27:29
(1e+05,1e+06] 214929 nutrition 2008-11-16 21:35:46
(1e+05,1e+06] 171075 medicine 2008-03-13 17:18:04
(1e+05,1e+06] 103951 Memes_Of_The_Dank 2016-11-03 19:01:37
(1e+05,1e+06] 633329 BikiniBottomTwitter 2016-03-31 19:52:59
(1e+05,1e+06] 237136 trashyboners 2014-09-29 22:39:57
(1e+06,1e+07] 1029949 tattoos 2008-06-23 21:01:06
(1e+06,1e+07] 1094291 Whatcouldgowrong 2013-04-27 18:06:14
(1e+06,1e+07] 1753074 woahdude 2009-10-04 09:32:40
(1e+06,1e+07] 1133871 trees 2009-10-15 11:51:10
(1e+06,1e+07] 1188361 lifehacks 2008-06-20 21:20:34
(1e+07,1e+08] 13123701 InternetIsBeautiful 2012-07-24 23:02:51
(1e+07,1e+08] 13238571 tifu 2012-03-03 21:45:54
(1e+07,1e+08] 17523468 movies 2008-01-25 01:52:30
(1e+07,1e+08] 18755662 todayilearned 2008-12-28 01:46:59
(1e+07,1e+08] 13007534 dataisbeautiful 2012-02-14 18:45:58