JSD over community time

# Paths to the community-over-time JSD files, one per subreddit.
PGH_C_OVERTIME <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_pittsburgh_community_overtime.csv"
)
REDPILL_C_OVERTIME <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_redpill_community_overtime.csv"
)

# Read each file and tag every row with the subreddit it came from.
pgh_overtime <- mutate(read_csv(PGH_C_OVERTIME), subreddit = "pgh")
redpill_overtime <- mutate(read_csv(REDPILL_C_OVERTIME), subreddit = "redpill")

# Stack the two subreddits (pgh rows first) into a single table for plotting.
over_community_time <- bind_rows(
  pgh_overtime,
  redpill_overtime
)

# Mean JSD per time group, one panel per subreddit with independent axes.
# Point size encodes the `n` column. The blue curve is geom_smooth()'s default
# smoother and the red line is a linear fit.
ggplot(over_community_time, aes(x = group, y = mean_jsd)) +
  # `scales` is spelled out in full: the abbreviated `scale` only worked
  # through R's partial argument matching.
  facet_wrap(~subreddit, scales = "free") +
  geom_point(aes(size = n), alpha = .3) +
  geom_smooth(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  xlab("Date (in weeks)") +
  ylab("Mean JSD") +
  ggtitle("Post similarity over community time") +
  theme_classic(base_size = 14)

JSD over individual time

pink - mean distance from the current post to all other posts in the community in that time period;

green - distance to previous author post

# Paths to the nth-post JSD files, one per subreddit.
PAIRWISE_TOPIC_JSD_REDPILL <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_redpill_nth_post.csv"
)
PAIRWISE_TOPIC_JSD_PGH <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_pittsburgh_nth_post.csv"
)

# Read each file and tag every row with the subreddit it came from.
pgh_overtime_nth <- read_csv(PAIRWISE_TOPIC_JSD_PGH) %>%
  mutate(subreddit = "pgh")
redpill_overtime_nth <- read_csv(PAIRWISE_TOPIC_JSD_REDPILL) %>%
  mutate(subreddit = "redpill")

# Long format: one row per author x nth_post x subreddit x measure, where
# `measure` is either "previous_author_JSD" or "current_community_JSD" and
# `value` holds the corresponding JSD.
# pivot_longer() replaces the superseded gather(); the columns to stack are
# named explicitly instead of being implied by exclusion.
over_individual_time <- bind_rows(pgh_overtime_nth, redpill_overtime_nth) %>%
  select(
    author, nth_post, previous_author_JSD, current_community_JSD, subreddit
  ) %>%
  pivot_longer(
    cols = c(previous_author_JSD, current_community_JSD),
    names_to = "measure",
    values_to = "value"
  )

n >= 50

# Mean JSD for every subreddit x nth_post x measure cell, keeping only cells
# with at least 50 rows.
# NOTE: `n` counts every row in the cell, including rows whose `value` is NA,
# while the mean itself ignores NAs.
over_individual_time_ms <- over_individual_time %>%
  group_by(subreddit, nth_post, measure) %>%
  summarize(
    mean_JSD = mean(value, na.rm = TRUE), # TRUE, not the reassignable `T`
    n = n()
  ) %>%
  # summarize() only peels off the last grouping level; drop the rest so the
  # result is a plain, ungrouped table.
  ungroup() %>%
  filter(n >= 50)

# Mean JSD by nth post, one panel per subreddit, with one colored series and
# one default geom_smooth() curve per measure.
over_individual_time_ms %>%
  ggplot(aes(x = nth_post, y = mean_JSD, group = measure, color = measure)) +
  geom_point() +
  geom_smooth() +
  # Title spelling corrected ("indvidual" -> "individual").
  ggtitle("Post similarity over individual time (with community reference)") +
  xlab("Nth post") +
  ylab("Mean JSD") +
  facet_wrap(~subreddit) +
  theme_classic()

n = all

# Mean JSD for every subreddit x nth_post x measure cell, with no minimum
# cell size. This overwrites any earlier `over_individual_time_ms`.
# NOTE: `n` counts every row in the cell, including rows whose `value` is NA,
# while the mean itself ignores NAs.
over_individual_time_ms <- over_individual_time %>%
  group_by(subreddit, nth_post, measure) %>%
  summarize(
    mean_JSD = mean(value, na.rm = TRUE), # TRUE, not the reassignable `T`
    n = n()
  ) %>%
  # summarize() only peels off the last grouping level; drop the rest so the
  # result is a plain, ungrouped table.
  ungroup()

# Mean JSD by nth post, one panel per subreddit, with one colored series and
# one default geom_smooth() curve per measure.
over_individual_time_ms %>%
  ggplot(aes(x = nth_post, y = mean_JSD, group = measure, color = measure)) +
  geom_point() +
  geom_smooth() +
  # Title spelling corrected ("indvidual" -> "individual").
  ggtitle("Post similarity over individual time (with community reference)") +
  xlab("Nth post") +
  ylab("Mean JSD") +
  facet_wrap(~subreddit) +
  theme_classic()

Individual trajectories

pgh individual authors (nposts >=25)

# Smoothed per-author trajectories of both measures for "pgh" authors with at
# least 25 rows per measure in this subreddit, one panel per author.
over_individual_time %>%
  filter(subreddit == "pgh") %>%
  # Apply the >= 25 threshold to rows from this subreddit only. The earlier
  # version counted rows across all subreddits, so an author name that occurs
  # in both could pass on the combined total.
  # Rows are counted whether or not `value` is NA.
  group_by(author, measure) %>%
  filter(n() >= 25) %>%
  ungroup() %>%
  ggplot(aes(x = nth_post, y = value, group = measure, color = measure)) +
  geom_smooth() +
  facet_wrap(~author) +
  theme_classic() +
  theme(legend.position = "bottom")

redpill individual authors (nposts >=25)

# Smoothed per-author trajectories of both measures for "redpill" authors with
# at least 25 rows per measure in this subreddit, one panel per author.
over_individual_time %>%
  filter(subreddit == "redpill") %>%
  # Apply the >= 25 threshold to rows from this subreddit only. The earlier
  # version counted rows across all subreddits, so an author name that occurs
  # in both could pass on the combined total.
  # Rows are counted whether or not `value` is NA.
  group_by(author, measure) %>%
  filter(n() >= 25) %>%
  ungroup() %>%
  ggplot(aes(x = nth_post, y = value, group = measure, color = measure)) +
  geom_smooth() +
  facet_wrap(~author) +
  theme_classic() +
  theme(legend.position = "bottom")

community + individual JSD

n >= 50

# Sum of the two JSD measures per post, averaged by subreddit and nth post and
# plotted for cells with at least 50 posts that have both measures.
over_individual_time %>%
  # Back to wide format: one row per author x nth_post x subreddit with one
  # column per measure. pivot_wider() replaces the superseded spread().
  pivot_wider(names_from = measure, values_from = value) %>%
  mutate(
    community_to_poster_JSD = current_community_JSD + previous_author_JSD
  ) %>%
  # The sum is NA when either measure is missing; those posts are dropped
  # before averaging, so `n` counts only posts that have both measures.
  filter(!is.na(community_to_poster_JSD)) %>%
  group_by(subreddit, nth_post) %>%
  summarize(
    community_to_poster_JSD = mean(community_to_poster_JSD),
    n = n()
  ) %>%
  ungroup() %>%
  filter(n >= 50) %>%
  ggplot(aes(x = nth_post, y = community_to_poster_JSD)) +
  geom_smooth() +
  geom_point() +
  facet_wrap(~subreddit) +
  theme_classic() +
  theme(legend.position = "bottom")

n = all

# Sum of the two JSD measures per post, averaged by subreddit and nth post and
# plotted for every cell regardless of how many posts it contains.
over_individual_time %>%
  # Back to wide format: one row per author x nth_post x subreddit with one
  # column per measure. pivot_wider() replaces the superseded spread().
  pivot_wider(names_from = measure, values_from = value) %>%
  mutate(
    community_to_poster_JSD = current_community_JSD + previous_author_JSD
  ) %>%
  # The sum is NA when either measure is missing; those posts are dropped
  # before averaging, so `n` counts only posts that have both measures.
  filter(!is.na(community_to_poster_JSD)) %>%
  group_by(subreddit, nth_post) %>%
  summarize(
    community_to_poster_JSD = mean(community_to_poster_JSD),
    n = n()
  ) %>%
  ungroup() %>%
  ggplot(aes(x = nth_post, y = community_to_poster_JSD)) +
  geom_smooth() +
  geom_point() +
  facet_wrap(~subreddit) +
  theme_classic() +
  theme(legend.position = "bottom")