# Community-level view: mean post JSD per time bin, one panel per subreddit.

# Paths (resolved from the project root via here()) to the per-subreddit CSVs.
REDPILL_C_OVERTIME <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_redpill_community_overtime.csv"
)
PGH_C_OVERTIME <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_pittsburgh_community_overtime.csv"
)

# Read each file and tag its rows with the source subreddit so the two tables
# can be stacked and later split into facets.
pgh_overtime <- read_csv(PGH_C_OVERTIME) %>%
  mutate(subreddit = "pgh")
redpill_overtime <- read_csv(REDPILL_C_OVERTIME) %>%
  mutate(subreddit = "redpill")

over_community_time <- bind_rows(pgh_overtime, redpill_overtime)

# Point size is mapped to `n`. Two trend lines are overlaid on the points:
# the default smoother in blue and a linear fit in red.
# `scales` is spelled out in full; the previous `scale =` only worked through
# partial argument matching.
ggplot(over_community_time, aes(x = group, y = mean_jsd)) +
  facet_wrap(~subreddit, scales = "free") +
  geom_point(aes(size = n), alpha = .3) +
  xlab("Date (in weeks)") +
  ylab("Mean JSD") +
  ggtitle("Post similarity over community time") +
  geom_smooth(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  theme_classic(base_size = 14)
Pink (current_community_JSD): mean distance of the current post to all other posts in the community during that time period.
Green (previous_author_JSD): distance of the current post to the same author's previous post.
# Individual-level view: JSD measures indexed by each author's nth post.

# Paths (resolved from the project root via here()) to the per-subreddit CSVs.
PAIRWISE_TOPIC_JSD_REDPILL <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_redpill_nth_post.csv"
)
PAIRWISE_TOPIC_JSD_PGH <- here(
  "exploratory_analyses/01_reddit_pilot/data/post_JSD_pittsburgh_nth_post.csv"
)

# Read each file and tag its rows with the source subreddit.
pgh_overtime_nth <- read_csv(PAIRWISE_TOPIC_JSD_PGH) %>%
  mutate(subreddit = "pgh")
redpill_overtime_nth <- read_csv(PAIRWISE_TOPIC_JSD_REDPILL) %>%
  mutate(subreddit = "redpill")

# Stack both subreddits, keep only the identifying columns and the two JSD
# measures, then reshape to long format: one row per
# (author, nth_post, subreddit, measure) with the score in `value`.
# pivot_longer() replaces the superseded gather(); the resulting columns are
# the same, only the row order differs.
over_individual_time <- bind_rows(pgh_overtime_nth, redpill_overtime_nth) %>%
  select(
    author, nth_post, previous_author_JSD, current_community_JSD, subreddit
  ) %>%
  pivot_longer(
    cols = c(previous_author_JSD, current_community_JSD),
    names_to = "measure",
    values_to = "value"
  )
# Mean JSD per subreddit x nth post x measure, restricted to cells backed by
# at least 50 rows.
# NOTE(review): `n` counts every row in the cell, including rows whose `value`
# is NA (the mean itself drops them), so the n >= 50 cut-off may admit cells
# with fewer than 50 usable values -- confirm this is intended.
over_individual_time_ms <- over_individual_time %>%
  group_by(subreddit, nth_post, measure) %>%
  summarize(
    mean_JSD = mean(value, na.rm = TRUE),
    n = n()
  ) %>%
  # summarize() only peels off the last grouping level; drop the rest so no
  # stale grouping leaks into later steps.
  ungroup() %>%
  filter(n >= 50)

# One panel per subreddit; each measure gets its own colour and smoother.
over_individual_time_ms %>%
  ggplot(aes(x = nth_post, y = mean_JSD, group = measure, color = measure)) +
  geom_point() +
  geom_smooth() +
  ggtitle("Post similarity over indvidual time (with community reference)") +
  xlab("Nth post") +
  ylab("Mean JSD") +
  facet_wrap(~subreddit) +
  theme_classic()
# Same summary as the n >= 50 version, but keeping every nth-post cell
# regardless of how many rows back it. This overwrites
# `over_individual_time_ms`.
over_individual_time_ms <- over_individual_time %>%
  group_by(subreddit, nth_post, measure) %>%
  summarize(
    mean_JSD = mean(value, na.rm = TRUE),
    n = n()
  ) %>%
  # summarize() only peels off the last grouping level; drop the rest so the
  # stored table is plain (ungrouped).
  ungroup()

# One panel per subreddit; each measure gets its own colour and smoother.
over_individual_time_ms %>%
  ggplot(aes(x = nth_post, y = mean_JSD, group = measure, color = measure)) +
  geom_point() +
  geom_smooth() +
  ggtitle("Post similarity over indvidual time (with community reference)") +
  xlab("Nth post") +
  ylab("Mean JSD") +
  facet_wrap(~subreddit) +
  theme_classic()
# Combined score per post, averaged by subreddit and nth post, restricted to
# cells with at least 50 non-missing scores.
over_individual_time %>%
  # Back to wide format: one column per measure. pivot_wider() replaces the
  # superseded spread(); assumes one row per (author, nth_post, subreddit)
  # and measure -- TODO confirm.
  pivot_wider(names_from = measure, values_from = value) %>%
  # NOTE(review): the score is the SUM of the two distances; confirm this is
  # the intended definition of the community-to-poster distance.
  mutate(
    community_to_poster_JSD = current_community_JSD + previous_author_JSD
  ) %>%
  # Rows missing either measure are dropped here, so `n` below counts only
  # usable scores.
  filter(!is.na(community_to_poster_JSD)) %>%
  group_by(subreddit, nth_post) %>%
  summarize(
    community_to_poster_JSD = mean(community_to_poster_JSD),
    n = n()
  ) %>%
  # Drop the grouping summarize() leaves behind.
  ungroup() %>%
  filter(n >= 50) %>%
  ggplot(aes(x = nth_post, y = community_to_poster_JSD)) +
  geom_smooth() +
  geom_point() +
  facet_wrap(~subreddit) +
  theme_classic() +
  theme(legend.position = "bottom")
# Combined score per post, averaged by subreddit and nth post, with no
# minimum-count filter (every nth-post cell is plotted).
over_individual_time %>%
  # Back to wide format: one column per measure. pivot_wider() replaces the
  # superseded spread(); assumes one row per (author, nth_post, subreddit)
  # and measure -- TODO confirm.
  pivot_wider(names_from = measure, values_from = value) %>%
  # NOTE(review): the score is the SUM of the two distances; confirm this is
  # the intended definition of the community-to-poster distance.
  mutate(
    community_to_poster_JSD = current_community_JSD + previous_author_JSD
  ) %>%
  # Rows missing either measure are dropped before averaging.
  filter(!is.na(community_to_poster_JSD)) %>%
  group_by(subreddit, nth_post) %>%
  summarize(
    community_to_poster_JSD = mean(community_to_poster_JSD),
    n = n()
  ) %>%
  # Drop the grouping summarize() leaves behind.
  ungroup() %>%
  ggplot(aes(x = nth_post, y = community_to_poster_JSD)) +
  geom_smooth() +
  geom_point() +
  facet_wrap(~subreddit) +
  theme_classic() +
  theme(legend.position = "bottom")