# Load libraries
library(ggplot2)
library(data.table)
library(dplyr)
library(langcog)
library(readr)
library(cowplot)
library(tidyr)
library(stringr)
library(magrittr)
library(directlabels)
library(langcog)
source('../helper/multiboot.R')
# twitter.data <- fread('results.csv')
# Read the "WL" marker-set results. Rows whose Corpus field is the literal
# string "Corpus" are discarded, ConvId is dropped, Alignment is coerced to
# numeric, and every row is tagged with MarkerSet = "WL".
# NOTE(review): the Corpus == "Corpus" rows look like header lines repeated
# inside a concatenated CSV -- confirm against the results files.
childes.wl.data <- read_csv("../results/CHILDES_clean_results.csv") %>%
  filter(Corpus != "Corpus") %>%
  select(-ConvId) %>%
  mutate(Alignment = as.numeric(Alignment),
         MarkerSet = "WL")

# Same cleaning steps for the "Recursive" marker-set results.
childes.recursive.data <- read_csv("../results/CHILDES_recursive.csv") %>%
  filter(Corpus != "Corpus") %>%
  select(-ConvId) %>%
  mutate(Alignment = as.numeric(Alignment),
         MarkerSet = "Recursive")

# Stack both marker sets into one table; MarkerSet tells the rows apart.
childes.data <- bind_rows(childes.wl.data, childes.recursive.data)
# # User info for twitter data
# userinfo <- read_delim('pairedtweets1000.txt.userinfo',delim='\t') %>%
# mutate(uid = as.character(uid)) %>%
# select(uid,verified,numfollowers) %>%
# mutate(tophalf.followers = numfollowers > median(numfollowers,na.rm=T))
# Mean alignment per marker within each MarkerSet x Child x File x Speaker x
# Replier cell, keeping only rows where both Speaker and Replier are "CHI" or
# "MOT". Child is characters 1-3 of DocId; File is characters 4-5 as a number.
childes.marker.means <- childes.data %>%
  mutate(Child = substr(DocId, 1, 3)) %>%
  mutate(File = as.numeric(substr(DocId, 4, 5))) %>%
  rename(Speaker = SpeakerA, Replier = SpeakerB) %>%
  filter(Speaker %in% c("CHI", "MOT") & Replier %in% c("CHI", "MOT")) %>%
  group_by(MarkerSet, Child, File, Speaker, Replier, Marker) %>%
  summarise(Alignment = mean(Alignment))

# The summarise() above removes only the last grouping variable (Marker), so
# this second summarise() averages the per-marker means within each remaining
# MarkerSet/Child/File/Speaker/Replier cell.
childes.cell.means <- summarise(childes.marker.means,
                                Alignment = mean(Alignment))

# Ask multi_boot for the ci_lower / ci_upper statistics of Alignment per cell,
# then attach the cell means. The join has no explicit `by`, so it matches on
# whatever columns the two tables share.
# NOTE(review): presumably those are exactly the five summary_groups columns --
# confirm against multi_boot's output.
childes.alignment <- multi_boot(
  childes.marker.means,
  column = "Alignment",
  summary_groups = c("MarkerSet", "Child", "File", "Speaker", "Replier"),
  statistics_functions = c("ci_lower", "ci_upper")
) %>%
  left_join(childes.cell.means)
# WL Markers
# Alignment by File for the "WL" marker set, one facet per Child and one
# colour per Replier. Layers, in draw order: loess smooth, dashed reference
# line at 0, point + ci_lower/ci_upper range, and a direct label at each
# series' last point (the legend is hidden in favour of those labels).
childes.alignment %>%
  filter(MarkerSet == "WL") %>%
  ggplot(aes(x = File, y = Alignment, color = Replier)) +
  geom_smooth(method = "loess") +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_dl(aes(label = Replier),
          method = list("last.points", cex = 0.8, dl.trans(x = x + 0.2))) +
  facet_wrap(~ Child) +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(limits = c(0, 95)) +
  theme(legend.position = "none")
# Recursive Markers
# Alignment by File for the "Recursive" marker set, one facet per Child and
# one colour per Replier. Layers, in draw order: loess smooth, dashed
# reference line at 0, point + ci_lower/ci_upper range, and a direct label at
# each series' last point (the legend is hidden in favour of those labels).
childes.alignment %>%
  filter(MarkerSet == "Recursive") %>%
  ggplot(aes(x = File, y = Alignment, color = Replier)) +
  geom_smooth(method = "loess") +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_dl(aes(label = Replier),
          method = list("last.points", cex = 0.8, dl.trans(x = x + 0.2))) +
  facet_wrap(~ Child) +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(limits = c(0, 95)) +
  theme(legend.position = "none")
#
# twitter.alignment <- twitter.data %>%
# group_by(speaker.id,replier.id,marker) %>%
# summarise(alignment = mean(alignment)) %>%
# mutate(speaker.id.char = as.character(speaker.id),
# replier.id.char = as.character(replier.id)) %>% #bit.64 issues
# ungroup() %>%
# select(-speaker.id,replier.id) %>%
# group_by(speaker.id.char,replier.id.char) %>%
# summarise(alignment = mean(alignment))
#
# # Make copies of userinfo columns for joining
# userinfo.speaker <- userinfo %>%
# rename(speaker.id.char = uid,
# verified.speaker = verified,
# numfollowers.speaker = numfollowers,
# tophalf.followers.speaker = tophalf.followers)
# userinfo.replier <- userinfo %>%
# rename(replier.id.char = uid,
# verified.replier = verified,
# numfollowers.replier = numfollowers,
# tophalf.followers.replier = tophalf.followers)
# # Add userinfo data to alignments
# twitter.info.alignment <- left_join(twitter.alignment,
# userinfo.speaker,copy=TRUE) %>%
# left_join(userinfo.replier, copy=TRUE)
# Compute alignments by userinfo
# twitter.verified.comp <- twitter.info.alignment %>%
# filter(!is.na(verified.speaker),!is.na(verified.replier)) %>%
# group_by(verified.speaker,verified.replier)
#
# twitter.verified.comp <- twitter.verified.comp %>%
# summarise(mean = mean(alignment)) %>%
# left_join(multi_boot(twitter.verified.comp,column="alignment",
# statistics_functions = c("ci_lower","ci_upper")),copy=TRUE)
#
# twitter.numfollowers.comp <- twitter.info.alignment %>%
# filter(!is.na(tophalf.followers.speaker),
# !is.na(tophalf.followers.replier)) %>%
# group_by(tophalf.followers.speaker,tophalf.followers.replier)
#
# twitter.numfollowers.comp <- twitter.numfollowers.comp %>%
# summarise(mean = mean(alignment)) %>%
# left_join(multi_boot(twitter.numfollowers.comp,column="alignment",
# statistics_functions = c("ci_lower","ci_upper")),copy=TRUE)
# Plot
# ggplot(aes(x = verified.replier, y = mean,
# color = verified.speaker,label=verified.speaker),
# data=twitter.verified.comp) +
# geom_pointrange(aes(ymin=ci_lower,ymax = ci_upper),position=position_dodge(.5)) +
# scale_y_continuous(name = "Coordination Score") +
# scale_x_discrete(name = "Replier Verified") +
# theme(legend.position=c(.8,.8))
#
# ggplot(aes(x = tophalf.followers.replier, y = mean,
# color = tophalf.followers.speaker,label=tophalf.followers.speaker),
# data=twitter.numfollowers.comp) +
# geom_pointrange(aes(ymin=ci_lower,ymax = ci_upper),position=position_dodge(.5)) +
# scale_y_continuous(name = "Coordination Score") +
# scale_x_discrete(name = "Replier >Median Followers") +
# theme(legend.position=c(.8,.2))