Load libraries

library(ggplot2)
library(data.table)
library(dplyr)
library(langcog)
library(readr)
library(cowplot)
library(tidyr)
library(stringr)
library(magrittr)
library(directlabels)
library(langcog)
source('../helper/multiboot.R')
# twitter.data <- fread('results.csv') 

# Load the CHILDES alignment results that are tagged MarkerSet = "WL".
# Rows whose Corpus field is the literal string "Corpus" are discarded --
# presumably header lines repeated inside the CSV (TODO confirm), which would
# also explain why Alignment needs an explicit numeric conversion.
# ConvId is dropped because nothing downstream in this script uses it.
childes.wl.data <- read_csv("../results/CHILDES_clean_results.csv") %>%
  filter(Corpus != "Corpus") %>%
  select(-ConvId) %>%
  mutate(Alignment = as.numeric(Alignment),
         MarkerSet = "WL")

# Load the CHILDES alignment results that are tagged MarkerSet = "Recursive".
# Cleaning mirrors the WL load: drop rows whose Corpus field is the literal
# string "Corpus" (presumably embedded header lines -- TODO confirm), remove
# ConvId, and convert Alignment to numeric.
childes.recursive.data <- read_csv("../results/CHILDES_recursive.csv") %>%
  filter(Corpus != "Corpus") %>%
  select(-ConvId) %>%
  mutate(Alignment = as.numeric(Alignment),
         MarkerSet = "Recursive")

# Stack both marker sets into one table; MarkerSet tells the rows apart.
childes.data <- childes.wl.data %>%
  bind_rows(childes.recursive.data)


# # User info for twitter data
# userinfo <- read_delim('pairedtweets1000.txt.userinfo',delim='\t') %>%
#   mutate(uid = as.character(uid)) %>%
#   select(uid,verified,numfollowers) %>%
#   mutate(tophalf.followers = numfollowers > median(numfollowers,na.rm=T))
# Mean alignment per marker for each MarkerSet / Child / File / speaker pair.
#  - SpeakerA / SpeakerB are renamed to Speaker / Replier.
#  - Child is characters 1-3 of DocId; File is characters 4-5 of DocId,
#    converted to numeric (assumes DocId follows that fixed layout --
#    TODO confirm against the results files).
#  - Only exchanges where both sides are "CHI" or "MOT" are kept.
# summarise() removes only the last grouping level (Marker), so the result is
# still grouped by MarkerSet, Child, File, Speaker and Replier; the later
# summarise() call on this object depends on that residual grouping.
childes.alignment <- childes.data %>%
  rename(Speaker = SpeakerA, Replier = SpeakerB) %>%
  mutate(Child = substr(DocId, 1, 3),
         File = as.numeric(substr(DocId, 4, 5))) %>%
  filter(Speaker %in% c("CHI", "MOT") & Replier %in% c("CHI", "MOT")) %>%
  group_by(MarkerSet, Child, File, Speaker, Replier, Marker) %>%
  summarise(Alignment = mean(Alignment))


# Replace the per-marker table with one row per MarkerSet / Child / File /
# Speaker / Replier, holding the mean Alignment across markers together with
# the ci_lower / ci_upper columns produced by multi_boot().
# multi_boot() is defined outside this file; presumably it bootstraps the
# Alignment column within each summary group -- verify against its source.
# Both arguments below read the per-marker childes.alignment, because the
# assignment only happens after the whole right-hand side is evaluated.
# The join has no explicit `by`, so it matches on every column the two
# tables share.
childes.alignment <- left_join(
  multi_boot(childes.alignment,
             column = "Alignment",
             summary_groups = c("MarkerSet", "Child", "File",
                                "Speaker", "Replier"),
             statistics_functions = c("ci_lower", "ci_upper")),
  # Relies on childes.alignment still being grouped by the five columns above.
  summarise(childes.alignment, Alignment = mean(Alignment))
)

WL Markers

# Alignment by File for the "WL" marker set, one panel per Child and one
# colour per Replier. Each point shows the mean with its ci_lower / ci_upper
# range; a loess smoother is drawn underneath and a dashed line marks zero.
# The legend is hidden because each series is labelled directly at its last
# point via directlabels.
ggplot(data = filter(childes.alignment, MarkerSet == "WL"),
       mapping = aes(x = File, y = Alignment, color = Replier)) +
  facet_wrap(~ Child) +
  geom_smooth(method = "loess") +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  # Nudge the direct labels slightly to the right of the final points.
  geom_dl(aes(label = Replier),
          method = list("last.points", cex = .8, dl.trans(x = x + .2))) +
  scale_x_continuous(limits = c(0, 95)) +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "none")

Recursive Markers

# Alignment by File for the "Recursive" marker set, one panel per Child and
# one colour per Replier. Each point shows the mean with its ci_lower /
# ci_upper range; a loess smoother is drawn underneath and a dashed line
# marks zero. The legend is hidden because each series is labelled directly
# at its last point via directlabels.
ggplot(data = filter(childes.alignment, MarkerSet == "Recursive"),
       mapping = aes(x = File, y = Alignment, color = Replier)) +
  facet_wrap(~ Child) +
  geom_smooth(method = "loess") +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  # Nudge the direct labels slightly to the right of the final points.
  geom_dl(aes(label = Replier),
          method = list("last.points", cex = .8, dl.trans(x = x + .2))) +
  scale_x_continuous(limits = c(0, 95)) +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "none")

# 
# twitter.alignment <- twitter.data %>%
#   group_by(speaker.id,replier.id,marker) %>%
#   summarise(alignment = mean(alignment)) %>%
#   mutate(speaker.id.char = as.character(speaker.id),
#          replier.id.char = as.character(replier.id)) %>% #bit.64 issues
#   ungroup() %>%
#   select(-speaker.id,replier.id) %>%
#   group_by(speaker.id.char,replier.id.char) %>%
#   summarise(alignment = mean(alignment))
# 
# # Make copies of userinfo columns for joining
# userinfo.speaker <- userinfo %>%
#   rename(speaker.id.char = uid,
#          verified.speaker = verified,
#          numfollowers.speaker = numfollowers,
#          tophalf.followers.speaker = tophalf.followers)
# userinfo.replier <- userinfo %>%
#   rename(replier.id.char = uid,
#          verified.replier = verified,
#          numfollowers.replier = numfollowers,
#          tophalf.followers.replier = tophalf.followers)

# # Add userinfo data to alignments 
# twitter.info.alignment <- left_join(twitter.alignment,
#                                     userinfo.speaker,copy=TRUE) %>%
#   left_join(userinfo.replier, copy=TRUE)

Compute alignments by userinfo

# twitter.verified.comp <- twitter.info.alignment %>%
#   filter(!is.na(verified.speaker),!is.na(verified.replier)) %>%
#   group_by(verified.speaker,verified.replier) 
# 
# twitter.verified.comp <- twitter.verified.comp %>%
#   summarise(mean = mean(alignment)) %>%
#   left_join(multi_boot(twitter.verified.comp,column="alignment",
#            statistics_functions = c("ci_lower","ci_upper")),copy=TRUE)
# 
# twitter.numfollowers.comp <- twitter.info.alignment %>%
#    filter(!is.na(tophalf.followers.speaker),
#           !is.na(tophalf.followers.replier)) %>%
#   group_by(tophalf.followers.speaker,tophalf.followers.replier)
# 
# twitter.numfollowers.comp <- twitter.numfollowers.comp %>%
#   summarise(mean = mean(alignment)) %>%
#   left_join(multi_boot(twitter.numfollowers.comp,column="alignment",
#            statistics_functions = c("ci_lower","ci_upper")),copy=TRUE)

Plot

# ggplot(aes(x = verified.replier, y = mean, 
#            color = verified.speaker,label=verified.speaker),
#        data=twitter.verified.comp) +
#   geom_pointrange(aes(ymin=ci_lower,ymax = ci_upper),position=position_dodge(.5)) +
#   scale_y_continuous(name = "Coordination Score") +
#   scale_x_discrete(name = "Replier Verified") +
#   theme(legend.position=c(.8,.8))
# 
# ggplot(aes(x = tophalf.followers.replier, y = mean, 
#            color = tophalf.followers.speaker,label=tophalf.followers.speaker),
#        data=twitter.numfollowers.comp) +
#   geom_pointrange(aes(ymin=ci_lower,ymax = ci_upper),position=position_dodge(.5)) +
#   scale_y_continuous(name = "Coordination Score") +
#   scale_x_discrete(name = "Replier >Median Followers") +
#   theme(legend.position=c(.8,.2))