linguistic_analysis_C-D.Rmd : Linguistic quantity and complexity

STEP THREE - Linguistic quantity and complexity

# Load the filtered raw chat CSV that every analysis below starts from.
d <- read_csv(file = "../data/processed_data/joined_data/filtered_raw_chat.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   roundID = col_character(),
##   index = col_double(),
##   gameId = col_character(),
##   createdAt = col_datetime(format = ""),
##   repNum = col_double(),
##   blockNum = col_double(),
##   trialNum = col_double(),
##   numPlayers = col_double(),
##   text = col_character(),
##   playerId = col_character(),
##   type = col_character(),
##   time = col_double(),
##   submitted = col_logical(),
##   conditionName = col_character(),
##   condition = col_character()
## )

# Keep only rows that are chat messages and that carry non-missing text.
d_flowers <- filter(d, type == "message", !is.na(text))

#data_target = "pilot0"
#data_location=paste0("data/",data_target)

#d_flowers<- read_csv(here(data_location, "raw_chat2.csv"))%>%
 #filter(!is.na(text))

Utterance length (MLU)

# Per game x block: total number of words and mean length of utterance (MLU).
d_flowers_utt_length_block <- d_flowers %>%
  # Collapse punctuation/space runs to one space, then trim: without the trim a
  # message that starts with punctuation yields an empty first token and is
  # counted as one word too many.
  mutate(text = trimws(gsub("[[:punct:] ]+", " ", text))) %>%
  # lengths() is type-stable (unlike sapply) and gives 0 for an empty message.
  mutate(utt_length_words = lengths(strsplit(text, " "))) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words),
            mlu = mean(utt_length_words))

## `summarise()` has grouped output by 'gameId', 'blockNum'. You can override using the `.groups` argument.

#jpeg("~/Desktop/nwords_block.jpg", width = 950, height = 950)
# Total words per game x block with a quadratic trend per condition.
ggplot(d_flowers_utt_length_block,
       aes(x = blockNum, y = total_num_words, color = condition)) +
  #facet_grid(rows =vars(condition)) +
  # NOTE(review): geom_point() and geom_jitter() both draw every observation;
  # the MLU plots use geom_jitter() only -- confirm both layers are wanted.
  geom_point() +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  # One title only: labs(title = "Number of words") was always overwritten by
  # the trailing ggtitle(), so the effective title is set directly.
  labs(title = "Number of words per block",
       y = "Total number of words", x = "Block number") +
  theme_jmank() + scale_color_bay() +
  # Placed after theme_jmank(), as in the other plots of this file, so the
  # project theme cannot reset the legend position.
  theme(legend.position = "bottom")

 # theme(text = element_text(size = 30))
#color="gameId" cols = vars(blockNum),
#dev.off() 

#jpeg("~/Desktop/plots/mlu_block.jpg", width = 950, height = 950)
# MLU per game x block: jittered observations, quadratic trend and
# bootstrapped means per condition.
d_flowers_utt_length_block %>%
  ggplot(aes(x = blockNum, y = mlu, color = condition)) +
#  facet_grid( rows =vars(condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
 # scale_y_continuous(limits = c(0,15))+
  ggtitle("Mean Length of Utterance") +
  xlab("Block number") +
  ylab("MLU") +
  theme_jmank() +
  scale_color_bay() +
  # The same setting used to be given before theme_jmank() as well; only this
  # final one determines the result.
  theme(legend.position = "bottom")

#  theme(text = element_text(size = 30))
#dev.off() 
#ggsave("../figs/mean_utt_length_block.png", height = 3, width = 3)


# Per game x trial: total number of words and mean length of utterance (MLU).
d_flowers_utt_length_trial <- d_flowers %>%
  # Collapse punctuation/space runs to one space, then trim: without the trim a
  # message that starts with punctuation yields an empty first token and is
  # counted as one word too many.
  mutate(text = trimws(gsub("[[:punct:] ]+", " ", text))) %>%
  # lengths() is type-stable (unlike sapply) and gives 0 for an empty message.
  mutate(utt_length_words = lengths(strsplit(text, " "))) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words),
            mlu = mean(utt_length_words))

## `summarise()` has grouped output by 'gameId', 'trialNum'. You can override using the `.groups` argument.

#jpeg("~/Desktop/plots/nwords_trial.jpg", width = 950, height = 950)
# Total words per game x trial with a quadratic trend per condition.
ggplot(d_flowers_utt_length_trial,
       aes(x = trialNum, y = total_num_words, color = condition)) +
#  facet_grid(rows =vars(condition)) +
  # NOTE(review): geom_point() and geom_jitter() both draw every observation;
  # the MLU plots use geom_jitter() only -- confirm both layers are wanted.
  geom_point() +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  # One title only: labs(title = "Number of words") was always overwritten by
  # the trailing ggtitle(), so the effective title is set directly.
  labs(title = "Number of words per trial",
       y = "Total number of words", x = "Trial number") +
  theme_jmank() + scale_color_bay() +
  # Placed after theme_jmank(), as in the other plots of this file, so the
  # project theme cannot reset the legend position.
  theme(legend.position = "bottom")

#  theme(text = element_text(size = 30))
#dev.off() 

#jpeg("~/Desktop/plots/mlu_trial.jpg", width = 950, height = 950)
# MLU per game x trial: jittered observations, quadratic trend and
# bootstrapped means per condition.
d_flowers_utt_length_trial %>%
  ggplot(aes(x = trialNum, y = mlu, color = condition)) +
#  facet_grid( rows =vars(condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
 # scale_y_continuous(limits = c(0,15))+
  ggtitle("Mean Length of Utterance") +
  xlab("Trial number") +
  ylab("MLU") +
  theme_jmank() +
  scale_color_bay() +
  theme(legend.position = "bottom")

  #theme(text = element_text(size = 30))
#dev.off() 

# Writes the most recently created plot (the MLU-per-trial figure above).
ggsave(filename = "../figs/mean_utt_length.png", width = 4, height = 5)

# Per game x repetition: total number of words and mean length of utterance.
d_flowers_utt_length_rep <- d_flowers %>%
  # Collapse punctuation/space runs to one space, then trim: without the trim a
  # message that starts with punctuation yields an empty first token and is
  # counted as one word too many.
  mutate(text = trimws(gsub("[[:punct:] ]+", " ", text))) %>%
  # lengths() is type-stable (unlike sapply) and gives 0 for an empty message.
  mutate(utt_length_words = lengths(strsplit(text, " "))) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words),
            mlu = mean(utt_length_words))

## `summarise()` has grouped output by 'gameId', 'repNum'. You can override using the `.groups` argument.

#jpeg("~/Desktop/plots/nword_rep.jpg", width = 950, height = 950)
# Log-transformed MLU per game x repetition with a quadratic trend per
# condition.
ggplot(d_flowers_utt_length_rep,
       aes(x = repNum, y = log(mlu + 1), color = condition)) +
#  facet_grid( rows =vars(condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
 # scale_y_continuous(limits = c(0,15))+
  # The y axis shows log(mlu + 1), so it is labelled as such rather than "MLU".
  # labs(title = "MLU") was always overwritten by ggtitle("MLU per rep").
  labs(title = "MLU per rep", y = "log(MLU + 1)", x = "Rep number") +
  theme_jmank() + scale_color_bay() +
  # Placed after theme_jmank(), as in the other plots of this file, so the
  # project theme cannot reset the legend position.
  theme(legend.position = "bottom")

 # theme(text = element_text(size = 30))
#dev.off()

Number of turns

# Number of messages (turns) per game, condition and trial.
d_flowers_n_turns_trial <- summarise(
  group_by(d_flowers, gameId, condition, trialNum),
  n = n()
)

## `summarise()` has grouped output by 'gameId', 'condition'. You can override using the `.groups` argument.

#jpeg("~/Desktop/plots/nturn_trial.jpg", width = 950, height = 950)
# Number of turns per game x trial with a quadratic trend per condition.
ggplot(d_flowers_n_turns_trial, aes(x = trialNum, y = n, color = condition)) +
  #facet_grid(rows =vars(condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  # labs(title = "Number of turns") was always overwritten by ggtitle().
  labs(title = "Number of turns per trial",
       y = "Number of turns", x = "Trial number") +
  theme_bw() +
  # theme_bw() is a complete theme and resets any theme() call placed before
  # it, so the legend position has to come afterwards to take effect.
  theme(legend.position = "bottom")

  #theme(text = element_text(size = 30))
#dev.off() 


# Number of messages (turns) per game, condition and block.
d_flowers_n_turns_block <- summarise(
  group_by(d_flowers, gameId, condition, blockNum),
  n = n()
)

## `summarise()` has grouped output by 'gameId', 'condition'. You can override using the `.groups` argument.

#jpeg("~/Desktop/plots/nturn_block.jpg", width = 950, height = 950)
# Number of turns per game x block with a quadratic trend per condition.
ggplot(d_flowers_n_turns_block, aes(x = blockNum, y = n, color = condition)) +
 # facet_grid(rows =vars(condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  # The per-block title used to sit in a detached ggtitle() statement (the
  # preceding line had no trailing `+`), so it was printed as a bare labels
  # object and never reached the plot or the saved figure.
  labs(title = "Number of turns per block",
       y = "Number of turns", x = "Block number") +
  theme_jmank() + scale_color_bay() +
  theme(legend.position = "bottom")

 # theme(text = element_text(size = 30))
#dev.off() 

# Saves the most recently created plot (the figure above).
ggsave("../figs/num_turns_block.png", height = 3, width = 3)

# Number of utterances per trial, game and condition.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, trialNum, gameId, condition),
  n_utt_speaker = n()
)

## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.

#jpeg("~/Desktop/plots/nutts_trial.jpg", width = 950, height = 950)
# Linear trend of utterance counts over trials, one line per condition.
d_flowers_n_utt_speaker %>%
  ggplot(aes(x = trialNum, y = n_utt_speaker, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
 # facet_grid(~condition) +
  labs(x = "Trial number", y = "Number of utterances") +
  theme_bw()

 # theme(text = element_text(size = 30))
#dev.off() 

# Number of utterances per block, game and condition.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, blockNum, gameId, condition),
  n_utt_speaker = n()
)

## `summarise()` has grouped output by 'blockNum', 'gameId'. You can override using the `.groups` argument.

#jpeg("~/Desktop/plots/nutts_block.jpg", width = 950, height = 950)
# Linear trend of utterance counts over blocks, one line per condition.
d_flowers_n_utt_speaker %>%
  ggplot(aes(x = blockNum, y = n_utt_speaker, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
 # facet_grid(~condition) +
  labs(x = "Block number", y = "Number of utterances") +
  theme_bw()

#  theme(text = element_text(size = 30))
#dev.off()

# Number of utterances per trial (with its block), game and condition; the
# block column is what the per-block panels filter on.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, trialNum, blockNum, gameId, condition),
  n_utt_speaker = n()
)

## `summarise()` has grouped output by 'trialNum', 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Scatter plus linear trend of utterance counts against trial number.
#   d_flowers_n_utt_speaker: data with trialNum, n_utt_speaker, gameId and
#                            condition columns
#   c, q: lower and upper x-axis limits passed to xlim()
# Returns the ggplot object so callers can add further theme/label layers.
turnb_function <- function(d_flowers_n_utt_speaker, c, q) {
  trial_aes <- aes(x = trialNum, y = n_utt_speaker,
                   label = gameId, color = condition)
  ggplot2::ggplot(d_flowers_n_utt_speaker, trial_aes) +
    xlim(c, q) +
    geom_point(alpha = .5) +
    geom_smooth(method = "lm", formula = y ~ x) +
    xlab("Trial number") +
    theme_bw() +
    # zero margin on all four sides so panels sit tightly in a grid
    theme(plot.margin = unit(rep(0, 4), "cm"))
}

#jpeg("~/Desktop/plots/ttr_trial.jpg", width = 950, height = 950)
# One panel per block of utterance counts by trial, arranged in a single row
# with a shared legend and shared axis titles.
# The panels get their own names: assigning them to `d` overwrote the raw data
# frame read at the top of the file, and `c` masked base::c().
turn_panel_1 <- turnb_function(d_flowers_n_utt_speaker %>% filter(blockNum == 0), 0, 5) +
  ylab("Number of utterances") +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank()) +
  labs(subtitle = "Block1")

turn_panel_2 <- turnb_function(d_flowers_n_utt_speaker %>% filter(blockNum == 1), 6, 11) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block2")

turn_panel_3 <- turnb_function(d_flowers_n_utt_speaker %>% filter(blockNum == 2), 12, 17) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block3")

turn_panel_4 <- turnb_function(d_flowers_n_utt_speaker %>% filter(blockNum == 3), 18, 23) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block4")

# Re-enable the legend on one panel only to extract it as a separate grob.
legend <- get_legend(
  turn_panel_4 + theme(legend.position = "right")
)

p <- plot_grid(turn_panel_1, turn_panel_2, turn_panel_3, turn_panel_4, legend,
               nrow = 1, ncol = 5, align = "v")

x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Number of utterances", rot = 90)
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

Parts of speech

# Per game x block x trial: total number of words and MLU; used below as the
# denominator for part-of-speech proportions.
d_flowers_uttlength_nwords <- d_flowers %>%
  # Collapse punctuation/space runs to one space, then trim: without the trim a
  # message that starts with punctuation yields an empty first token and is
  # counted as one word too many.
  mutate(text = trimws(gsub("[[:punct:] ]+", " ", text))) %>%
  # lengths() is type-stable (unlike sapply) and gives 0 for an empty message.
  mutate(utt_length_words = lengths(strsplit(text, " "))) %>%
  group_by(gameId, blockNum, trialNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words),
            mlu = mean(utt_length_words))

## `summarise()` has grouped output by 'gameId', 'blockNum', 'trialNum'. You can override using the `.groups` argument.

 # Part-of-speech tag every message and keep one row per message whose `pos`
 # is the comma-separated tag sequence.
 # NOTE(review): the same parse is run again further down in this file.
 text <- d_flowers[["text"]]
 parsed <- spacy_parse(text, pos = TRUE) %>%
   select(doc_id, pos) %>%
   group_by(doc_id) %>%
   mutate(pos = paste(pos, collapse = ",")) %>%
   distinct()

## Found 'spacy_condaenv'. spacyr will use this environment

## successfully initialized (spaCy Version: 3.0.6, language model: en_core_web_sm)

## (python options: type = "condaenv", value = "spacy_condaenv")

#   
# d_flowers_pos  <- cbind(d_flowers, parsed) %>%
#   separate_rows(pos, convert = TRUE) %>%
#   group_by(condition, blockNum, gameId, pos) %>%
 #  filter(pos=="NOUN"| pos=="VERB") %>% #| pos=="ADJ"| pos=="ADP"| pos=="DET" | pos=="PRON") %>%
  # summarise(pos_count=n()) %>%
  # left_join(d_flowers_uttlength_block) %>%
  # mutate(prop_pos=pos_count/total_num_words)
         
# #jpeg("~/Desktop/plots/pos_block.jpg", width = 950, height = 950)
 #ggplot(d_flowers_pos, 
#        aes(x=blockNum, y=prop_pos, label=gameId,  color=condition)) + 
#   geom_smooth(method = "lm", formula = y~x) + 
#   geom_point(alpha=.1) +
#   facet_grid( cols=vars(pos)) +
#   xlab("Block number") + 
#   ylab("Proportion of pos") + 
#   theme_bw()#+
#  # theme(text = element_text(size = 30))
# #dev.off() 
## 
# d_flowers_pos  <- cbind(d_flowers, parsed) %>%
#   separate_rows(pos, convert = TRUE) %>%
#   group_by(condition, trialNum, gameId, pos) %>%
#   filter(pos=="NOUN"| pos=="VERB" ) %>% #| pos=="ADJ"| pos=="ADP"| pos=="DET" | pos=="PRON") %>%
#   summarise(pos_count=n()) %>%
#   left_join(d_flowers_utt_length_trial) %>%
#   mutate(prop_pos=pos_count/total_num_words)
#         
# #jpeg("~/Desktop/plots/pos_trial.jpg", width = 950, height = 950)
# ggplot(d_flowers_pos, 
#        aes(x=trialNum, y=prop_pos, label=gameId,  color=condition)) + 
#   geom_smooth(method = "lm", formula = y~x) + 
#   geom_point(alpha=.1) +
#   facet_grid( cols=vars(pos)) +
#   xlab("Trial number") + 
 #  ylab("Proportion of pos") + 
#   ylim(0,5)+
#   theme_bw()#+
   #theme(text = element_text(size = 30))
# #dev.off() 
# 


# Part-of-speech tag every message and keep one row per message whose `pos`
# is the comma-separated tag sequence.
text <- d_flowers$text
# Explicit document ids: the tags are joined back by id instead of relying on
# cbind() row order, which needs the parser to return exactly one group per
# message in the original order.
doc_ids <- paste0("text", seq_along(text))
parsed <- spacy_parse(setNames(text, doc_ids), pos = TRUE) %>%
  select(doc_id, pos) %>%
  group_by(doc_id) %>%
  mutate(pos = paste(pos, collapse = ",")) %>%
  distinct()

# Count nouns and verbs per condition x block x trial x game and express them
# as a proportion of the words produced in that cell.
d_flowers_pos <- d_flowers %>%
  mutate(doc_id = doc_ids) %>%
  left_join(parsed, by = "doc_id") %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB")) %>% # pos=="ADJ"| pos=="DET" | pos=="PRON"
  group_by(condition, blockNum, trialNum, gameId, pos) %>%
  summarise(pos_count = n()) %>%
  left_join(d_flowers_uttlength_nwords,
            by = c("condition", "blockNum", "trialNum", "gameId")) %>%
  mutate(prop_pos = pos_count / total_num_words)

## `summarise()` has grouped output by 'condition', 'blockNum', 'trialNum', 'gameId'. You can override using the `.groups` argument.

## Joining, by = c("condition", "blockNum", "trialNum", "gameId")

#jpeg("~/Desktop/plots/pos_block.jpg", width = 950, height = 950)
# Linear trend of the part-of-speech proportion against trial number, with
# colour by condition and line type by part of speech.
#   d_flowers_pos: data with trialNum, prop_pos, gameId, condition, pos
#   c, q: lower and upper x-axis limits passed to xlim()
# Returns the ggplot object.
pos_function <- function(d_flowers_pos, c, q) {
  pos_aes <- aes(x = trialNum, y = prop_pos, label = gameId,
                 color = condition, linetype = pos)
  base_plot <- ggplot(d_flowers_pos, pos_aes)
  base_plot +
    geom_smooth(method = "lm", formula = y ~ x) +
    # geom_point(alpha=.5) +
    # facet_grid(cols=vars(condition)) +
    # ylim(0,1)+
    xlim(c, q) +
    theme_bw()
}

# One panel per block of noun/verb proportions by trial, arranged in a single
# row with a shared legend and shared axis titles.
# The panels get their own names so that plot objects no longer overwrite `d`
# (the raw data frame read at the top of the file) or mask base::c().
pos_panel_1 <- pos_function(d_flowers_pos %>% filter(blockNum == 0), 0, 5) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank()) +
  labs(subtitle = "Block1") + ylim(0.0, 0.4)
pos_panel_2 <- pos_function(d_flowers_pos %>% filter(blockNum == 1), 6, 11) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank()) +
  labs(subtitle = "Block2") + ylim(0.0, 0.4)
pos_panel_3 <- pos_function(d_flowers_pos %>% filter(blockNum == 2), 12, 17) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block3") + ylim(0.0, 0.4)
pos_panel_4 <- pos_function(d_flowers_pos %>% filter(blockNum == 3), 18, 23) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block4") + ylim(0.0, 0.4)

# Re-enable the legend on one panel only to extract it as a separate grob.
legend <- get_legend(
  pos_panel_4 + theme(legend.position = "right")
)

p <- plot_grid(pos_panel_1, pos_panel_2, pos_panel_3, pos_panel_4, legend,
               nrow = 1, ncol = 5, align = "v")
x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Proportion Verbs, Nouns", rot = 90)
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

Lexical Diversity (TTR)

# Type-token ratio (distinct tokens / total tokens) for each element of a
# character vector.
#   text_: character vector; each element is tokenised on whitespace after
#          lower-casing.
# Returns a numeric vector of the same length; NA where the element is missing
# or contains no tokens.
# Types are counted directly as unique tokens. Counting non-word runs in the
# pasted unique tokens treated "don't" or "uh-huh" as two types and could push
# the ratio above 1.
# NOTE(review): punctuation attached to a token ("ok," vs "ok") still makes
# two distinct types -- confirm whether text should be stripped beforehand.
ttr <- function(text_) {
  tokens <- strsplit(trimws(tolower(text_)), "[[:space:]]+")
  total_words <- lengths(tokens)
  total_unique <- vapply(tokens, function(tok) length(unique(tok)), integer(1L))
  ttr_ <- total_unique / total_words
  # 0 tokens gives 0/0; report that (and missing input) as NA
  ttr_[total_words == 0L | is.na(text_)] <- NA_real_
  ttr_
}

# Concatenate all messages of a game x trial into one text and compute its
# type-token ratio.
# summarise() gives exactly one row per group. The earlier
# mutate() + select(-text) + distinct() kept every other per-message column,
# so rows that differed in any of them were not collapsed and the same TTR
# value was repeated in the plot and the fitted trend.
d_flowers_text_trial <- d_flowers %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

d_flowers_ttr_trial <- d_flowers_text_trial %>%
  mutate(ttr_ = ttr(text_block))

#t<-str_split(text_, " ", 1:6)
#y<- paste(unlist(lapply(t,head,n=5)), collapse=" ")

#jpeg("~/Desktop/plots/ttr_trial.jpg", width = 950, height = 950)
# Type-token ratio per trial with a linear trend per condition.
d_flowers_ttr_trial %>%
  ggplot(aes(x = trialNum, y = ttr_, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
 # facet_grid(rows =vars(condition)) +
  labs(title = "TTR per trial", x = "Trial number", y = "Type_token ratio") +
  ylim(0, 1) +
  theme_bw()

 # theme(text = element_text(size = 30))
#dev.off() 


# Concatenate all messages of a game x block into one text and compute its
# type-token ratio.
# summarise() gives exactly one row per group. The earlier
# mutate() + select(-text) + distinct() kept every other per-message column,
# so rows that differed in any of them were not collapsed and the same TTR
# value was repeated in the plot and the fitted trend.
d_flowers_text_block <- d_flowers %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

d_flowers_ttr_block <- d_flowers_text_block %>%
  mutate(ttr_ = ttr(text_block))

#jpeg("~/Desktop/plots/ttr_block.jpg", width = 950, height = 950)
# Type-token ratio per block with a linear trend per condition.
d_flowers_ttr_block %>%
  ggplot(aes(x = blockNum, y = ttr_, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
 # facet_grid(rows =vars(condition)) +
  labs(title = "TTR per block", x = "Block number", y = "Type_token ratio") +
  ylim(0, 1) +
  theme_bw()

#  theme(text = element_text(size = 30))
#dev.off()

# Concatenate all messages of a game x trial (keeping its block) into one text
# and compute its type-token ratio; blockNum is kept for the per-block panels.
# summarise() gives exactly one row per group. The earlier
# mutate() + select(-text) + distinct() kept every other per-message column,
# so rows that differed in any of them were not collapsed and the same TTR
# value was repeated in the plot and the fitted trend.
d_flowers_text_trial <- d_flowers %>%
  group_by(gameId, trialNum, blockNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

d_flowers_ttr_trial <- d_flowers_text_trial %>%
  mutate(ttr_ = ttr(text_block))

# Scatter plus linear trend of type-token ratio against trial number, with the
# y axis fixed to 0.5-1.0.
#   d_flowers_ttr_trial: data with trialNum, ttr_, gameId and condition
#   c, q: lower and upper x-axis limits passed to xlim()
# Returns the ggplot object so callers can add further theme/label layers.
ttr_function <- function(d_flowers_ttr_trial, c, q) {
  ttr_aes <- aes(x = trialNum, y = ttr_, label = gameId, color = condition)
  ggplot2::ggplot(d_flowers_ttr_trial, ttr_aes) +
    xlim(c, q) +
    ylim(0.5, 1.0) +
    geom_point(alpha = .5) +
    geom_smooth(method = "lm", formula = y ~ x) +
    theme_bw() +
    # zero margin on all four sides so panels sit tightly in a grid
    theme(plot.margin = unit(rep(0, 4), "cm"))
}

#jpeg("~/Desktop/plots/ttr_trial.jpg", width = 950, height = 950)
# One panel per block of type-token ratio by trial, arranged in a single row
# with a shared legend and shared axis titles, then saved to figs/.
# The panels get their own names so that plot objects no longer overwrite `d`
# (the raw data frame read at the top of the file) or mask base::c().
# ttr_function() already fixes the y axis to 0.5-1.0, so the panels no longer
# add a second ylim(); that only produced "Scale for 'y' is already present"
# messages.
ttr_panel_1 <- ttr_function(d_flowers_ttr_trial %>% filter(blockNum == 0), 0, 5) +
  ylab("Type token ration") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  labs(subtitle = "Block1")

ttr_panel_2 <- ttr_function(d_flowers_ttr_trial %>% filter(blockNum == 1), 6, 11) +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block2")

ttr_panel_3 <- ttr_function(d_flowers_ttr_trial %>% filter(blockNum == 2), 12, 17) +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank()) +
  labs(subtitle = "Block3")

ttr_panel_4 <- ttr_function(d_flowers_ttr_trial %>% filter(blockNum == 3), 18, 23) +
  labs(subtitle = "Block4") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())

# Re-enable the legend on one panel only to extract it as a separate grob.
legend <- get_legend(
  ttr_panel_4 + theme(legend.position = "right")
)


p <- plot_grid(ttr_panel_1, ttr_panel_2, ttr_panel_3, ttr_panel_4, legend,
               nrow = 1, ncol = 5, align = "v")
#dev.off() 
x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Type token ratio", rot = 90)
p <- grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

# Relative path like the other ggsave() calls in this file, instead of a
# user-specific absolute path; arguments are spelled out in full.
ggsave(filename = "../figs/ttr.png", plot = p)

## Saving 7 x 5 in image

Distribution of speech across speakers

# Utterances per speaker within each game x trial.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, gameId, condition),
  n_utt_speaker = n()
)

## `summarise()` has grouped output by 'playerId', 'trialNum', 'gameId'. You can override using the `.groups` argument.

# All utterances within each game x trial (the denominator).
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, trialNum, gameId, condition),
  n_utt_game = n()
)

## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.

# Each speaker's share of the utterances in that game x trial.
d_flowers_n_utt_dist <- mutate(
  left_join(d_flowers_n_utt_speaker, d_flowers_n_utt_game),
  overall_turns = n_utt_speaker / n_utt_game
)

## Joining, by = c("trialNum", "gameId", "condition")

#jpeg("~/Desktop/plots/proputts_trial_speaker.jpg", width = 950, height = 950)
# Each speaker's share of turns per trial with a linear trend per condition.
d_flowers_n_utt_dist %>%
  ggplot(aes(x = trialNum, y = overall_turns,
             label = playerId, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
#  facet_wrap(~condition)+
  labs(title = "Proportion of turns per trial and speaker",
       x = "Trial number", y = "Proportion of turns") +
  theme_bw()

  #theme(text = element_text(size = 30))
#dev.off() 

# Utterances per speaker within each game x block.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, blockNum, gameId, condition),
  n_utt_speaker = n()
)

## `summarise()` has grouped output by 'playerId', 'blockNum', 'gameId'. You can override using the `.groups` argument.

# All utterances within each game x block (the denominator).
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, blockNum, gameId, condition),
  n_utt_game = n()
)

## `summarise()` has grouped output by 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Each speaker's share of the utterances in that game x block.
d_flowers_n_utt_dist <- mutate(
  left_join(d_flowers_n_utt_speaker, d_flowers_n_utt_game),
  overall_turns = n_utt_speaker / n_utt_game
)

## Joining, by = c("blockNum", "gameId", "condition")

#jpeg("~/Desktop/plots/proputts_block_speaker.jpg", width = 950, height = 950)
# Each speaker's share of turns per block with a linear trend per condition.
d_flowers_n_utt_dist %>%
  ggplot(aes(x = blockNum, y = overall_turns,
             label = playerId, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
 # facet_wrap(~condition)+
  labs(title = "Proportion of turns per block and speaker",
       x = "Block number", y = "Proportion of turns") +
  theme_bw()

 # theme(text = element_text(size = 30))
#dev.off()

# Utterances per speaker within each game x trial, keeping the block column
# that the per-block panels filter on.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, blockNum, gameId, condition),
  n_utt_speaker = n()
)

## `summarise()` has grouped output by 'playerId', 'trialNum', 'blockNum', 'gameId'. You can override using the `.groups` argument.

# All utterances within each game x trial (the denominator).
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, trialNum, blockNum, gameId, condition),
  n_utt_game = n()
)

## `summarise()` has grouped output by 'trialNum', 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Each speaker's share of the utterances in that game x trial.
d_flowers_n_utt_dist <- mutate(
  left_join(d_flowers_n_utt_speaker, d_flowers_n_utt_game),
  overall_turns = n_utt_speaker / n_utt_game
)

## Joining, by = c("trialNum", "blockNum", "gameId", "condition")

# Scatter plus linear trend of a per-speaker proportion against trial number,
# faceted by condition, with both axis titles removed.
#   d_flowers_n_utt_dist: data with trialNum, playerId, condition and the
#                         column passed as `overall_turns`
#   overall_turns: unquoted column to put on the y axis
#   c, q: lower and upper x-axis limits passed to xlim()
# Returns the ggplot object.
turn_per_plot_function <- function(d_flowers_n_utt_dist, overall_turns, c, q) {
  share_aes <- aes(x = trialNum, y = {{ overall_turns }}, label = playerId)
  ggplot2::ggplot(d_flowers_n_utt_dist, share_aes) +
    geom_point(alpha = .5) +
    xlim(c, q) +
    facet_wrap(~condition) +
    geom_smooth(method = "lm", formula = y ~ x) +
    theme_bw() +
    theme(plot.margin = unit(rep(0, 4), "cm"),
          axis.title.y = element_blank(),
          axis.title.x = element_blank())
}

#jpeg("~/Desktop/plots/proputts_trial_speaker.jpg", width = 950, height = 950)
# One panel per block of per-speaker turn shares by trial, arranged in a grid
# with shared axis titles.
# turn_per_plot_function() already applies theme_bw(). Adding theme_bw() again
# after theme(...) in the callers (a complete theme) reset the legend, axis
# title and axis text settings that had just been made, so it is dropped.
# The panels also get their own names so that plot objects no longer overwrite
# `d` (the raw data frame read at the top of the file) or mask base::c().
share_panel_1 <- turn_per_plot_function(d_flowers_n_utt_dist %>% filter(blockNum == 0), overall_turns, 0, 5) +
  ylab("Proportion of turns") +
  labs(subtitle = "Block1") +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank())

share_panel_2 <- turn_per_plot_function(d_flowers_n_utt_dist %>% filter(blockNum == 1), overall_turns, 6, 11) +
  theme(legend.position = "none",
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  labs(subtitle = "Block2")

share_panel_3 <- turn_per_plot_function(d_flowers_n_utt_dist %>% filter(blockNum == 2), overall_turns, 12, 17) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank()) +
  labs(subtitle = "Block3")

share_panel_4 <- turn_per_plot_function(d_flowers_n_utt_dist %>% filter(blockNum == 3), overall_turns, 18, 23) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank(),
        plot.tag.position = "topright") +
  labs(subtitle = "Block4")

# NOTE(review): these panels map no colour/fill aesthetic (conditions are
# facets), so there may be no legend to extract here -- confirm it is needed.
legend <- get_legend(
  share_panel_4 + theme(legend.position = "right")
)

p <- plot_grid(share_panel_1, share_panel_2, share_panel_3, share_panel_4, legend,
               nrow = 2, ncol = 3, align = "v")

x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Proportion of turns", rot = 90)
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

Questions

# Utterances per trial, game and condition (denominator for the proportion).
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(trialNum, gameId, condition) %>%
  summarise(n_utt_speaker=n())

## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.

# Proportion of messages containing "?" per game x trial.
# Questions are counted by summing the logical match. Cells without any
# question therefore stay in the data with a count of 0; filtering to
# sentence_type == "question" dropped them, so the proportions were computed
# only over cells with at least one question.
# sentence_type is kept as a constant grouping column so the resulting columns
# stay the same.
d_flowers_q <- d_flowers %>%
  group_by(trialNum, gameId, sentence_type = "question") %>%
  summarise(n_sentence_Type = sum(str_detect(text, "\\?"))) %>%
  left_join(d_flowers_n_utt_speaker) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)

## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.

## Joining, by = c("trialNum", "gameId")

#jpeg("~/Desktop/plots/propquestions_trial.jpg", width = 950, height = 950)
# Proportion of questions per trial with a linear trend per condition.
d_flowers_q %>%
  ggplot(aes(x = trialNum, y = prop_questions,
             label = gameId, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
 # facet_wrap(~ condition) +
  geom_point(alpha = .5) +
  labs(title = "Proportion of questions per trial",
       x = "Trial number", y = "Proportion of questions") +
  theme_bw()

 # theme(text = element_text(size = 30))
#dev.off() 
  
##

# Utterances per block, game and condition (denominator for the proportion).
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(blockNum, gameId, condition) %>%
  summarise(n_utt_speaker=n())

## `summarise()` has grouped output by 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Proportion of messages containing "?" per game x block.
# Questions are counted by summing the logical match. Cells without any
# question therefore stay in the data with a count of 0; filtering to
# sentence_type == "question" dropped them, so the proportions were computed
# only over cells with at least one question.
# sentence_type is kept as a constant grouping column so the resulting columns
# stay the same.
d_flowers_q <- d_flowers %>%
  group_by(blockNum, gameId, sentence_type = "question") %>%
  summarise(n_sentence_Type = sum(str_detect(text, "\\?"))) %>%
  left_join(d_flowers_n_utt_speaker) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)

## `summarise()` has grouped output by 'blockNum', 'gameId'. You can override using the `.groups` argument.

## Joining, by = c("blockNum", "gameId")

#jpeg("~/Desktop/plots/propquestions_block.jpg", width = 950, height = 950)
# Proportion of questions per block with a linear trend per condition.
d_flowers_q %>%
  ggplot(aes(x = blockNum, y = prop_questions,
             label = gameId, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  #facet_wrap(~ condition) +
  geom_point(alpha = .5) +
  labs(title = "Proportion of questions per block",
       x = "Block number", y = "Proportion of questions") +
  theme_bw()

 # theme(text = element_text(size = 30))
#dev.off()

# Utterances per trial (with its block), game and condition.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(trialNum, blockNum, gameId, condition) %>%
  summarise(n_utt_speaker=n())

## `summarise()` has grouped output by 'trialNum', 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Proportion of messages containing "?" per game x trial, keeping blockNum for
# the per-block panels.
# Questions are counted by summing the logical match. Cells without any
# question therefore stay in the data with a count of 0; filtering to
# sentence_type == "question" dropped them, so the proportions were computed
# only over cells with at least one question.
# sentence_type is kept as a constant grouping column so the resulting columns
# stay the same.
d_flowers_q <- d_flowers %>%
  group_by(trialNum, blockNum, gameId, sentence_type = "question") %>%
  summarise(n_sentence_Type = sum(str_detect(text, "\\?"))) %>%
  left_join(d_flowers_n_utt_speaker) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)

## `summarise()` has grouped output by 'trialNum', 'blockNum', 'gameId'. You can override using the `.groups` argument.

## Joining, by = c("trialNum", "blockNum", "gameId")

# Scatter plus linear trend of a proportion column against trial number, with
# the y axis fixed to 0-0.20.
#   d_flowers: data with trialNum, gameId, condition and the column passed as
#              `prop_words`
#   prop_words: unquoted column to put on the y axis
#   c, q: lower and upper x-axis limits passed to xlim()
# Returns the ggplot object so callers can add further theme/label layers.
word_per_plot_function <- function(d_flowers, prop_words, c, q) {
  prop_aes <- aes(x = trialNum, y = {{ prop_words }},
                  label = gameId, color = condition)
  ggplot2::ggplot(d_flowers, prop_aes) +
    ylim(0.0, 0.20) +
    xlim(c, q) +
    geom_point(alpha = .5) +
    geom_smooth(method = "lm", formula = y ~ x) +
    theme_bw() +
    xlab("Trial number") +
    # zero margin on all four sides so panels sit tightly in a grid
    theme(plot.margin = unit(rep(0, 4), "cm"))
}

#jpeg("~/Desktop/plots/propquestions_trial.jpg", width = 950, height = 950)
# One panel per block of question proportions by trial, arranged in a single
# row with a shared legend and shared axis titles.
# The panels get their own names so that plot objects no longer overwrite `d`
# (the raw data frame read at the top of the file) or mask base::c().
question_panel_1 <- word_per_plot_function(d_flowers_q %>% filter(blockNum == 0), prop_questions, 0, 5) +
  ylab("Proportion of questions") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  labs(subtitle = "Block1")

question_panel_2 <- word_per_plot_function(d_flowers_q %>% filter(blockNum == 1), prop_questions, 6, 11) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank()) +
  labs(subtitle = "Block2")

question_panel_3 <- word_per_plot_function(d_flowers_q %>% filter(blockNum == 2), prop_questions, 12, 17) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank()) +
  labs(subtitle = "Block3")

question_panel_4 <- word_per_plot_function(d_flowers_q %>% filter(blockNum == 3), prop_questions, 18, 23) +
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        plot.tag.position = "topright") +
  labs(subtitle = "Block4")

# Re-enable the legend on one panel only to extract it as a separate grob.
legend <- get_legend(
  question_panel_4 + theme(legend.position = "right")
)

p <- plot_grid(question_panel_1, question_panel_2, question_panel_3,
               question_panel_4, legend,
               nrow = 1, ncol = 5, align = "v")
x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Proportion of questions", rot = 90)
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

Feedback words (backchannel, hedge)

# Backchannel word list.
# backchanneling: (https://www.reading.ac.uk/AcaDepts/ll/app_ling/internal/Cutrone_vol_2.pdf)
backchanneling <- c(
  "mm", "okay", "uh-huh", "ok", "mm-hm", "uh", "um", "agree",
  "uhuh", "mmm", "wow", "great", "mm", "hm", "ummm", "hmmmm",
  "huh", "un", "um", "ohh", "ooo", "see", "oooo", "ununun",
  "oh", "ah", "true", "agree", "right", "yeah", "good", "really"
)


# Hedge word list.
# based on https://www.researchgate.net/publication/280125979_Linguistic_Markers_and_Stylistic_Attributes_of_Hedging_in_English_Academic_Papers_Written_by_Native_and_Non-Native_Speakers_of_English/figures
hedging <- c(
  "may", "perhaps", "might", "possible", "likely", "possibly",
  "maybe", "probable", "appear", "seem", "suggest", "sometimes",
  "seemingly", "apparently", "often", "could", "usually", "likely",
  "tend", "sometimes", "probably", "primarily", "tendency", "largely"
)

# Total number of words per game x trial (denominator for feedback-word
# proportions).
d_flowers_nwords <- d_flowers %>%
  # Collapse punctuation/space runs to one space, then trim: without the trim a
  # message that starts with punctuation yields an empty first token and is
  # counted as one word too many.
  mutate(text = trimws(gsub("[[:punct:] ]+", " ", text))) %>%
  # lengths() is type-stable (unlike sapply) and gives 0 for an empty message.
  mutate(utt_length_words = lengths(strsplit(text, " "))) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words))

## `summarise()` has grouped output by 'gameId', 'trialNum'. You can override using the `.groups` argument.

# Backchannel and hedge words per game x trial, as counts and as a proportion
# of all words in that cell.
# Tokens are lower-cased before matching, so capitalised forms are counted.
# Every occurrence counts via %in%; intersect() only recorded whether each
# list entry appeared at least once in a message, which does not match a
# per-word proportion.
# NOTE(review): hyphenated list entries ("uh-huh", "mm-hm") cannot match
# because punctuation is replaced by spaces before tokenising -- confirm
# whether they should be matched some other way.
d_flowers_feedback <- d_flowers %>%
  mutate(tokens = strsplit(trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
                           " ")) %>%
  mutate(
    backchannel_length = vapply(tokens,
                                function(tok) sum(tok %in% backchanneling),
                                integer(1L)),
    hedge_length = vapply(tokens,
                          function(tok) sum(tok %in% hedging),
                          integer(1L))
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(total_num_backchannel = sum(backchannel_length),
            total_num_hedge = sum(hedge_length)) %>%
  left_join(d_flowers_nwords) %>%
  mutate(prop_backchannel_words = total_num_backchannel / total_num_words) %>%
  mutate(prop_hedge_words = total_num_hedge / total_num_words)

## `summarise()` has grouped output by 'gameId', 'trialNum'. You can override using the `.groups` argument.

## Joining, by = c("gameId", "trialNum", "condition")

#jpeg("~/Desktop/plots/propbackchannel_trial.jpg", width = 950, height = 950)
# Proportion of backchannel words per trial with the default smoother per
# condition.
d_flowers_feedback %>%
  ggplot(aes(x = trialNum, y = prop_backchannel_words,
             label = gameId, color = condition)) +
 # facet_wrap(~ condition) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(title = "Proportion of backchannel words per trial",
       x = "Trial number", y = "Proportion of backchannel words") +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

  #theme(text = element_text(size = 30))
#dev.off() 

#jpeg("~/Desktop/plots/prophedge_trial.jpg", width = 950, height = 950)
# Hedge proportion against trial number, coloured by condition, with a
# smoothed trend on top of the raw points.
ggplot(
  d_flowers_feedback,
  aes(
    x = trialNum,
    y = prop_hedge_words,
    label = gameId,
    color = condition
  )
) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  labs(
    title = "Proportion of hedge words per trial",
    x = "Trial number",
    y = "Proportion of hedge words"
  ) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

 # theme(text = element_text(size = 30))
#dev.off() 


#ggplot(d_flowers_feedback, aes(y=trialNum , x=backchannel_length, label=gameId)) +
#  geom_point(alpha = .5)+
#  geom_smooth() +
#  geom_text_repel(aes(label=ifelse(backchannel=="character(0)",  "", backchannel), #max.overlaps = 20))+
#  xlim(c(1,5))+
#  theme_bw()+
#  facet_wrap(~ condition) 
  

#ggplot(d_flowers_feedback, aes(y=trialNum , x=hedge_length, label=gameId)) +
#  geom_point(alpha = .5)+
#  geom_smooth() +
#  geom_text_repel(aes(label=ifelse(hedge=="character(0)", "", hedge), max.overlaps = 20))+
#  xlim(c(1,5))+
#  theme_bw()+
#  facet_wrap(~ condition) 
  

# Rebinds d_flowers_nwords to the total number of words per game, block and
# condition. Runs of punctuation and spaces are collapsed to one space, then
# each message is split on spaces.
# NOTE(review): strsplit() keeps a leading "" when a message starts with
# punctuation or a space, so such messages are counted with one extra word --
# confirm whether that is acceptable.
d_flowers_nwords <- d_flowers %>%
  mutate(
    text = gsub('[[:punct:] ]+', ' ', text),
    utt_length_words = lengths(strsplit(text, " "))
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    .groups = "drop_last"
  )

## `summarise()` has grouped output by 'gameId', 'blockNum'. You can override using the `.groups` argument.

# Per game and block: how many backchannel and hedge words occur, and what
# share of the block's words they make up. Matching is exact, and a lexicon
# word is counted at most once per message because intersect() returns each
# match a single time.
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(gsub('[[:punct:] ]+', ' ', text), " "),
    backchannel_length = vapply(
      tokens,
      function(w) length(intersect(backchanneling, w)),
      integer(1)
    ),
    hedge_length = vapply(
      tokens,
      function(w) length(intersect(hedging, w)),
      integer(1)
    )
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "blockNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

## `summarise()` has grouped output by 'gameId', 'blockNum'. You can override using the `.groups` argument.

## Joining, by = c("gameId", "blockNum", "condition")

#jpeg("~/Desktop/plots/propbackchannel_block.jpg", width = 950, height = 950)
# Backchannel proportion against block number, coloured by condition, with a
# smoothed trend on top of the raw points.
ggplot(
  d_flowers_feedback,
  aes(
    x = blockNum,
    y = prop_backchannel_words,
    label = gameId,
    color = condition
  )
) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  labs(
    title = "Proportion of backchannel words per block",
    x = "Block number",
    y = "Proportion of backchannel words"
  ) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

 # theme(text = element_text(size = 30))
#dev.off() 

#jpeg("~/Desktop/plots/prophedge_block.jpg", width = 950, height = 950)
# Hedge proportion against block number, coloured by condition, with a
# smoothed trend on top of the raw points.
ggplot(
  d_flowers_feedback,
  aes(
    x = blockNum,
    y = prop_hedge_words,
    label = gameId,
    color = condition
  )
) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  labs(
    title = "Proportion of hedge words per block",
    x = "Block number",
    y = "Proportion of hedge words"
  ) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#  theme(text = element_text(size = 30))
#dev.off() 



# Rebinds d_flowers_nwords to the total number of words per game, repNum and
# condition.
# NOTE(review): the feedback table built right after this joins
# d_flowers_uttlength_nwords, not this table -- confirm whether these
# per-repNum totals are still needed.
d_flowers_nwords <- d_flowers %>%
  mutate(
    text = gsub('[[:punct:] ]+', ' ', text),
    utt_length_words = lengths(strsplit(text, " "))
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    .groups = "drop_last"
  )

## `summarise()` has grouped output by 'gameId', 'repNum'. You can override using the `.groups` argument.

# Per game, block and trial: how many backchannel and hedge words occur, and
# what share of the words in d_flowers_uttlength_nwords they make up. Matching
# is exact, and a lexicon word is counted at most once per message because
# intersect() returns each match a single time.
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(gsub('[[:punct:] ]+', ' ', text), " "),
    backchannel_length = vapply(
      tokens,
      function(w) length(intersect(backchanneling, w)),
      integer(1)
    ),
    hedge_length = vapply(
      tokens,
      function(w) length(intersect(hedging, w)),
      integer(1)
    )
  ) %>%
  group_by(gameId, trialNum, blockNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(
    d_flowers_uttlength_nwords,
    by = c("gameId", "trialNum", "blockNum", "condition")
  ) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

## `summarise()` has grouped output by 'gameId', 'trialNum', 'blockNum'. You can override using the `.groups` argument.

## Joining, by = c("gameId", "trialNum", "blockNum", "condition")

# Backchannel-word proportion, drawn as one panel per block (blockNum 0-3)
# with a single shared legend and shared axis titles.
# NOTE(review): assigning to `c` and `d` shadows base::c and overwrites the
# raw chat table `d` read at the top of the file -- confirm nothing later
# still needs that table.
a <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 0),
  prop_backchannel_words, 0, 5
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(subtitle = "Block1")

b <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 1),
  prop_backchannel_words, 6, 11
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block2")

c <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 2),
  prop_backchannel_words, 12, 17
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block3")

d <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 3),
  prop_backchannel_words, 18, 23
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    plot.tag.position = "topright"
  ) +
  labs(subtitle = "Block4")

# Turn the legend back on for one panel so it can be extracted on its own.
legend <- get_legend(d + theme(legend.position = "right"))

# Four panels plus the legend in one row, then shared axis titles around it.
p <- plot_grid(a, b, c, d, legend, nrow = 1, ncol = 5, align = "v")
x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Backchannel words", rot = 90)
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

# Hedge-word proportion, drawn as one panel per block (blockNum 0-3) with a
# single shared legend. Every panel gets the same y range via ylim(), which
# replaces the y scale already set by word_per_plot_function (see the
# messages below).
# NOTE(review): assigning to `c` and `d` shadows base::c and overwrites the
# raw chat table `d` read at the top of the file -- confirm nothing later
# still needs that table.
a <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 0),
  prop_hedge_words, 0, 5
) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank()
  ) +
  labs(subtitle = "Block1") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

b <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 1),
  prop_hedge_words, 6, 11
) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block2") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

c <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 2),
  prop_hedge_words, 12, 17
) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block3") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

d <- word_per_plot_function(
  d_flowers_feedback %>% filter(blockNum == 3),
  prop_hedge_words, 18, 23
) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    plot.tag.position = "topright"
  ) +
  labs(subtitle = "Block4") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

# Turn the legend back on for one panel so it can be extracted on its own.
legend <- get_legend(d + theme(legend.position = "right"))

# Four panels plus the legend in one row, then shared axis titles around it.
p <- plot_grid(a, b, c, d, legend, nrow = 1, ncol = 5, align = "v")

x.grob <- textGrob("Trial Number")
y.grob <- textGrob("Hedge words", rot = 90)
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

LIWC (positive, negative) categories

# Load the LIWC word lists (one category per column, per the column
# specification printed below).
# NOTE(review): absolute path under /Users/lscpuser -- it will not resolve on
# other machines; confirm where liwc1.csv should live in the project.
liwc <- read_csv("/Users/lscpuser/Downloads/liwc1.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Sad = col_character(),
##   Anger = col_character(),
##   Anx = col_character(),
##   Negemo = col_character(),
##   Posemo = col_character(),
##   X6 = col_logical(),
##   X7 = col_logical(),
##   Swear = col_character()
## )

# Take each LIWC category's words on its own. The columns are independent
# word lists, so dropping whole rows where either Negemo or Posemo is NA cuts
# both lists down to the rows where both happen to be filled and silently
# loses words from the longer list. `liwc` itself is left unfiltered.
liwc_posemo <- liwc$Posemo[!is.na(liwc$Posemo)]
liwc_negemo <- liwc$Negemo[!is.na(liwc$Negemo)]

# Per game, block and trial: how many LIWC positive and negative words occur,
# and what share of the words in d_flowers_uttlength_nwords they make up.
# Matching is exact, and a lexicon word is counted at most once per message
# because intersect() returns each match a single time.
# NOTE(review): if the lists contain LIWC wildcard stems (entries ending in
# "*"), exact matching never hits them -- confirm against liwc1.csv.
d_flowers_liwc <- d_flowers %>%
  mutate(
    tokens = strsplit(gsub('[[:punct:] ]+', ' ', text), " "),
    positive_length = vapply(
      tokens,
      function(w) length(intersect(liwc_posemo, w)),
      integer(1)
    ),
    negative_length = vapply(
      tokens,
      function(w) length(intersect(liwc_negemo, w)),
      integer(1)
    )
  ) %>%
  group_by(gameId, trialNum, blockNum, condition) %>%
  summarise(
    total_num_positive = sum(positive_length),
    total_num_negative = sum(negative_length),
    .groups = "drop_last"
  ) %>%
  left_join(
    d_flowers_uttlength_nwords,
    by = c("gameId", "trialNum", "blockNum", "condition")
  ) %>%
  mutate(
    prop_negative_words = total_num_negative / total_num_words,
    prop_positive_words = total_num_positive / total_num_words
  )

## `summarise()` has grouped output by 'gameId', 'trialNum', 'blockNum'. You can override using the `.groups` argument.

## Joining, by = c("gameId", "trialNum", "blockNum", "condition")

#######positive LIWC
# LIWC positive-word proportion against trial number, coloured by condition,
# with a smoothed trend on top of the raw points.
ggplot(
  d_flowers_liwc,
  aes(
    x = trialNum,
    y = prop_positive_words,
    label = gameId,
    color = condition
  )
) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  labs(
    title = "Proportion of positive words per trial",
    x = "Trial number",
    y = "Proportion of positive words"
  ) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

  #theme(text = element_text(size = 30))



# LIWC positive-word proportion, drawn as one panel per block (blockNum 0-3)
# with a single shared legend, then written to disk.
# NOTE(review): assigning to `c` and `d` shadows base::c and overwrites the
# raw chat table `d` read at the top of the file -- confirm nothing later
# still needs that table.
a <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 0),
  prop_positive_words, 0, 5
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(subtitle = "Block1", y = "LIWC positive words")

b <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 1),
  prop_positive_words, 6, 11
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block2")

c <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 2),
  prop_positive_words, 12, 17
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block3")

d <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 3),
  prop_positive_words, 18, 23
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    plot.tag.position = "topright"
  ) +
  labs(subtitle = "Block4")

# Turn the legend back on for one panel so it can be extracted on its own.
legend <- get_legend(d + theme(legend.position = "right"))

# Four panels plus the legend in one row, then shared axis titles around it.
p <- plot_grid(a, b, c, d, legend, nrow = 1, ncol = 5, align = "v")

x.grob <- textGrob("Trial Number")
y.grob <- textGrob("LIWC positive words", rot = 90)
p <- grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

# NOTE(review): absolute path under /Users/lscpuser -- it will not resolve on
# other machines; the alignment figure is saved under "../figs/" instead.
ggsave(
  filename = "/Users/lscpuser/Documents/last_AAFLOWERS/AA-flowers/figs/liwc_pos.png",
  plot = p
)

## Saving 7 x 5 in image

######### negative LIWC
# LIWC negative-word proportion against trial number, coloured by condition,
# with a smoothed trend on top of the raw points.
ggplot(
  d_flowers_liwc,
  aes(
    x = trialNum,
    y = prop_negative_words,
    label = gameId,
    color = condition
  )
) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  labs(
    title = "Proportion of negative words per trial",
    x = "Trial number",
    y = "Proportion of negative words"
  ) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

  #theme(text = element_text(size = 30))
#dev.off() 

# LIWC negative-word proportion, drawn as one panel per block (blockNum 0-3)
# with a single shared legend. Every panel gets the same y range via ylim(),
# which replaces the y scale already set by word_per_plot_function (see the
# messages below).
e <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 0),
  prop_negative_words, 0, 5
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(subtitle = "Block1") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

f <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 1),
  prop_negative_words, 6, 11
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block2") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

g <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 2),
  prop_negative_words, 12, 17
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(subtitle = "Block3") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

h <- word_per_plot_function(
  d_flowers_liwc %>% filter(blockNum == 3),
  prop_negative_words, 18, 23
) +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    plot.tag.position = "topright"
  ) +
  labs(subtitle = "Block4") +
  ylim(0, 0.05)

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

# Turn the legend back on for one panel so it can be extracted on its own.
legend <- get_legend(h + theme(legend.position = "right"))

# Four panels plus the legend in one row.
p <- plot_grid(e, f, g, h, legend, nrow = 1, ncol = 5, align = "v")
x.grob <- textGrob("Trial Number")
y.grob <- textGrob("LIWC negative words", rot = 90)

# Draw the grid with the shared axis titles around it.
grid.arrange(arrangeGrob(p, left = y.grob, bottom = x.grob))

### Alignment

library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:cowplot':
## 
##     get_legend

# Load the saved Stan model output used for the alignment plot.
d3 <- read_csv("../data/processed_data/joined_data/output_stan_model.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   category = col_character(),
##   subpop = col_character(),
##   ba = col_double(),
##   nba = col_double(),
##   bna = col_double(),
##   nbna = col_double(),
##   category_num = col_double(),
##   subpop_num = col_double(),
##   model_eta = col_double(),
##   model_mu = col_double(),
##   model_dnm = col_double()
## )

# Summarise model_eta per subpop with langcog's multi_boot_standard(); the
# resulting mean / ci_lower / ci_upper columns feed the point-range layer of
# the alignment plot. `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
d4 <- d3 %>%
  group_by(subpop) %>%
  multi_boot_standard("model_eta", na.rm = FALSE)


# Raise ggrepel's overlap limit so fewer category labels get dropped.
options(ggrepel.max.overlaps = 50)

# One point per row of d3 (model_eta by subpop), labelled with its category,
# overlaid with the per-subpop mean and confidence interval from d4 in black.
# ylim() restricts the y axis to [-0.5, 0.5]; values outside it are not drawn.
ggplot(d3, aes(x = subpop, y = model_eta, colour = subpop)) +
  # geom_line(aes(group = category)) +
  geom_point() +
  # geom_smooth(method = "loess") +
  theme_bw(base_size = 14) +
  geom_text_repel(aes(label = category)) +
  theme(panel.grid = element_blank()) +
  labs(
    title = 'Model-estimated Alignment for speaker pairs, with mean and CI',
    x = 'Conditions',
    y = 'Alignment (delta log-odds)',
    colour = 'Alignment'
  ) +
  geom_pointrange(
    data = d4,
    mapping = aes(x = subpop, y = mean, ymin = ci_lower, ymax = ci_upper),
    col = "black"
  ) +
  ylim(-0.5, 0.5) +
  grids(linetype = "dashed")

# Saves the most recently displayed plot.
ggsave("../figs/alignment.png", width = 10, height = 3)