STEP THREE - Linguistic quantity and complexity

# Read the filtered raw chat CSV into `d` (column types are guessed by readr).
d <- read_csv(
  file = "/Users/lscpuser/Documents/AA-flowers2/AA-flowers/data/processed_data/joined_data/filtered_raw_chat.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   gameId = col_character(),
##   trialNum = col_double(),
##   condition = col_character(),
##   chatEnabled = col_logical(),
##   playerId = col_character(),
##   name = col_character(),
##   text = col_character(),
##   participantAction = col_character(),
##   roundID = col_character(),
##   index = col_double(),
##   createdAt = col_datetime(format = ""),
##   repNum = col_double(),
##   blockNum = col_double(),
##   numPlayers = col_double(),
##   type = col_character(),
##   submitted = col_logical(),
##   playerResponse = col_character(),
##   playerUtility = col_double()
## )
# Keep only chat messages that have text and are not in the "coopCartel"
# condition. All three conditions are applied in a single filter(); the
# coopCartel exclusion only needs to be applied once.
d_flowers <- d %>%
  filter(
    type == "message",
    !is.na(text),
    condition != "coopCartel"
  )

# NOTE(review): this exports the unfiltered `d`, not `d_flowers` -- confirm
# that the raw table is really what should be written here.
write_csv(d, "~/Downloads/raw_chat3.csv")
#data_target = "pilot0"
#data_location=paste0("data/",data_target)

#d_flowers<- read_csv(here(data_location, "raw_chat2.csv"))%>%
 #filter(!is.na(text)) 

1. Utterance length

# Per game, block and condition: total word count and mean length of
# utterance (MLU, words per message).
# strsplit() yields a leading "" when a string starts with the separator, so
# the cleaned text is trimmed before splitting; otherwise a message that
# begins with punctuation/spaces (or consists only of them) gains a spurious
# extra "word".
d_flowers_utt_length_block <- d_flowers %>%
  mutate(
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    mlu = mean(utt_length_words),
    .groups = "drop_last"
  )

# Export (disabled): jpeg("~/Desktop/plots/nwords_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_utt_length_block,
  aes(x = blockNum, y = total_num_words, color = condition)
) +
  # Raw data are drawn once, as faint jittered points (a separate opaque
  # geom_point() layer would draw the same observations a second time).
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of words per block",
    y = "Total number of words",
    x = "Block number"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() tweaks, so it
  # goes first and the legend position is applied on top of it.
  theme_bw() +
  theme(legend.position = "bottom")

# Export (disabled): jpeg("~/Desktop/plots/mlu_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: scale_y_continuous(limits = c(0, 15)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_utt_length_block,
  aes(x = blockNum, y = mlu, color = condition)
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(title = "MLU per block", y = "MLU", x = "Block number") +
  theme_bw() +
  theme(legend.position = "bottom")



# Per game, trial and condition: total word count and mean length of
# utterance (MLU, words per message).
# The cleaned text is trimmed before splitting because strsplit() returns a
# leading "" for strings that start with the separator, which would count a
# spurious extra "word" for messages beginning with punctuation/spaces.
d_flowers_utt_length_trial <- d_flowers %>%
  mutate(
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    mlu = mean(utt_length_words),
    .groups = "drop_last"
  )

# Export (disabled): jpeg("~/Desktop/plots/nwords_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_utt_length_trial,
  aes(x = trialNum, y = total_num_words, color = condition)
) +
  # Raw data are drawn once, as faint jittered points (a separate opaque
  # geom_point() layer would draw the same observations a second time).
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of words per trial",
    y = "Total number of words",
    x = "Trial number"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() tweaks, so it
  # goes first and the legend position is applied on top of it.
  theme_bw() +
  theme(legend.position = "bottom")

# Export (disabled): jpeg("~/Desktop/plots/mlu_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: scale_y_continuous(limits = c(0, 15)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_utt_length_trial,
  aes(x = trialNum, y = mlu, color = condition)
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(title = "MLU per trial", y = "MLU", x = "Trial number") +
  theme_bw() +
  theme(legend.position = "bottom")


# Per game, repetition and condition: total word count and mean length of
# utterance (MLU, words per message).
# The cleaned text is trimmed before splitting because strsplit() returns a
# leading "" for strings that start with the separator, which would count a
# spurious extra "word" for messages beginning with punctuation/spaces.
d_flowers_utt_length_rep <- d_flowers %>%
  mutate(
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    mlu = mean(utt_length_words),
    .groups = "drop_last"
  )

# Export (disabled): jpeg("~/Desktop/plots/nword_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_utt_length_rep,
  aes(x = repNum, y = total_num_words, color = condition)
) +
  # Raw data are drawn once, as faint jittered points (a separate opaque
  # geom_point() layer would draw the same observations a second time).
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of words per rep",
    y = "Total number of words",
    x = "Rep number"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() tweaks, so it
  # goes first and the legend position is applied on top of it.
  theme_bw() +
  theme(legend.position = "bottom")

# Export (disabled): jpeg("~/Desktop/plots/mlu_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: scale_y_continuous(limits = c(0, 15)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_utt_length_rep,
  aes(x = repNum, y = mlu, color = condition)
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(title = "MLU per rep", y = "MLU", x = "Rep number") +
  theme_bw() +
  theme(legend.position = "bottom")

2. Number of turns

# Number of chat turns (message rows) per game, condition and repetition.
d_flowers_n_turns_rep <- d_flowers %>%
  group_by(gameId, condition, repNum) %>%
  summarise(n = n(), .groups = "drop_last")

# Export (disabled): jpeg("~/Desktop/plots/nturn_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(d_flowers_n_turns_rep, aes(x = repNum, y = n, color = condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns per Rep",
    y = "Number of turns",
    x = "Rep number"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() tweaks, so it
  # goes first and the legend position is applied on top of it.
  theme_bw() +
  theme(legend.position = "bottom")


# Number of chat turns (message rows) per game, condition and trial.
d_flowers_n_turns_trial <- d_flowers %>%
  group_by(gameId, condition, trialNum) %>%
  summarise(n = n(), .groups = "drop_last")

# Export (disabled): jpeg("~/Desktop/plots/nturn_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(d_flowers_n_turns_trial, aes(x = trialNum, y = n, color = condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns per trial",
    y = "Number of turns",
    x = "Trial number"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() tweaks, so it
  # goes first and the legend position is applied on top of it.
  theme_bw() +
  theme(legend.position = "bottom")


# Number of chat turns (message rows) per game, condition and block.
d_flowers_n_turns_block <- d_flowers %>%
  group_by(gameId, condition, blockNum) %>%
  summarise(n = n(), .groups = "drop_last")

# Export (disabled): jpeg("~/Desktop/plots/nturn_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(d_flowers_n_turns_block, aes(x = blockNum, y = n, color = condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns per block",
    y = "Number of turns",
    x = "Block number"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() tweaks, so it
  # goes first and the legend position is applied on top of it.
  theme_bw() +
  theme(legend.position = "bottom")

#second plot ### 2b. Number of turns

# Utterance (message row) count for every trial x game x condition cell.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, trialNum, gameId, condition),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.

# Export (disabled): jpeg("~/Desktop/plots/nutts_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(~condition); theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_n_utt_speaker,
  mapping = aes(x = trialNum, y = n_utt_speaker, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(x = "Trial number", y = "Number of utterances") +
  theme_bw()

# Utterance (message row) count for every block x game x condition cell.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, blockNum, gameId, condition),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Export (disabled): jpeg("~/Desktop/plots/nutts_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(~condition); theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_n_utt_speaker,
  mapping = aes(x = blockNum, y = n_utt_speaker, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(x = "Block number", y = "Number of utterances") +
  theme_bw()

# Utterance (message row) count for every repetition x game x condition cell.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, repNum, gameId, condition),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'repNum', 'gameId'. You can override using the `.groups` argument.

# Export (disabled): jpeg("~/Desktop/plots/nutts_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(~condition); theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_n_utt_speaker,
  mapping = aes(x = repNum, y = n_utt_speaker, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(x = "Rep number", y = "Number of utterances") +
  theme_bw()

3. Parts of speech

# POS-tag every message with spaCy, then collapse each document's tags into a
# single comma-separated string so that `parsed` has one row per doc_id.
# mutate() + distinct() keeps the rows in the order spacy_parse() returned
# them, which the positional cbind() below depends on.
text <- d_flowers$text
parsed <- spacy_parse(text, pos = TRUE) %>%
  select(doc_id, pos) %>%
  group_by(doc_id) %>%
  mutate(pos = paste0(pos, collapse = ",")) %>%
  distinct()
## Found 'spacy_condaenv'. spacyr will use this environment
## successfully initialized (spaCy Version: 3.0.6, language model: en_core_web_sm)
## (python options: type = "condaenv", value = "spacy_condaenv")

# Count selected POS tags per condition/block/game and express them relative
# to the block's total word count.
# NOTE(review): cbind() pairs rows purely by position -- this assumes
# `parsed` has exactly one row per message, in d_flowers order; confirm that
# spacy_parse() never drops or reorders documents.
d_flowers_pos <- cbind(d_flowers, parsed) %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(condition, blockNum, gameId, pos) %>%
  summarise(pos_count = n()) %>%
  left_join(d_flowers_utt_length_block) %>%
  mutate(prop_pos = pos_count / total_num_words)
## `summarise()` has grouped output by 'condition', 'blockNum', 'gameId'. You can override using the `.groups` argument.
## Joining, by = c("condition", "blockNum", "gameId")

# Export (disabled): jpeg("~/Desktop/plots/pos_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extra: theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_pos,
  mapping = aes(x = blockNum, y = prop_pos, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(pos)) +
  labs(x = "Block number", y = "Proportion of pos") +
  theme_bw()

# Count selected POS tags per condition/trial/game and express them relative
# to the trial's total word count.
# NOTE(review): cbind() pairs rows purely by position -- this assumes
# `parsed` has exactly one row per message, in d_flowers order; confirm.
d_flowers_pos <- cbind(d_flowers, parsed) %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(condition, trialNum, gameId, pos) %>%
  summarise(pos_count = n()) %>%
  left_join(d_flowers_utt_length_trial) %>%
  mutate(prop_pos = pos_count / total_num_words)
## `summarise()` has grouped output by 'condition', 'trialNum', 'gameId'. You can override using the `.groups` argument.
## Joining, by = c("condition", "trialNum", "gameId")

# Export (disabled): jpeg("~/Desktop/plots/pos_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extra: theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_pos,
  mapping = aes(x = trialNum, y = prop_pos, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(pos)) +
  labs(x = "Trial number", y = "Proportion of pos") +
  lims(y = c(0, 5)) +
  theme_bw()

# Count selected POS tags per condition/repetition/game and express them
# relative to the repetition's total word count.
# NOTE(review): cbind() pairs rows purely by position -- this assumes
# `parsed` has exactly one row per message, in d_flowers order; confirm.
d_flowers_pos <- cbind(d_flowers, parsed) %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(condition, repNum, gameId, pos) %>%
  summarise(pos_count = n()) %>%
  left_join(d_flowers_utt_length_rep) %>%
  mutate(prop_pos = pos_count / total_num_words)
## `summarise()` has grouped output by 'condition', 'repNum', 'gameId'. You can override using the `.groups` argument.
## Joining, by = c("condition", "repNum", "gameId")

# Export (disabled): jpeg("~/Desktop/plots/pos_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extra: theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_pos,
  mapping = aes(x = repNum, y = prop_pos, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(pos)) +
  labs(x = "Rep number", y = "Proportion of pos") +
  lims(y = c(0, 5)) +
  theme_bw()

4. Lexical Diversity (TTR)

# Type-token ratio (number of distinct words / number of words) of each
# element of a character vector.
#
# text_: character vector; each element is treated as one text.
# Returns a numeric vector of the same length, each value in (0, 1], or
# NA_real_ for elements that are NA or contain no words.
#
# Texts are lower-cased and runs of punctuation/whitespace are collapsed to a
# single space before splitting, so "ok?" and "ok" are the same type and
# punctuation is never counted as an extra type (which could previously push
# the ratio above 1).
ttr <- function(text_) {
  cleaned <- trimws(gsub("[[:punct:][:space:]]+", " ", tolower(text_)))
  tokens <- strsplit(cleaned, " ", fixed = TRUE)
  vapply(
    tokens,
    function(words) {
      # strsplit() gives character(0) for "" and NA for NA input.
      if (length(words) == 0L || anyNA(words)) {
        return(NA_real_)
      }
      length(unique(words)) / length(words)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# Concatenate all messages of a game within a trial into one text, giving
# exactly one row per (gameId, trialNum, condition).
# summarise() is used rather than mutate() + select(-text) + distinct():
# that combination keeps every remaining message-level column, so rows that
# differ in e.g. playerId or createdAt survive distinct() and each group's
# TTR is repeated once per message in the plot and in the regression.
d_flowers_text_trial <- d_flowers %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

# Type-token ratio of each trial-level text.
d_flowers_ttr_trial <- d_flowers_text_trial %>%
  mutate(ttr_ = ttr(text_block))

#t<-str_split(text_, " ", 1:6)
#y<- paste(unlist(lapply(t,head,n=5)), collapse=" ")

# Export (disabled): jpeg("~/Desktop/plots/ttr_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_ttr_trial,
  aes(x = trialNum, y = ttr_, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(title = "TTR per trial", x = "Trial number", y = "Type_token ratio") +
  ylim(0, 1) +
  theme_bw()


# Concatenate all messages of a game within a repetition into one text,
# giving exactly one row per (gameId, repNum, condition).
# summarise() is used rather than mutate() + select(-text) + distinct():
# that combination keeps every remaining message-level column, so rows that
# differ in e.g. playerId or createdAt survive distinct() and each group's
# TTR is repeated once per message in the plot and in the regression.
d_flowers_text_rep <- d_flowers %>%
  group_by(gameId, repNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

# Type-token ratio of each repetition-level text.
d_flowers_ttr_rep <- d_flowers_text_rep %>%
  mutate(ttr_ = ttr(text_block))

# Export (disabled): jpeg("~/Desktop/plots/ttr_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_ttr_rep,
  aes(x = repNum, y = ttr_, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(title = "TTR per rep", x = "Rep number", y = "Type_token ratio") +
  ylim(0, 1) +
  theme_bw()


# Concatenate all messages of a game within a block into one text, giving
# exactly one row per (gameId, blockNum, condition).
# summarise() is used rather than mutate() + select(-text) + distinct():
# that combination keeps every remaining message-level column, so rows that
# differ in e.g. playerId or createdAt survive distinct() and each group's
# TTR is repeated once per message in the plot and in the regression.
d_flowers_text_block <- d_flowers %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

# Type-token ratio of each block-level text.
d_flowers_ttr_block <- d_flowers_text_block %>%
  mutate(ttr_ = ttr(text_block))

# Export (disabled): jpeg("~/Desktop/plots/ttr_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_grid(rows = vars(condition)); theme(text = element_text(size = 30))
ggplot(
  d_flowers_ttr_block,
  aes(x = blockNum, y = ttr_, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(title = "TTR per block", x = "Block number", y = "Type_token ratio") +
  ylim(0, 1) +
  theme_bw()

STEP FOUR - Feedback items and markers

1. Distribution of speech across speakers

# Messages sent by each player within a trial of a game.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, gameId, condition),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'playerId', 'trialNum', 'gameId'. You can override using the `.groups` argument.

# Messages sent by all players within the same trial of a game.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, trialNum, gameId, condition),
  n_utt_game = n()
)
## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.

# Each player's share of the trial's messages.
d_flowers_n_utt_dist <- mutate(
  left_join(d_flowers_n_utt_speaker, d_flowers_n_utt_game),
  overall_turns = n_utt_speaker / n_utt_game
)
## Joining, by = c("trialNum", "gameId", "condition")

# Export (disabled): jpeg("~/Desktop/plots/proputts_trial_speaker.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~condition); theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_n_utt_dist,
  mapping = aes(x = trialNum, y = overall_turns, label = playerId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(
    title = "Proportion of turns per trial and speaker",
    x = "Trial number",
    y = "Proportion of turns"
  ) +
  theme_bw()



# Messages sent by each player within a block of a game.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, blockNum, gameId, condition),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'playerId', 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Messages sent by all players within the same block of a game.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, blockNum, gameId, condition),
  n_utt_game = n()
)
## `summarise()` has grouped output by 'blockNum', 'gameId'. You can override using the `.groups` argument.

# Each player's share of the block's messages.
d_flowers_n_utt_dist <- mutate(
  left_join(d_flowers_n_utt_speaker, d_flowers_n_utt_game),
  overall_turns = n_utt_speaker / n_utt_game
)
## Joining, by = c("blockNum", "gameId", "condition")

# Export (disabled): jpeg("~/Desktop/plots/proputts_block_speaker.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~condition); theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_n_utt_dist,
  mapping = aes(x = blockNum, y = overall_turns, label = playerId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(
    title = "Proportion of turns per block and speaker",
    x = "Block number",
    y = "Proportion of turns"
  ) +
  theme_bw()


# Messages sent by each player within a repetition of a game.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, repNum, gameId, condition),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'playerId', 'repNum', 'gameId'. You can override using the `.groups` argument.

# Messages sent by all players within the same repetition of a game.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, repNum, gameId, condition),
  n_utt_game = n()
)
## `summarise()` has grouped output by 'repNum', 'gameId'. You can override using the `.groups` argument.

# Each player's share of the repetition's messages.
d_flowers_n_utt_dist <- mutate(
  left_join(d_flowers_n_utt_speaker, d_flowers_n_utt_game),
  overall_turns = n_utt_speaker / n_utt_game
)
## Joining, by = c("repNum", "gameId", "condition")

# Export (disabled): jpeg("~/Desktop/plots/proputts_rep_speaker.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~condition); theme(text = element_text(size = 30))
ggplot(
  data = d_flowers_n_utt_dist,
  mapping = aes(x = repNum, y = overall_turns, label = playerId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(
    title = "Proportion of turns per rep and speaker",
    x = "Rep number",
    y = "Proportion of turns"
  ) +
  theme_bw()

2. Turn-eliciting questions

# Messages per trial, game and condition.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(trialNum, gameId, condition) %>%
  summarise(n_utt_speaker = n(), .groups = "drop_last")

# Share of messages containing a question mark, per trial and game.
# Counts are computed in one summarise() so that trials without any question
# stay in the table with a proportion of 0; counting per sentence type and
# then keeping only the "question" rows drops those trials and biases the
# plotted proportions upward.
d_flowers_q <- d_flowers %>%
  group_by(trialNum, gameId, condition) %>%
  summarise(
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "drop"
  ) %>%
  mutate(
    sentence_type = "question",
    prop_questions = n_sentence_Type / n_utt_speaker
  )

# Export (disabled): jpeg("~/Desktop/plots/propquestions_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_q,
  aes(x = trialNum, y = prop_questions, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .5) +
  labs(
    title = "Proportion of questions per trial",
    x = "Trial number",
    y = "Proportion of questions"
  ) +
  theme_bw()
  
#####

# Messages per repetition, game and condition.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(repNum, gameId, condition) %>%
  summarise(n_utt_speaker = n(), .groups = "drop_last")

# Share of messages containing a question mark, per repetition and game.
# Counts are computed in one summarise() so that repetitions without any
# question stay in the table with a proportion of 0; counting per sentence
# type and then keeping only the "question" rows drops those repetitions and
# biases the plotted proportions upward.
d_flowers_q <- d_flowers %>%
  group_by(repNum, gameId, condition) %>%
  summarise(
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "drop"
  ) %>%
  mutate(
    sentence_type = "question",
    prop_questions = n_sentence_Type / n_utt_speaker
  )

# Export (disabled): jpeg("~/Desktop/plots/propquestions_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_q,
  aes(x = repNum, y = prop_questions, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .5) +
  labs(
    title = "Proportion of questions per rep",
    x = "Rep number",
    y = "Proportion of questions"
  ) +
  theme_bw()


# Messages per block, game and condition.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(blockNum, gameId, condition) %>%
  summarise(n_utt_speaker = n(), .groups = "drop_last")

# Share of messages containing a question mark, per block and game.
# Counts are computed in one summarise() so that blocks without any question
# stay in the table with a proportion of 0; counting per sentence type and
# then keeping only the "question" rows drops those blocks and biases the
# plotted proportions upward.
d_flowers_q <- d_flowers %>%
  group_by(blockNum, gameId, condition) %>%
  summarise(
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "drop"
  ) %>%
  mutate(
    sentence_type = "question",
    prop_questions = n_sentence_Type / n_utt_speaker
  )

# Export (disabled): jpeg("~/Desktop/plots/propquestions_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_q,
  aes(x = blockNum, y = prop_questions, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .5) +
  labs(
    title = "Proportion of questions per block",
    x = "Block number",
    y = "Proportion of questions"
  ) +
  theme_bw()

3. Politeness and feedback markers (backchannels and hedges)

###politeness markers

###backchanneling: (https://www.reading.ac.uk/AcaDepts/ll/app_ling/internal/Cutrone_vol_2.pdf) 

# Backchannel marker word list (all lower-case). A few entries are listed
# more than once ("mm", "um", "agree").
backchanneling <- c(
  "mm", "okay", "uh-huh", "ok", "mm-hm", "uh", "um", "agree",
  "uhuh", "mmm", "wow", "great", "mm", "hm", "ummm", "hmmmm",
  "huh", "un", "um", "ohh", "ooo", "see", "oooo", "ununun",
  "oh", "ah", "true", "agree", "right", "yeah", "good", "really"
)

###hedges: (https://www.researchgate.net/publication/280125979_Linguistic_Markers_and_Stylistic_Attributes_of_Hedging_in_English_Academic_Papers_Written_by_Native_and_Non-Native_Speakers_of_English/figures) 

# Hedging marker word list (all lower-case). "likely" and "sometimes" are
# listed twice.
hedging <- c(
  "may", "perhaps", "might", "possible", "likely", "possibly",
  "maybe", "probable", "appear", "seem", "suggest", "sometimes",
  "seemingly", "apparently", "often", "could", "usually", "likely",
  "tend", "sometimes", "probably", "primarily", "tendency", "largely"
)

# Total number of words per game, trial and condition (denominator below).
# The cleaned text is trimmed before splitting because strsplit() returns a
# leading "" for strings that start with the separator, which would add a
# spurious "word" to messages beginning with punctuation/spaces.
d_flowers_nwords <- d_flowers %>%
  mutate(
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words), .groups = "drop_last")

# Backchannel and hedge words as a proportion of all words, per game and
# trial.
# - Text is lower-cased first: both marker lists are lower-case, so "Okay" or
#   "Maybe" would otherwise never match.
# - Every matching token is counted (sum of %in%), so the numerator is a
#   token count like the denominator; intersect() would count each distinct
#   marker at most once per message.
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(
      trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
      " ",
      fixed = TRUE
    ),
    backchannel_length = vapply(
      tokens, function(w) sum(w %in% backchanneling), integer(1)
    ),
    hedge_length = vapply(
      tokens, function(w) sum(w %in% hedging), integer(1)
    )
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "trialNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

# Export (disabled): jpeg("~/Desktop/plots/propbackchannel_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_feedback,
  aes(y = prop_backchannel_words, x = trialNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(
    title = "Proportion of backchannel words per trial",
    x = "Trial number",
    y = "Proportion of backchannel words"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Export (disabled): jpeg("~/Desktop/plots/prophedge_trial.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_feedback,
  aes(y = prop_hedge_words, x = trialNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(
    title = "Proportion of hedge words per trial",
    x = "Trial number",
    y = "Proportion of hedge words"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'


#ggplot(d_flowers_feedback, aes(y=trialNum , x=backchannel_length, label=gameId)) +
#  geom_point(alpha = .5)+
#  geom_smooth() +
#  geom_text_repel(aes(label=ifelse(backchannel=="character(0)",  "", backchannel), #max.overlaps = 20))+
#  xlim(c(1,5))+
#  theme_bw()+
#  facet_wrap(~ condition) 
  

#ggplot(d_flowers_feedback, aes(y=trialNum , x=hedge_length, label=gameId)) +
#  geom_point(alpha = .5)+
#  geom_smooth() +
#  geom_text_repel(aes(label=ifelse(hedge=="character(0)", "", hedge), max.overlaps = 20))+
#  xlim(c(1,5))+
#  theme_bw()+
#  facet_wrap(~ condition) 
  

# Total number of words per game, block and condition (denominator below).
# The cleaned text is trimmed before splitting because strsplit() returns a
# leading "" for strings that start with the separator, which would add a
# spurious "word" to messages beginning with punctuation/spaces.
d_flowers_nwords <- d_flowers %>%
  mutate(
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words), .groups = "drop_last")

# Backchannel and hedge words as a proportion of all words, per game and
# block.
# - Text is lower-cased first: both marker lists are lower-case, so "Okay" or
#   "Maybe" would otherwise never match.
# - Every matching token is counted (sum of %in%), so the numerator is a
#   token count like the denominator; intersect() would count each distinct
#   marker at most once per message.
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(
      trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
      " ",
      fixed = TRUE
    ),
    backchannel_length = vapply(
      tokens, function(w) sum(w %in% backchanneling), integer(1)
    ),
    hedge_length = vapply(
      tokens, function(w) sum(w %in% hedging), integer(1)
    )
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "blockNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

# Export (disabled): jpeg("~/Desktop/plots/propbackchannel_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_feedback,
  aes(y = prop_backchannel_words, x = blockNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(
    title = "Proportion of backchannel words per block",
    x = "Block number",
    y = "Proportion of backchannel words"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Export (disabled): jpeg("~/Desktop/plots/prophedge_block.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_feedback,
  aes(y = prop_hedge_words, x = blockNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(
    title = "Proportion of hedge words per block",
    x = "Block number",
    y = "Proportion of hedge words"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'



# Total number of words per game, repetition and condition (denominator
# below). The cleaned text is trimmed before splitting because strsplit()
# returns a leading "" for strings that start with the separator, which would
# add a spurious "word" to messages beginning with punctuation/spaces.
d_flowers_nwords <- d_flowers %>%
  mutate(
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words), .groups = "drop_last")

# Backchannel and hedge words as a proportion of all words, per game and
# repetition.
# - Text is lower-cased first: both marker lists are lower-case, so "Okay" or
#   "Maybe" would otherwise never match.
# - Every matching token is counted (sum of %in%), so the numerator is a
#   token count like the denominator; intersect() would count each distinct
#   marker at most once per message.
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(
      trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
      " ",
      fixed = TRUE
    ),
    backchannel_length = vapply(
      tokens, function(w) sum(w %in% backchanneling), integer(1)
    ),
    hedge_length = vapply(
      tokens, function(w) sum(w %in% hedging), integer(1)
    )
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "repNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

# Export (disabled): jpeg("~/Desktop/plots/propbackchannel_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_feedback,
  aes(y = prop_backchannel_words, x = repNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(
    title = "Proportion of backchannel words per rep",
    x = "Rep number",
    y = "Proportion of backchannel words"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Export (disabled): jpeg("~/Desktop/plots/prophedge_rep.jpg", width = 950, height = 950) ... dev.off()
# Optional extras: facet_wrap(~ condition); theme(text = element_text(size = 30))
ggplot(
  d_flowers_feedback,
  aes(y = prop_hedge_words, x = repNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  labs(
    title = "Proportion of hedge words per rep",
    x = "Rep number",
    y = "Proportion of hedge words"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'