# Load the filtered raw chat log produced by the preprocessing step.
d <- read_csv(
  "/Users/lscpuser/Documents/AA-flowers2/AA-flowers/data/processed_data/joined_data/filtered_raw_chat.csv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## gameId = col_character(),
## trialNum = col_double(),
## condition = col_character(),
## chatEnabled = col_logical(),
## playerId = col_character(),
## name = col_character(),
## text = col_character(),
## participantAction = col_character(),
## roundID = col_character(),
## index = col_double(),
## createdAt = col_datetime(format = ""),
## repNum = col_double(),
## blockNum = col_double(),
## numPlayers = col_double(),
## type = col_character(),
## submitted = col_logical(),
## playerResponse = col_character(),
## playerUtility = col_double()
## )
# Keep only chat messages that have text and are not from the "coopCartel"
# condition. All three conditions are applied in one pass; the second,
# redundant coopCartel filter that used to follow the export was a no-op.
d_flowers <- d %>%
  filter(
    type == "message",
    !is.na(text),
    condition != "coopCartel"
  )

# Export a copy of the table to the Downloads folder.
# NOTE(review): this writes the unfiltered `d`, not `d_flowers` -- confirm
# that the raw table is really what should end up in raw_chat3.csv.
write_csv(d, "~/Downloads/raw_chat3.csv")
#data_target = "pilot0"
#data_location=paste0("data/",data_target)
#d_flowers<- read_csv(here(data_location, "raw_chat2.csv"))%>%
#filter(!is.na(text))
# Per game x block x condition: total number of words and mean length of
# utterance (MLU, in words).
d_flowers_utt_length_block <- d_flowers %>%
  mutate(
    # Collapse every run of punctuation/spaces into one space and trim the
    # result, so that a message starting with punctuation does not yield an
    # empty first token (previously counted as a word) and a punctuation-only
    # message counts as zero words.
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    mlu = mean(utt_length_words),
    .groups = "drop_last"
  )

# Total number of words per block, by condition.
#jpeg("~/Desktop/plots/nwords_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_utt_length_block,
  aes(x = blockNum, y = total_num_words, color = condition)
) +
  geom_point() +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of words per block",
    y = "Total number of words",
    x = "Block number"
  ) +
  # theme_bw() is a complete theme and must come before the tweak below;
  # in the opposite order it resets legend.position.
  theme_bw() +
  theme(legend.position = "bottom")
#dev.off()

# Mean length of utterance per block, by condition.
#jpeg("~/Desktop/plots/mlu_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_utt_length_block,
  aes(x = blockNum, y = mlu, color = condition)
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(title = "MLU per block", y = "MLU", x = "Block number") +
  theme_bw() +
  theme(legend.position = "bottom")
# theme(text = element_text(size = 30))
#dev.off()
# Per game x trial x condition: total number of words and mean length of
# utterance (MLU, in words).
d_flowers_utt_length_trial <- d_flowers %>%
  mutate(
    # Collapse punctuation/space runs and trim, so leading punctuation does
    # not create an empty token that would be counted as a word.
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    mlu = mean(utt_length_words),
    .groups = "drop_last"
  )

# Total number of words per trial, by condition.
#jpeg("~/Desktop/plots/nwords_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_utt_length_trial,
  aes(x = trialNum, y = total_num_words, color = condition)
) +
  geom_point() +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of words per trial",
    y = "Total number of words",
    x = "Trial number"
  ) +
  # Complete theme first; otherwise it resets legend.position.
  theme_bw() +
  theme(legend.position = "bottom")
#dev.off()

# Mean length of utterance per trial, by condition.
#jpeg("~/Desktop/plots/mlu_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_utt_length_trial,
  aes(x = trialNum, y = mlu, color = condition)
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(title = "MLU per trial", y = "MLU", x = "Trial number") +
  theme_bw() +
  theme(legend.position = "bottom")
#theme(text = element_text(size = 30))
#dev.off()
# Per game x repetition x condition: total number of words and mean length
# of utterance (MLU, in words).
d_flowers_utt_length_rep <- d_flowers %>%
  mutate(
    # Collapse punctuation/space runs and trim, so leading punctuation does
    # not create an empty token that would be counted as a word.
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(
    total_num_words = sum(utt_length_words),
    mlu = mean(utt_length_words),
    .groups = "drop_last"
  )

# Total number of words per repetition, by condition.
#jpeg("~/Desktop/plots/nword_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_utt_length_rep,
  aes(x = repNum, y = total_num_words, color = condition)
) +
  geom_point() +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of words per rep",
    y = "Total number of words",
    x = "Rep number"
  ) +
  # Complete theme first; otherwise it resets legend.position.
  theme_bw() +
  theme(legend.position = "bottom")
#dev.off()

# Mean length of utterance per repetition, by condition.
#jpeg("~/Desktop/plots/mlu_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_utt_length_rep,
  aes(x = repNum, y = mlu, color = condition)
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(title = "MLU per rep", y = "MLU", x = "Rep number") +
  theme_bw() +
  theme(legend.position = "bottom")
# theme(text = element_text(size = 30))
#dev.off()
# Number of chat turns (messages) per game x condition x repetition.
d_flowers_n_turns_rep <- d_flowers %>%
  group_by(gameId, condition, repNum) %>%
  summarise(n = n(), .groups = "drop_last")

#jpeg("~/Desktop/plots/nturn_rep.jpg", width = 950, height = 950)
ggplot(d_flowers_n_turns_rep, aes(x = repNum, y = n, color = condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns per Rep",
    y = "Number of turns",
    x = "Rep number"
  ) +
  # theme_bw() is a complete theme and must come before the legend tweak;
  # in the opposite order it resets legend.position.
  theme_bw() +
  theme(legend.position = "bottom")
# theme(text = element_text(size = 30))
#dev.off()
# Number of chat turns (messages) per game x condition x trial.
d_flowers_n_turns_trial <- d_flowers %>%
  group_by(gameId, condition, trialNum) %>%
  summarise(n = n(), .groups = "drop_last")

#jpeg("~/Desktop/plots/nturn_trial.jpg", width = 950, height = 950)
ggplot(d_flowers_n_turns_trial, aes(x = trialNum, y = n, color = condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns per trial",
    y = "Number of turns",
    x = "Trial number"
  ) +
  # theme_bw() is a complete theme and must come before the legend tweak;
  # in the opposite order it resets legend.position.
  theme_bw() +
  theme(legend.position = "bottom")
#theme(text = element_text(size = 30))
#dev.off()
# Number of chat turns (messages) per game x condition x block.
d_flowers_n_turns_block <- d_flowers %>%
  group_by(gameId, condition, blockNum) %>%
  summarise(n = n(), .groups = "drop_last")

#jpeg("~/Desktop/plots/nturn_block.jpg", width = 950, height = 950)
ggplot(d_flowers_n_turns_block, aes(x = blockNum, y = n, color = condition)) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns per block",
    y = "Number of turns",
    x = "Block number"
  ) +
  # theme_bw() is a complete theme and must come before the legend tweak;
  # in the opposite order it resets legend.position.
  theme_bw() +
  theme(legend.position = "bottom")
# theme(text = element_text(size = 30))
#dev.off()
#second plot ### 2b. Number of turns
# Count the messages sent in each trial of each game (one row per
# trial x game x condition).
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, trialNum, gameId, condition),
  n_utt_speaker = n(),
  .groups = "drop_last"
)

# Linear trend of the utterance count over trials, by condition.
#jpeg("~/Desktop/plots/nutts_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_n_utt_speaker,
  aes(x = trialNum, y = n_utt_speaker, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(x = "Trial number", y = "Number of utterances") +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
# Count the messages sent in each block of each game (one row per
# block x game x condition).
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, blockNum, gameId, condition),
  n_utt_speaker = n(),
  .groups = "drop_last"
)

# Linear trend of the utterance count over blocks, by condition.
#jpeg("~/Desktop/plots/nutts_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_n_utt_speaker,
  aes(x = blockNum, y = n_utt_speaker, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(x = "Block number", y = "Number of utterances") +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
# Count the messages sent in each repetition of each game (one row per
# rep x game x condition).
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, repNum, gameId, condition),
  n_utt_speaker = n(),
  .groups = "drop_last"
)

# Linear trend of the utterance count over repetitions, by condition.
#jpeg("~/Desktop/plots/nutts_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_n_utt_speaker,
  aes(x = repNum, y = n_utt_speaker, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(x = "Rep number", y = "Number of utterances") +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
# Run spaCy's part-of-speech tagger over every message, then collapse the
# tags of each document into one comma-separated string so that `parsed`
# holds a single (doc_id, pos) row per message.
text <- d_flowers$text
parsed <- spacy_parse(text, pos = TRUE)
parsed <- select(parsed, doc_id, pos)
parsed <- group_by(parsed, doc_id)
parsed <- mutate(parsed, pos = paste(pos, collapse = ","))
parsed <- distinct(parsed)
## Found 'spacy_condaenv'. spacyr will use this environment
## successfully initialized (spaCy Version: 3.0.6, language model: en_core_web_sm)
## (python options: type = "condaenv", value = "spacy_condaenv")
# Share of selected part-of-speech tags among all words, per
# condition x block x game.
d_flowers_pos <- d_flowers %>%
  # Attach each message's tags by document id instead of by row position
  # (cbind), so a message for which the parser returned no tokens cannot
  # shift or break the alignment. spacy_parse() labels unnamed input texts
  # "text1", "text2", ... in input order.
  mutate(doc_id = paste0("text", row_number())) %>%
  left_join(parsed, by = "doc_id") %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(condition, blockNum, gameId, pos) %>%
  summarise(pos_count = n(), .groups = "drop_last") %>%
  left_join(
    d_flowers_utt_length_block,
    by = c("condition", "blockNum", "gameId")
  ) %>%
  mutate(prop_pos = pos_count / total_num_words)

#jpeg("~/Desktop/plots/pos_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_pos,
  aes(x = blockNum, y = prop_pos, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(pos)) +
  xlab("Block number") +
  ylab("Proportion of pos") +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
# Share of selected part-of-speech tags among all words, per
# condition x trial x game.
d_flowers_pos <- d_flowers %>%
  # Attach each message's tags by document id instead of by row position
  # (cbind), so a message for which the parser returned no tokens cannot
  # shift or break the alignment. spacy_parse() labels unnamed input texts
  # "text1", "text2", ... in input order.
  mutate(doc_id = paste0("text", row_number())) %>%
  left_join(parsed, by = "doc_id") %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(condition, trialNum, gameId, pos) %>%
  summarise(pos_count = n(), .groups = "drop_last") %>%
  left_join(
    d_flowers_utt_length_trial,
    by = c("condition", "trialNum", "gameId")
  ) %>%
  mutate(prop_pos = pos_count / total_num_words)

#jpeg("~/Desktop/plots/pos_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_pos,
  aes(x = trialNum, y = prop_pos, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(pos)) +
  xlab("Trial number") +
  ylab("Proportion of pos") +
  ylim(0, 5) +
  theme_bw()
#theme(text = element_text(size = 30))
#dev.off()
# Share of selected part-of-speech tags among all words, per
# condition x repetition x game.
d_flowers_pos <- d_flowers %>%
  # Attach each message's tags by document id instead of by row position
  # (cbind), so a message for which the parser returned no tokens cannot
  # shift or break the alignment. spacy_parse() labels unnamed input texts
  # "text1", "text2", ... in input order.
  mutate(doc_id = paste0("text", row_number())) %>%
  left_join(parsed, by = "doc_id") %>%
  separate_rows(pos, convert = TRUE) %>%
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(condition, repNum, gameId, pos) %>%
  summarise(pos_count = n(), .groups = "drop_last") %>%
  left_join(
    d_flowers_utt_length_rep,
    by = c("condition", "repNum", "gameId")
  ) %>%
  mutate(prop_pos = pos_count / total_num_words)

#jpeg("~/Desktop/plots/pos_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_pos,
  aes(x = repNum, y = prop_pos, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(pos)) +
  xlab("Rep number") +
  ylab("Proportion of pos") +
  ylim(0, 5) +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
#' Type-token ratio of each string in a character vector.
#'
#' Tokens are the whitespace-delimited pieces of the lower-cased text;
#' punctuation is left attached to its token. The ratio is the number of
#' distinct tokens divided by the total number of tokens.
#'
#' Distinct tokens are counted directly. The previous implementation
#' re-joined the unique tokens and counted runs of non-word characters,
#' which also counted apostrophes, hyphens and trailing punctuation and
#' could therefore return ratios above 1.
#'
#' @param text_ Character vector; each element is scored independently.
#' @return Numeric vector the same length as `text_`, with values in (0, 1];
#'   `NA` for elements that contain no tokens (empty, blank or `NA`).
ttr <- function(text_) {
  tokens <- strsplit(tolower(text_), "[[:space:]]+")
  # Drop NA and the empty strings produced by leading whitespace.
  tokens <- lapply(tokens, function(tok) tok[!is.na(tok) & nzchar(tok)])
  total_words <- lengths(tokens)
  total_unique <- vapply(
    tokens,
    function(tok) length(unique(tok)),
    integer(1L)
  )
  ttr_ <- total_unique / total_words
  # 0 / 0 would give NaN; report "no tokens" as NA instead.
  ttr_[total_words == 0L] <- NA_real_
  ttr_
}
# Concatenate all messages of a game x trial into one text and compute its
# type-token ratio.
# summarise() yields exactly one row per group. The previous
# mutate() + select(-text) + distinct() kept the per-message columns, so
# every group was repeated once per message and weighted the fit accordingly.
d_flowers_text_trial <- d_flowers %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

d_flowers_ttr_trial <- d_flowers_text_trial %>%
  mutate(ttr_ = ttr(text_block))

#jpeg("~/Desktop/plots/ttr_trial.jpg", width = 950, height = 950)
ggplot(d_flowers_ttr_trial, aes(x = trialNum, y = ttr_, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  xlab("Trial number") +
  ylab("Type_token ratio") +
  ylim(0, 1) +
  theme_bw() +
  ggtitle("TTR per trial")
# theme(text = element_text(size = 30))
#dev.off()
# Concatenate all messages of a game x repetition into one text and compute
# its type-token ratio.
# summarise() yields exactly one row per group. The previous
# mutate() + select(-text) + distinct() kept the per-message columns, so
# every group was repeated once per message and weighted the fit accordingly.
d_flowers_text_rep <- d_flowers %>%
  group_by(gameId, repNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

d_flowers_ttr_rep <- d_flowers_text_rep %>%
  mutate(ttr_ = ttr(text_block))

#jpeg("~/Desktop/plots/ttr_rep.jpg", width = 950, height = 950)
ggplot(d_flowers_ttr_rep, aes(x = repNum, y = ttr_, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  xlab("Rep number") +
  ylab("Type_token ratio") +
  ylim(0, 1) +
  theme_bw() +
  ggtitle("TTR per rep")
# theme(text = element_text(size = 30))
#dev.off()
# Concatenate all messages of a game x block into one text and compute its
# type-token ratio.
# summarise() yields exactly one row per group. The previous
# mutate() + select(-text) + distinct() kept the per-message columns, so
# every group was repeated once per message and weighted the fit accordingly.
d_flowers_text_block <- d_flowers %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

d_flowers_ttr_block <- d_flowers_text_block %>%
  mutate(ttr_ = ttr(text_block))

#jpeg("~/Desktop/plots/ttr_block.jpg", width = 950, height = 950)
ggplot(d_flowers_ttr_block, aes(x = blockNum, y = ttr_, color = condition)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  xlab("Block number") +
  ylab("Type_token ratio") +
  ylim(0, 1) +
  theme_bw() +
  ggtitle("TTR per block")
# theme(text = element_text(size = 30))
#dev.off()
# Messages per player within each trial of each game.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, gameId, condition),
  n_utt_speaker = n(),
  .groups = "drop_last"
)

# Messages per trial of each game, across all players.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, trialNum, gameId, condition),
  n_utt_game = n(),
  .groups = "drop_last"
)

# Each player's share of the messages in that trial.
d_flowers_n_utt_dist <- left_join(
  d_flowers_n_utt_speaker,
  d_flowers_n_utt_game,
  by = c("trialNum", "gameId", "condition")
) %>%
  mutate(overall_turns = n_utt_speaker / n_utt_game)

#jpeg("~/Desktop/plots/proputts_trial_speaker.jpg", width = 950, height = 950)
ggplot(
  d_flowers_n_utt_dist,
  aes(x = trialNum, y = overall_turns, label = playerId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(
    x = "Trial number",
    y = "Proportion of turns",
    title = "Proportion of turns per trial and speaker"
  ) +
  theme_bw()
#theme(text = element_text(size = 30))
#dev.off()
# Messages per player within each block of each game.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, blockNum, gameId, condition),
  n_utt_speaker = n(),
  .groups = "drop_last"
)

# Messages per block of each game, across all players.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, blockNum, gameId, condition),
  n_utt_game = n(),
  .groups = "drop_last"
)

# Each player's share of the messages in that block.
d_flowers_n_utt_dist <- left_join(
  d_flowers_n_utt_speaker,
  d_flowers_n_utt_game,
  by = c("blockNum", "gameId", "condition")
) %>%
  mutate(overall_turns = n_utt_speaker / n_utt_game)

#jpeg("~/Desktop/plots/proputts_block_speaker.jpg", width = 950, height = 950)
ggplot(
  d_flowers_n_utt_dist,
  aes(x = blockNum, y = overall_turns, label = playerId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(
    x = "Block number",
    y = "Proportion of turns",
    title = "Proportion of turns per block and speaker"
  ) +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
# Messages per player within each repetition of each game.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, repNum, gameId, condition),
  n_utt_speaker = n(),
  .groups = "drop_last"
)

# Messages per repetition of each game, across all players.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, repNum, gameId, condition),
  n_utt_game = n(),
  .groups = "drop_last"
)

# Each player's share of the messages in that repetition.
d_flowers_n_utt_dist <- left_join(
  d_flowers_n_utt_speaker,
  d_flowers_n_utt_game,
  by = c("repNum", "gameId", "condition")
) %>%
  mutate(overall_turns = n_utt_speaker / n_utt_game)

#jpeg("~/Desktop/plots/proputts_rep_speaker.jpg", width = 950, height = 950)
ggplot(
  d_flowers_n_utt_dist,
  aes(x = repNum, y = overall_turns, label = playerId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  labs(
    x = "Rep number",
    y = "Proportion of turns",
    title = "Proportion of turns per rep and speaker"
  ) +
  theme_bw()
# theme(text = element_text(size = 30))
#dev.off()
# Messages per trial of each game.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(trialNum, gameId, condition) %>%
  summarise(n_utt_speaker = n(), .groups = "drop_last")

# Share of messages containing a question mark, per trial x game.
# The count is taken inside each group, so trials with no question at all
# are kept with a proportion of 0. Previously those trials were filtered
# out, which biased the plotted proportions upward.
d_flowers_q <- d_flowers %>%
  group_by(trialNum, gameId, condition) %>%
  summarise(
    sentence_type = "question",
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "drop_last"
  ) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)

#jpeg("~/Desktop/plots/propquestions_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_q,
  aes(x = trialNum, y = prop_questions, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .5) +
  xlab("Trial number") +
  ylab("Proportion of questions") +
  theme_bw() +
  ggtitle("Proportion of questions per trial")
# theme(text = element_text(size = 30))
#dev.off()
#####
# Messages per repetition of each game.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(repNum, gameId, condition) %>%
  summarise(n_utt_speaker = n(), .groups = "drop_last")

# Share of messages containing a question mark, per repetition x game.
# The count is taken inside each group, so repetitions with no question at
# all are kept with a proportion of 0. Previously those were filtered out,
# which biased the plotted proportions upward.
d_flowers_q <- d_flowers %>%
  group_by(repNum, gameId, condition) %>%
  summarise(
    sentence_type = "question",
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "drop_last"
  ) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)

#jpeg("~/Desktop/plots/propquestions_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_q,
  aes(x = repNum, y = prop_questions, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .5) +
  xlab("Rep number") +
  ylab("Proportion of questions") +
  theme_bw() +
  ggtitle("Proportion of questions per rep")
#theme(text = element_text(size = 30))
#dev.off()
# Messages per block of each game.
d_flowers_n_utt_speaker <- d_flowers %>%
  group_by(blockNum, gameId, condition) %>%
  summarise(n_utt_speaker = n(), .groups = "drop_last")

# Share of messages containing a question mark, per block x game.
# The count is taken inside each group, so blocks with no question at all
# are kept with a proportion of 0. Previously those were filtered out,
# which biased the plotted proportions upward.
d_flowers_q <- d_flowers %>%
  group_by(blockNum, gameId, condition) %>%
  summarise(
    sentence_type = "question",
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "drop_last"
  ) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)

#jpeg("~/Desktop/plots/propquestions_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_q,
  aes(x = blockNum, y = prop_questions, label = gameId, color = condition)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .5) +
  xlab("Block number") +
  ylab("Proportion of questions") +
  theme_bw() +
  ggtitle("Proportion of questions per block")
# theme(text = element_text(size = 30))
#dev.off()
### Politeness markers

# Backchannel words
# (https://www.reading.ac.uk/AcaDepts/ll/app_ling/internal/Cutrone_vol_2.pdf)
backchanneling <- c(
  "mm", "okay", "uh-huh", "ok", "mm-hm", "uh", "um", "agree",
  "uhuh", "mmm", "wow", "great", "mm", "hm", "ummm", "hmmmm",
  "huh", "un", "um", "ohh", "ooo", "see", "oooo", "ununun",
  "oh", "ah", "true", "agree", "right", "yeah", "good", "really"
)

# Hedge words
# (https://www.researchgate.net/publication/280125979_Linguistic_Markers_and_Stylistic_Attributes_of_Hedging_in_English_Academic_Papers_Written_by_Native_and_Non-Native_Speakers_of_English/figures)
hedging <- c(
  "may", "perhaps", "might", "possible", "likely", "possibly",
  "maybe", "probable", "appear", "seem", "suggest", "sometimes",
  "seemingly", "apparently", "often", "could", "usually", "likely",
  "tend", "sometimes", "probably", "primarily", "tendency", "largely"
)
# Total number of words per game x trial x condition.
d_flowers_nwords <- d_flowers %>%
  mutate(
    # Collapse punctuation/space runs and trim, so leading punctuation does
    # not create an empty token that would be counted as a word.
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words), .groups = "drop_last")

# Proportion of backchannel and hedge words per game x trial x condition.
# Messages are lower-cased before matching, so capitalised markers ("Ok",
# "Maybe") are found, and every matching token is counted, which keeps the
# numerator on the same token basis as total_num_words.
# Punctuation is removed before tokenising, so the hyphenated list entries
# ("uh-huh", "mm-hm") are matched through their parts ("uh", "huh", "mm", "hm").
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(
      trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
      " ",
      fixed = TRUE
    ),
    backchannel_length = vapply(
      tokens,
      function(tok) sum(tok %in% backchanneling),
      integer(1L)
    ),
    hedge_length = vapply(
      tokens,
      function(tok) sum(tok %in% hedging),
      integer(1L)
    )
  ) %>%
  group_by(gameId, trialNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "trialNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

#jpeg("~/Desktop/plots/propbackchannel_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_feedback,
  aes(y = prop_backchannel_words, x = trialNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  xlab("Trial number") +
  ylab("Proportion of backchannel words") +
  theme_bw() +
  ggtitle("Proportion of backchannel words per trial")
#dev.off()

#jpeg("~/Desktop/plots/prophedge_trial.jpg", width = 950, height = 950)
ggplot(
  d_flowers_feedback,
  aes(y = prop_hedge_words, x = trialNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  xlab("Trial number") +
  ylab("Proportion of hedge words") +
  theme_bw() +
  ggtitle("Proportion of hedge words per trial")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# theme(text = element_text(size = 30))
#dev.off()
#ggplot(d_flowers_feedback, aes(y=trialNum , x=backchannel_length, label=gameId)) +
# geom_point(alpha = .5)+
# geom_smooth() +
# geom_text_repel(aes(label=ifelse(backchannel=="character(0)", "", backchannel), #max.overlaps = 20))+
# xlim(c(1,5))+
# theme_bw()+
# facet_wrap(~ condition)
#ggplot(d_flowers_feedback, aes(y=trialNum , x=hedge_length, label=gameId)) +
# geom_point(alpha = .5)+
# geom_smooth() +
# geom_text_repel(aes(label=ifelse(hedge=="character(0)", "", hedge), max.overlaps = 20))+
# xlim(c(1,5))+
# theme_bw()+
# facet_wrap(~ condition)
# Total number of words per game x block x condition.
d_flowers_nwords <- d_flowers %>%
  mutate(
    # Collapse punctuation/space runs and trim, so leading punctuation does
    # not create an empty token that would be counted as a word.
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words), .groups = "drop_last")

# Proportion of backchannel and hedge words per game x block x condition.
# Messages are lower-cased before matching, so capitalised markers ("Ok",
# "Maybe") are found, and every matching token is counted, which keeps the
# numerator on the same token basis as total_num_words.
# Punctuation is removed before tokenising, so the hyphenated list entries
# ("uh-huh", "mm-hm") are matched through their parts ("uh", "huh", "mm", "hm").
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(
      trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
      " ",
      fixed = TRUE
    ),
    backchannel_length = vapply(
      tokens,
      function(tok) sum(tok %in% backchanneling),
      integer(1L)
    ),
    hedge_length = vapply(
      tokens,
      function(tok) sum(tok %in% hedging),
      integer(1L)
    )
  ) %>%
  group_by(gameId, blockNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "blockNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

#jpeg("~/Desktop/plots/propbackchannel_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_feedback,
  aes(y = prop_backchannel_words, x = blockNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  xlab("Block number") +
  ylab("Proportion of backchannel words") +
  theme_bw() +
  ggtitle("Proportion of backchannel words per block")
#dev.off()

#jpeg("~/Desktop/plots/prophedge_block.jpg", width = 950, height = 950)
ggplot(
  d_flowers_feedback,
  aes(y = prop_hedge_words, x = blockNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  xlab("Block number") +
  ylab("Proportion of hedge words") +
  theme_bw() +
  ggtitle("Proportion of hedge words per block")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# theme(text = element_text(size = 30))
#dev.off()
# Total number of words per game x repetition x condition.
d_flowers_nwords <- d_flowers %>%
  mutate(
    # Collapse punctuation/space runs and trim, so leading punctuation does
    # not create an empty token that would be counted as a word.
    text = trimws(gsub("[[:punct:] ]+", " ", text)),
    utt_length_words = lengths(strsplit(text, " ", fixed = TRUE))
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(total_num_words = sum(utt_length_words), .groups = "drop_last")

# Proportion of backchannel and hedge words per game x repetition x condition.
# Messages are lower-cased before matching, so capitalised markers ("Ok",
# "Maybe") are found, and every matching token is counted, which keeps the
# numerator on the same token basis as total_num_words.
# Punctuation is removed before tokenising, so the hyphenated list entries
# ("uh-huh", "mm-hm") are matched through their parts ("uh", "huh", "mm", "hm").
d_flowers_feedback <- d_flowers %>%
  mutate(
    tokens = strsplit(
      trimws(gsub("[[:punct:] ]+", " ", tolower(text))),
      " ",
      fixed = TRUE
    ),
    backchannel_length = vapply(
      tokens,
      function(tok) sum(tok %in% backchanneling),
      integer(1L)
    ),
    hedge_length = vapply(
      tokens,
      function(tok) sum(tok %in% hedging),
      integer(1L)
    )
  ) %>%
  group_by(gameId, repNum, condition) %>%
  summarise(
    total_num_backchannel = sum(backchannel_length),
    total_num_hedge = sum(hedge_length),
    .groups = "drop_last"
  ) %>%
  left_join(d_flowers_nwords, by = c("gameId", "repNum", "condition")) %>%
  mutate(
    prop_backchannel_words = total_num_backchannel / total_num_words,
    prop_hedge_words = total_num_hedge / total_num_words
  )

#jpeg("~/Desktop/plots/propbackchannel_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_feedback,
  aes(y = prop_backchannel_words, x = repNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  xlab("Rep number") +
  ylab("Proportion of backchannel words") +
  theme_bw() +
  ggtitle("Proportion of backchannel words per rep")
#dev.off()

#jpeg("~/Desktop/plots/prophedge_rep.jpg", width = 950, height = 950)
ggplot(
  d_flowers_feedback,
  aes(y = prop_hedge_words, x = repNum, label = gameId, color = condition)
) +
  geom_point(alpha = .5) +
  geom_smooth() +
  xlab("Rep number") +
  ylab("Proportion of hedge words") +
  theme_bw() +
  ggtitle("Proportion of hedge words per rep")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#theme(text = element_text(size = 30))
#dev.off()