# Choose the data set to analyse and read its raw chat log, discarding rows
# that have no message text.
data_target <- "pilot0"
data_location <- paste0("data/", data_target)
d_flowers <- here(data_location, "raw_chat.csv") %>%
  read_csv() %>%
  filter(!is.na(text))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## row_id = col_character(),
## index = col_double(),
## stageIds = col_character(),
## gameId = col_character(),
## createdAt = col_datetime(format = ""),
## context = col_character(),
## targetNum = col_logical(),
## repNum = col_double(),
## trialNum = col_double(),
## numPlayers = col_double(),
## text = col_character(),
## playerId = col_character(),
## type = col_character(),
## submitted = col_logical(),
## blockNum = col_double()
## )
#1.
# Count the words in every chat message, then total them per
# game / block / trial / repetition / player.
#
# A word is a run of word characters ("\\w+"). The earlier form
# `str_count(text, "\\W+") + 1` counted separators and added one, which
# over-counts whenever a message starts or ends with punctuation or
# whitespace (e.g. "ok." was counted as 2 words).
d_flowers <- d_flowers %>%
  mutate(utt_length_words = str_count(text, "\\w+")) %>%
  group_by(gameId, blockNum, trialNum, repNum, playerId)

# d_flowers is deliberately left grouped: the summary below (and the turn
# count computed later from d_flowers) is taken per group.
d_flowers_utt_length <- d_flowers %>%
  summarise(total_num_words = sum(utt_length_words))
## `summarise()` has grouped output by 'gameId', 'blockNum', 'trialNum', 'repNum'. You can override using the `.groups` argument.
# Total words per player against repetition number: jittered raw values, a
# quadratic GLM trend and a "mean_cl_boot" summary, with one panel per game
# (rows) and block (columns).
# NOTE(review): limits set on the y scale drop totals outside 0-50 before the
# smoother and summary are computed -- confirm that exclusion is intended
# (coord_cartesian(ylim = ...) would only zoom).
ggplot(
  d_flowers_utt_length,
  aes(x = repNum, y = total_num_words, color = as.factor(playerId))
) +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_grid(rows = vars(gameId), cols = vars(blockNum)) +
  scale_color_brewer(palette = "Dark2") +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    title = "Number of words",
    x = "Round number",
    y = "Number of words",
    color = "playerId"
  ) +
  theme(legend.position = "bottom")
# Number of chat messages ("turns") per game / block / trial / repetition /
# player. The grouping is spelled out here rather than inherited from
# whatever grouping d_flowers happens to carry at this point, so the count
# cannot silently change if d_flowers is regrouped or ungrouped upstream.
d_flowers_n_turns <- d_flowers %>%
  group_by(gameId, blockNum, trialNum, repNum, playerId) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'gameId', 'blockNum', 'trialNum', 'repNum'. You can override using the `.groups` argument.
# Turns per player against trial number: jittered raw counts, a quadratic GLM
# trend and a "mean_cl_boot" summary, with one panel per game (rows) and
# block (columns).
ggplot(d_flowers_n_turns, aes(x = trialNum, y = n, color = as.factor(playerId))) +
  facet_grid(cols = vars(blockNum), rows = vars(gameId)) +
  scale_color_brewer(palette = "Dark2") +
  geom_jitter(alpha = .05) +
  geom_smooth(method = glm, formula = y ~ poly(x, 2), alpha = .3) +
  stat_summary(fun.data = "mean_cl_boot") +
  labs(
    title = "Number of turns",
    y = "Number of turns",
    x = "Round number",
    color = "playerId"
  ) +
  # theme_bw() is a complete theme: adding it replaces every theme setting
  # made before it. It therefore has to come first, otherwise the legend
  # position below is reset to the default.
  theme_bw() +
  theme(legend.position = "bottom")
# 2b. Number of turns (second plot): utterances sent by each player in each
# trial of each game / block.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, gameId, blockNum),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'playerId', 'trialNum', 'gameId'. You can override using the `.groups` argument.
# Utterances per player against trial number with a linear trend, one panel
# per player.
ggplot(
  d_flowers_n_utt_speaker,
  aes(x = trialNum, y = n_utt_speaker, label = playerId)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(cols = vars(playerId)) +
  labs(x = "Round number", y = "Number of turns") +
  theme_bw()
library("spacyr")
# Tag every message with spaCy and collapse each document's POS tags into
# one comma-separated string, leaving one row per doc_id.
# mutate() + distinct() (rather than summarise()) keeps the rows in order of
# first appearance; the positional cbind() with d_flowers further down
# depends on that order.
text <- d_flowers$text
parsed <- spacy_parse(text, pos = TRUE)
parsed <- select(parsed, doc_id, pos)
parsed <- group_by(parsed, doc_id)
parsed <- mutate(parsed, pos = paste(pos, collapse = ","))
parsed <- distinct(parsed)
## Found 'spacy_condaenv'. spacyr will use this environment
## successfully initialized (spaCy Version: 3.0.6, language model: en_core_web_sm)
## (python options: type = "condaenv", value = "spacy_condaenv")
# Share of selected part-of-speech tags among the words each player produced
# per game / block / trial / repetition.
#
# cbind() pairs the rows of d_flowers and parsed purely by position, so both
# must have the same number of rows in the same order.
d_flowers_pos <- cbind(d_flowers, parsed) %>%
  # one row per POS tag
  separate_rows(pos, convert = TRUE) %>%
  # keep only the categories of interest (%in% replaces a chain of `==`)
  filter(pos %in% c("NOUN", "VERB", "ADJ", "ADP", "DET", "PRON")) %>%
  group_by(gameId, blockNum, trialNum, repNum, playerId, pos) %>%
  summarise(pos_count = n()) %>%
  # Join keys are stated explicitly so the join cannot silently change if
  # either table later gains another column with a shared name.
  left_join(
    d_flowers_utt_length,
    by = c("gameId", "blockNum", "trialNum", "repNum", "playerId")
  ) %>%
  mutate(prop_pos = pos_count / total_num_words)
## `summarise()` has grouped output by 'gameId', 'blockNum', 'trialNum', 'repNum', 'playerId'. You can override using the `.groups` argument.
## Joining, by = c("gameId", "blockNum", "trialNum", "repNum", "playerId")
# Proportion of each retained POS tag against trial number with a linear
# trend, one panel per tag laid out in a single row.
ggplot(
  d_flowers_pos,
  aes(x = trialNum, y = prop_pos, label = playerId)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_wrap(vars(pos), nrow = 1) +
  labs(x = "Round number", y = "Proportion of pos") +
  theme_bw()
# Type-token ratio (unique words / total words) for each element of `text_`.
#
# text_: character vector; every element is scored independently.
# Returns a numeric vector of the same length as `text_`. Elements that are
# NA or contain no words (empty, or only punctuation/whitespace) yield NA.
#
# Text is lower-cased and every run of punctuation or whitespace is treated
# as a single separator. Tokens are counted directly; the previous
# "count separators and add one" approach inflated the total for text with
# leading or trailing punctuation/whitespace (e.g. "Hello world." scored 2/3
# instead of 1) and reported a ratio of 1 for empty text.
ttr <- function(text_){
  text_ <- tolower(text_)
  # [:space:] (not just " ") so tabs and newlines also separate words
  text_ <- gsub("[[:punct:][:space:]]+", " ", text_)
  tokens <- strsplit(trimws(text_), " ", fixed = TRUE)
  vapply(tokens, function(tok) {
    # drop NA (from NA input) and any empty fragments
    tok <- tok[!is.na(tok) & nzchar(tok)]
    if (length(tok) == 0L) {
      return(NA_real_)
    }
    length(unique(tok)) / length(tok)
  }, numeric(1L))
}
# Concatenate all messages a player sent within one
# game / block / trial / repetition into a single text, one row per group.
#
# summarise() is used instead of mutate() + select(-text) + distinct():
# distinct() only collapses rows that agree on every remaining column, and
# the per-message columns (row_id, index, createdAt, ...) keep the rows
# apart, so each group's text -- and its type-token ratio -- would be
# repeated once per message and over-weighted in the plot and trend line.
# NOTE(review): the result now carries only the grouping keys plus
# text_block; confirm nothing else relies on other columns of d_flowers_text.
d_flowers_text <- d_flowers %>%
  group_by(gameId, blockNum, trialNum, repNum, playerId) %>%
  summarise(text_block = paste0(text, collapse = " "), .groups = "drop")

# Type-token ratio of each concatenated text.
d_flowers_ttr <- d_flowers_text %>%
  mutate(ttr_ = ttr(text_block))
#t<-str_split(text_, " ", 1:6)
#y<- paste(unlist(lapply(t,head,n=5)), collapse=" ")
# Type-token ratio against trial number with a linear trend, one panel row
# per game.
ggplot(
  d_flowers_ttr,
  aes(x = trialNum, y = ttr_, label = playerId)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_grid(gameId ~ .) +
  labs(x = "Round number", y = "Type_token ratio") +
  theme_bw()
# Utterances sent by each player in each trial of each game / block.
# Same computation as earlier in the file; presumably repeated so this
# section can be run on its own.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, gameId, blockNum),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'playerId', 'trialNum', 'gameId'. You can override using the `.groups` argument.
# Utterances sent by all players together in each trial of each game / block.
d_flowers_n_utt_game <- summarise(
  group_by(d_flowers, trialNum, gameId, blockNum),
  n_utt_game = n()
)
## `summarise()` has grouped output by 'trialNum', 'gameId'. You can override using the `.groups` argument.
# Each player's share of the utterances in a trial: their own count divided
# by the count for the whole game in that trial.
# The join keys are stated explicitly so the join cannot silently change if
# either table later gains another column with a shared name.
d_flowers_n_utt_dist <- d_flowers_n_utt_speaker %>%
  left_join(
    d_flowers_n_utt_game,
    by = c("trialNum", "gameId", "blockNum")
  ) %>%
  mutate(overall_turns = n_utt_speaker / n_utt_game)
## Joining, by = c("trialNum", "gameId", "blockNum")
# Each player's share of turns against trial number with a linear trend, one
# panel per player.
ggplot(
  d_flowers_n_utt_dist,
  aes(x = trialNum, y = overall_turns, label = playerId)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_wrap(vars(playerId)) +
  labs(x = "Round number", y = "Proportion of turns") +
  theme_bw()
# Utterances sent by each player in each trial of each game / block; used
# below as the denominator for the proportion of questions.
# Same computation as earlier in the file; presumably repeated so this
# section can be run on its own.
d_flowers_n_utt_speaker <- summarise(
  group_by(d_flowers, playerId, trialNum, gameId, blockNum),
  n_utt_speaker = n()
)
## `summarise()` has grouped output by 'playerId', 'trialNum', 'gameId'. You can override using the `.groups` argument.
# Proportion of each player's utterances in a trial that contain a question
# mark.
#
# Questions and utterances are counted in the same per-player/trial summary.
# The previous approach counted rows per sentence type and then kept only the
# "question" rows, which dropped every player/trial without any question, so
# a proportion of 0 could never appear and the plotted trend was biased
# upward.
# The columns of the result are unchanged (sentence_type is kept as the
# constant "question"); n_utt_speaker is the same per-player/trial count that
# d_flowers_n_utt_speaker holds.
d_flowers_q <- d_flowers %>%
  group_by(playerId, trialNum, gameId, blockNum) %>%
  summarise(
    sentence_type = "question",
    n_sentence_Type = sum(str_detect(text, "\\?")),
    n_utt_speaker = n(),
    .groups = "keep"
  ) %>%
  mutate(prop_questions = n_sentence_Type / n_utt_speaker)
## `summarise()` has grouped output by 'playerId', 'trialNum', 'gameId', 'blockNum'. You can override using the `.groups` argument.
## Joining, by = c("playerId", "trialNum", "gameId", "blockNum")
# Proportion of questions against trial number with a linear trend, one
# panel per player.
ggplot(
  d_flowers_q,
  aes(x = trialNum, y = prop_questions, label = playerId)
) +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_point(alpha = .1) +
  facet_wrap(vars(playerId)) +
  labs(x = "Round number", y = "Proportion of questions") +
  theme_bw()