calc_age <- function(birthDate, refDate = Sys.Date()) {
require(lubridate)
period <- as.period(interval(birthDate, refDate),
unit = "day")
round(period$day/365, 2)
}
loading and cleaning data
data_path <- "../data/raw_data/v3_lookit"
file <- "polcon_lookit_raw_data.csv"
# file <- "test.csv"
d_raw <- read_csv(here::here(data_path, file))
ord <- read_csv(here::here("../data/info/v3/polcon_order.csv"))
d_tidy <- d_raw %>%
# filter(response_completed == "True") %>%
# filter(participant_id == "3434" | participant_id == "3432") %>%
filter(!grepl("test", child_additional_information, ignore.case=TRUE)) %>%
# mutate(order = case_when(
# grepl("'conditionNum': 0", substr(response_conditions, 1, 108)) ~ "1",
# grepl("'conditionNum': 1", substr(response_conditions, 1, 108)) ~"2"
# )) %>%
mutate(response_conditions = gsub("\'","\"",response_conditions)) %>%
mutate(response_exp_data = gsub("\'","\"",response_exp_data),
response_exp_data = gsub("None","\"None\"",response_exp_data),
response_exp_data = gsub("True","\"True\"",response_exp_data),
response_exp_data = gsub("False","\"False\"",response_exp_data),
response_exp_data = gsub("\"s ","\'s ",response_exp_data),
response_exp_data = gsub("\"rude\"","\\'rude\\'",response_exp_data),
response_exp_data = gsub("I\"m ","I\'m ",response_exp_data),
response_exp_data = gsub("I\"d ","I\'d ",response_exp_data),
response_exp_data = gsub("they\"d ","they\'d ",response_exp_data),
response_exp_data = gsub("n\"t ","n\'t ",response_exp_data),
response_exp_data = gsub("\"please\"","\'please\'",response_exp_data),
response_exp_data = gsub("\"can ","\'can ",response_exp_data),
response_exp_data = gsub("please\".\"","please\'.\"",response_exp_data),
response_exp_data = gsub("\"\"None\"\"","\"None\"", response_exp_data),
response_exp_data = gsub("\"Next\"","\\'Next\\'",response_exp_data)
) %>%
select(response_id, response_conditions, child_birthday, response_exp_data) %>%
rename(subid = response_id) %>%
mutate(
# order 1 = cond 0, order 2 = cond 1
order_practice =
jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_conditions)))$`5-practice-trial`$conditionNum,
order_test =
jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_conditions)))$`6-nested-trials`$conditionNum,
dot =
jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`0-0-video-config`$eventTimings,
practice1 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`9-9-practice-trial`$currentlyHighlighted) %>%
filter(!is.na(practice1)) %>%
mutate(
practice2 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`10-10-practice-trial`$currentlyHighlighted,
test1 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`16-16-nested-trials`$currentlyHighlighted,
test2 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`22-22-nested-trials`$currentlyHighlighted,
test3 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`28-28-nested-trials`$currentlyHighlighted,
test4 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`34-34-nested-trials`$currentlyHighlighted,
test5 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`40-40-nested-trials`$currentlyHighlighted,
test6 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`46-46-nested-trials`$currentlyHighlighted,
test7 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`52-52-nested-trials`$currentlyHighlighted,
test8 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`58-58-nested-trials`$currentlyHighlighted,
test9 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`64-64-nested-trials`$currentlyHighlighted,
test10 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`70-70-nested-trials`$currentlyHighlighted,
test11 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`76-76-nested-trials`$currentlyHighlighted,
test12 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`82-82-nested-trials`$currentlyHighlighted
) %>%
rowwise() %>%
mutate(dot = as.Date(substr(dot$timestamp[1], 1, 10))) %>%
filter(dot > "2018-04-05") %>%
mutate(age = calc_age(child_birthday, dot))
d_tidy %>%
ggplot(., aes(practice1)) +
geom_bar() +
facet_grid(floor(age)~order_practice)
d_tidy %>%
ggplot(., aes(practice2)) +
geom_bar() +
facet_grid(floor(age)~order_practice)
d <- d_tidy %>%
filter(!is.na(test6)) %>%
filter((order_practice == 0 & practice1 == "speaker1" & practice2 == "speaker2") |
(order_practice == 1 & practice1 == "speaker2" & practice2 == "speaker1")) %>%
select(-order_practice, -practice1, -practice2) %>%
gather(trial_num, choice, test1:test12) %>%
mutate(trial_num = as.numeric(as.character(substr(trial_num, 5, 6)))) %>%
left_join(., ord) %>%
mutate(correct = as.numeric(as.character(case_when(
choice == correct_answer ~ "1",
choice != correct_answer ~ "0",
TRUE ~ "NA"
))))
write.csv(d, here::here("../data/polcon_v3_lookit_processed.csv"), row.names = FALSE)
d <- read_csv(here::here("../data/polcon_v3_lookit_processed.csv"))
## Parsed with column specification:
## cols(
## subid = col_integer(),
## response_conditions = col_character(),
## child_birthday = col_date(format = ""),
## response_exp_data = col_character(),
## order_test = col_integer(),
## dot = col_date(format = ""),
## age = col_double(),
## trial_num = col_integer(),
## choice = col_character(),
## question = col_character(),
## correct_answer = col_character(),
## request_type = col_character(),
## correct = col_integer()
## )
d %>%
mutate(agebin = floor(age)) %>%
group_by(agebin, subid) %>%
summarise(n=n()) %>%
group_by(agebin) %>%
summarise(n=n())
## # A tibble: 3 x 2
## agebin n
## <dbl> <int>
## 1 2 21
## 2 3 31
## 3 4 24
d %>%
mutate(subid = as.factor(subid)) %>%
filter(!is.na(correct)) %>%
group_by(age, question, subid) %>%
summarise(correct = mean(correct)) %>%
mutate(agebin = floor(age)) %>%
group_by(agebin, question) %>%
multi_boot_standard(col="correct", na.rm=TRUE) %>%
rename(correct = mean) %>%
ggplot(aes(x=agebin, y=correct, fill = agebin)) +
geom_bar(stat="identity", position="dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper)) +
geom_hline(yintercept = .5, lty=2) +
ylab("mean correct answer") +
facet_grid(.~question)
d %>%
mutate(subid = as.factor(subid)) %>%
filter(!is.na(correct)) %>%
group_by(age, question, subid) %>%
summarise(correct = mean(correct)) %>%
# group_by(question) %>%
# multi_boot_standard(col="correct", na.rm=TRUE) %>%
ggplot(aes(x=age, y=correct)) +
geom_point(aes(col=subid)) +
geom_hline(yintercept = .5, lty=2) +
geom_smooth(size=.4, alpha=.1) +
ylab("mean correct answer") +
facet_grid(.~question)
## `geom_smooth()` using method = 'loess'
d %>%
mutate(subid = as.factor(subid)) %>%
filter(!is.na(correct)) %>%
group_by(age, request_type, subid) %>%
summarise(correct = mean(correct)) %>%
mutate(agebin = floor(age)) %>%
group_by(agebin, request_type) %>%
multi_boot_standard(col="correct", na.rm=TRUE) %>%
rename(correct = mean) %>%
ggplot(aes(x=agebin, y=correct, fill = agebin)) +
geom_bar(stat="identity", position="dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper)) +
geom_hline(yintercept = .5, lty=2) +
ylab("mean correct answer") +
facet_grid(.~request_type)
d %>%
mutate(subid = as.factor(subid)) %>%
filter(!is.na(correct)) %>%
group_by(age, request_type, subid) %>%
summarise(correct = mean(correct)) %>%
# group_by(question) %>%
# multi_boot_standard(col="correct", na.rm=TRUE) %>%
ggplot(aes(x=age, y=correct)) +
geom_point(aes(col=subid)) +
geom_hline(yintercept = .5, lty=2) +
geom_smooth(size=.4, alpha=.1) +
ylab("mean correct answer") +
facet_grid(.~request_type)
## `geom_smooth()` using method = 'loess'
d %>%
mutate(subid = as.factor(subid)) %>%
filter(!is.na(correct)) %>%
group_by(age, question, request_type, subid) %>%
summarise(correct = mean(correct)) %>%
mutate(agebin = floor(age)) %>%
group_by(agebin, question, request_type) %>%
multi_boot_standard(col="correct", na.rm=TRUE) %>%
rename(correct = mean) %>%
ggplot(aes(x=agebin, y=correct, fill = agebin)) +
geom_bar(stat="identity", position="dodge") +
geom_linerange(aes(ymin=ci_lower, ymax=ci_upper)) +
geom_hline(yintercept = .5, lty=2) +
ylab("mean correct answer") +
facet_grid(question~request_type)
Do participants improve as they get exposed to more trials?
d %>%
mutate(subid = as.factor(subid)) %>%
filter(!is.na(correct)) %>%
group_by(age, question, trial_num, subid) %>%
summarise(correct = mean(correct, na.rm=TRUE)) %>%
# group_by(question) %>%
# multi_boot_standard(col="correct", na.rm=TRUE) %>%
ggplot(aes(x=trial_num, y=correct)) +
geom_jitter(aes(col=age), height=.1) +
geom_hline(yintercept = .5, lty=2) +
geom_smooth() +
ylab("mean correct answer")
## `geom_smooth()` using method = 'loess'
# facet_grid(.~question)