calc_age <- function(birthDate, refDate = Sys.Date()) {

    require(lubridate)

    period <- as.period(interval(birthDate, refDate),
                        unit = "day")

    round(period$day/365, 2)

}

loading and cleaning data

data_path <- "../data/raw_data/v3_lookit"
file <- "polcon_lookit_raw_data.csv"
# file <- "test.csv"
d_raw <- read_csv(here::here(data_path, file))

ord <- read_csv(here::here("../data/info/v3/polcon_order.csv"))
d_tidy <- d_raw %>%
  # filter(response_completed == "True") %>%
  # filter(participant_id == "3434" | participant_id == "3432") %>%
  filter(!grepl("test", child_additional_information, ignore.case=TRUE)) %>%
  # mutate(order = case_when(
  #   grepl("'conditionNum': 0", substr(response_conditions, 1, 108)) ~ "1",
  #   grepl("'conditionNum': 1", substr(response_conditions, 1, 108)) ~"2"
  #   )) %>%
  mutate(response_conditions = gsub("\'","\"",response_conditions)) %>%
  mutate(response_exp_data = gsub("\'","\"",response_exp_data),
         response_exp_data = gsub("None","\"None\"",response_exp_data),
         response_exp_data = gsub("True","\"True\"",response_exp_data),
         response_exp_data = gsub("False","\"False\"",response_exp_data),
         response_exp_data = gsub("\"s ","\'s ",response_exp_data),
         response_exp_data = gsub("\"rude\"","\\'rude\\'",response_exp_data),
         response_exp_data = gsub("I\"m ","I\'m ",response_exp_data),
         response_exp_data = gsub("I\"d ","I\'d ",response_exp_data),
         response_exp_data = gsub("they\"d ","they\'d ",response_exp_data),
         response_exp_data = gsub("n\"t ","n\'t ",response_exp_data),
         response_exp_data = gsub("\"please\"","\'please\'",response_exp_data),
         response_exp_data = gsub("\"can ","\'can ",response_exp_data),
         response_exp_data = gsub("please\".\"","please\'.\"",response_exp_data),
         response_exp_data = gsub("\"\"None\"\"","\"None\"", response_exp_data),
         response_exp_data = gsub("\"Next\"","\\'Next\\'",response_exp_data)

         ) %>%
  select(response_id, response_conditions, child_birthday, response_exp_data) %>%
  rename(subid = response_id) %>%
  mutate(
    # order 1 = cond 0, order 2 = cond 1
    order_practice =
      jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_conditions)))$`5-practice-trial`$conditionNum,
    order_test = 
      jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_conditions)))$`6-nested-trials`$conditionNum,

    dot = 
      jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`0-0-video-config`$eventTimings,
      
    practice1 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`9-9-practice-trial`$currentlyHighlighted) %>%
  filter(!is.na(practice1)) %>%
  mutate(
    practice2 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`10-10-practice-trial`$currentlyHighlighted,
    test1 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`16-16-nested-trials`$currentlyHighlighted,
    test2 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`22-22-nested-trials`$currentlyHighlighted,
    test3 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`28-28-nested-trials`$currentlyHighlighted,
    test4 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`34-34-nested-trials`$currentlyHighlighted,
    test5 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`40-40-nested-trials`$currentlyHighlighted,
    test6 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`46-46-nested-trials`$currentlyHighlighted,
    test7 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`52-52-nested-trials`$currentlyHighlighted,
    test8 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`58-58-nested-trials`$currentlyHighlighted,
    test9 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`64-64-nested-trials`$currentlyHighlighted,
    test10 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`70-70-nested-trials`$currentlyHighlighted,
    test11 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`76-76-nested-trials`$currentlyHighlighted,
    test12 = jsonlite::stream_in(textConnection(gsub("\\n", "", .$response_exp_data)))$`82-82-nested-trials`$currentlyHighlighted
         ) %>%
    rowwise() %>%
    mutate(dot = as.Date(substr(dot$timestamp[1], 1, 10))) %>%
    filter(dot > "2018-04-05") %>%
    mutate(age = calc_age(child_birthday, dot))
d_tidy %>%
  ggplot(., aes(practice1)) +
  geom_bar() +
  facet_grid(floor(age)~order_practice)

d_tidy %>%
  ggplot(., aes(practice2)) +
  geom_bar() +
  facet_grid(floor(age)~order_practice)
d <- d_tidy %>%
  filter(!is.na(test6)) %>%
  filter((order_practice == 0 & practice1 == "speaker1" & practice2 == "speaker2") | 
           (order_practice == 1 & practice1 == "speaker2" & practice2 == "speaker1")) %>%
  select(-order_practice, -practice1, -practice2) %>%
  gather(trial_num, choice, test1:test12) %>%
  mutate(trial_num = as.numeric(as.character(substr(trial_num, 5, 6)))) %>%
  left_join(., ord) %>%
  mutate(correct = as.numeric(as.character(case_when(
    choice == correct_answer ~ "1",
    choice != correct_answer ~ "0",    
    TRUE ~ "NA"
  ))))

write.csv(d, here::here("../data/polcon_v3_lookit_processed.csv"), row.names = FALSE)

d <- read_csv(here::here("../data/polcon_v3_lookit_processed.csv"))
## Parsed with column specification:
## cols(
##   subid = col_integer(),
##   response_conditions = col_character(),
##   child_birthday = col_date(format = ""),
##   response_exp_data = col_character(),
##   order_test = col_integer(),
##   dot = col_date(format = ""),
##   age = col_double(),
##   trial_num = col_integer(),
##   choice = col_character(),
##   question = col_character(),
##   correct_answer = col_character(),
##   request_type = col_character(),
##   correct = col_integer()
## )
d %>%
  mutate(agebin = floor(age)) %>%
  group_by(agebin, subid) %>%
  summarise(n=n()) %>%
  group_by(agebin) %>%
  summarise(n=n())
## # A tibble: 3 x 2
##   agebin     n
##    <dbl> <int>
## 1      2    21
## 2      3    31
## 3      4    24

confirmatory ana

correct ~ age x question type

d %>%
  mutate(subid = as.factor(subid)) %>%
  filter(!is.na(correct)) %>%
  group_by(age, question, subid) %>%
  summarise(correct = mean(correct)) %>%
  mutate(agebin = floor(age)) %>%
  group_by(agebin, question) %>%
  multi_boot_standard(col="correct", na.rm=TRUE) %>%
  rename(correct = mean) %>%
  ggplot(aes(x=agebin, y=correct, fill = agebin)) +
  geom_bar(stat="identity", position="dodge") +
  geom_linerange(aes(ymin=ci_lower, ymax=ci_upper)) +
  geom_hline(yintercept = .5, lty=2) +
  ylab("mean correct answer") +
  facet_grid(.~question)

d %>%
  mutate(subid = as.factor(subid)) %>%
  filter(!is.na(correct)) %>%
  group_by(age, question, subid) %>%
  summarise(correct = mean(correct)) %>%
  # group_by(question) %>%
  # multi_boot_standard(col="correct", na.rm=TRUE) %>%
  ggplot(aes(x=age, y=correct)) +
  geom_point(aes(col=subid)) +
  geom_hline(yintercept = .5, lty=2) +
  geom_smooth(size=.4, alpha=.1) +
  ylab("mean correct answer") +
  facet_grid(.~question)
## `geom_smooth()` using method = 'loess'

correct ~ age x request type

d %>%
  mutate(subid = as.factor(subid)) %>%
  filter(!is.na(correct)) %>%
  group_by(age, request_type, subid) %>%
  summarise(correct = mean(correct)) %>%
  mutate(agebin = floor(age)) %>%
  group_by(agebin, request_type) %>%
  multi_boot_standard(col="correct", na.rm=TRUE) %>%
  rename(correct = mean) %>%
  ggplot(aes(x=agebin, y=correct, fill = agebin)) +
  geom_bar(stat="identity", position="dodge") +
  geom_linerange(aes(ymin=ci_lower, ymax=ci_upper)) +
  geom_hline(yintercept = .5, lty=2) +
  ylab("mean correct answer") +
  facet_grid(.~request_type)

d %>%
  mutate(subid = as.factor(subid)) %>%
  filter(!is.na(correct)) %>%
  group_by(age, request_type, subid) %>%
  summarise(correct = mean(correct)) %>%
  # group_by(question) %>%
  # multi_boot_standard(col="correct", na.rm=TRUE) %>%
  ggplot(aes(x=age, y=correct)) +
  geom_point(aes(col=subid)) +
  geom_hline(yintercept = .5, lty=2) +
  geom_smooth(size=.4, alpha=.1) +
  ylab("mean correct answer") +
  facet_grid(.~request_type)
## `geom_smooth()` using method = 'loess'

correct ~ age x question type x request type

d %>%
  mutate(subid = as.factor(subid)) %>%
  filter(!is.na(correct)) %>%
  group_by(age, question, request_type, subid) %>%
  summarise(correct = mean(correct)) %>%
  mutate(agebin = floor(age)) %>%
  group_by(agebin, question, request_type) %>%
  multi_boot_standard(col="correct", na.rm=TRUE) %>%
  rename(correct = mean) %>%
  ggplot(aes(x=agebin, y=correct, fill = agebin)) +
  geom_bar(stat="identity", position="dodge") +
  geom_linerange(aes(ymin=ci_lower, ymax=ci_upper)) +
  geom_hline(yintercept = .5, lty=2) +
  ylab("mean correct answer") +
  facet_grid(question~request_type)

exploratory ana

Do participants improve as they get exposed to more trials?

d %>%
  mutate(subid = as.factor(subid)) %>%
  filter(!is.na(correct)) %>%
  group_by(age, question, trial_num, subid) %>%
  summarise(correct = mean(correct, na.rm=TRUE)) %>%
  # group_by(question) %>%
  # multi_boot_standard(col="correct", na.rm=TRUE) %>%
  ggplot(aes(x=trial_num, y=correct)) +
  geom_jitter(aes(col=age), height=.1) +
  geom_hline(yintercept = .5, lty=2) +
  geom_smooth() +
  ylab("mean correct answer")
## `geom_smooth()` using method = 'loess'

  # facet_grid(.~question)