Run sheet - demographics

# Load the de-identified run sheet, keeping only the subject code (as `subid`),
# Gender, Age, and an age-group factor derived from the age bin.
runsheet <- read.csv("../data/pilot1_and_2/nextKids_pilot_1_2_subject_deID.csv") %>%
  transmute(
    subid = subjCode,
    Gender,
    Age,
    # "5to6" -> middle, "3to4" -> young, any other non-missing bin -> old;
    # a missing bin stays NA.
    age_group = case_when(
      ageBin == "5to6" ~ "middle",
      ageBin == "3to4" ~ "young",
      !is.na(ageBin) ~ "old"
    ),
    # Put "young" and "middle" first so the levels are not alphabetical.
    age_group = fct_relevel(age_group, "young", "middle")
  )

Exclude subjects

p029 was run by a nanny; p022 didn't do next.

# Drop the two excluded participants (p029 and p022) from the run sheet.
runsheet <- runsheet %>%
  filter(!(subid == "p029" | subid == "p022"))
# Table of how many retained participants fall in each age group.
# (The earlier group_by(subid) was overridden by group_by(age_group) and had
# no effect, so the tally is done directly on age_group.)
runsheet %>%
  count(age_group) %>%
  kable()
age_group n
young 23
middle 23
old 10

Age Distribution

# Box plot of Age within each age group. ggplot2 ignores dplyr grouping, so
# the data frame is passed in ungrouped. The legend is hidden because the
# fill colour repeats the x axis.
runsheet %>%
  ggplot(aes(x = age_group, y = Age, fill = age_group)) +
  geom_boxplot() +
  theme_bw() +
  theme(legend.position = "none")

Practice trials

# Load the practice-trial data, order the age-group levels with "young" and
# "middle" first, and lower-case the subject ids.
ptrials <- read.csv("../data/pilot1_and_2/nextKids_pilot_1_2_practice_trials.csv") %>%
  mutate(
    age_group = fct_relevel(age_group, "young", "middle"),
    subid = tolower(subid)
  )

Exclude subjects

# Remove excluded participants:
#   p029 was run by nanny; p022 didn't do next
ptrials <- ptrials %>%
  filter(!(subid == "p029" | subid == "p022"))

Proportion practice trials correct

# One row per participant (the first practice-trial row for each subid), with
# the trial-level columns removed; the result is left grouped by age group.
unique.subs <- ptrials %>%
  group_by(subid) %>%
  slice(1L) %>%
  select(-c(response, trial_num, trial_type)) %>%
  group_by(age_group)

# Per-participant proportion of practice trials answered correctly.
# A response counts as correct when it is "banana" or "chair". Only the first
# two rows per participant are scored (practice may have been repeated), and
# the denominator is fixed at 2, so a participant with a single row can score
# at most 0.5.
s.prop.correct <- ptrials %>%
  mutate(correct = ifelse(response == "banana" | response == "chair", 1, 0)) %>%
  group_by(subid, age_group) %>%
  slice(1:2) %>% # first two trials if repeated multiple times
  summarize(prop_correct = sum(correct) / 2) %>%
  # summarize() only peels off the last grouping variable; drop the leftover
  # subid grouping so it does not leak into later plots and joins.
  ungroup()

# Histogram of per-participant practice accuracy, one panel per age group.
s.prop.correct %>%
  ggplot(aes(prop_correct, fill = age_group)) +
  geom_histogram(binwidth = 0.1) +
  facet_grid(. ~ age_group) +
  theme_bw() +
  theme(legend.position = "none")

Many of the younger kids missed one of the practice trials.

Vocab data

# Load the parent-survey vocabulary data, order the age-group levels with
# "young" and "middle" first, and lower-case the subject ids.
vocab <- read.csv("../data/pilot1_and_2/nextKids_pilot_1_2_parentsurvey.csv") %>%
  mutate(
    age_group = fct_relevel(age_group, "young", "middle"),
    subid = tolower(subid)
  ) %>%
  # Apply the same exclusions as the run sheet and practice trials (p029 and
  # p022). Previously only p022 was removed here, so p029 would re-enter the
  # merged data through the later full_join with missing demographics.
  filter(subid != "p029" & subid != "p022")

Total words

# Histogram of parent-reported total words, one panel per age group.
vocab %>%
  ggplot(aes(total_words, fill = age_group)) +
  geom_histogram() +
  facet_grid(. ~ age_group) +
  theme_bw() +
  theme(legend.position = "none")

# Box plot of parent-reported total words by age group.
vocab %>%
  ggplot(aes(x = age_group, y = total_words, fill = age_group)) +
  geom_boxplot() +
  theme_bw() +
  theme(legend.position = "none")

Not sure why we have vocab data for some kids in the old group. It is only a few kids.

Word items

# Collapse the word-item columns (positions 8 through 47) into a single
# comma-separated `vocab` string per row, keep rows with total_words > 0, then
# clean the string: strip digits first, then every character that is neither
# alphanumeric nor a comma.
concat.vocab <- vocab %>%
  unite(vocab, 8:47, sep = ",", remove = TRUE) %>%
  filter(total_words > 0) %>%
  mutate(
    vocab = gsub("[^[:alnum:],]", "", gsub("[[:digit:]]+", "", vocab))
  )
# Long format: one row per (subid, word) for every non-empty word item.
#
# The comma-separated string is split with separate_rows() rather than
# strsplit() + rbind(): strsplit() drops a trailing empty field, so rows could
# come back with different lengths, and rbind() would then recycle the shorter
# ones, duplicating a participant's first words. This form also does not
# depend on there being exactly 40 word columns.
long.vocab <- concat.vocab %>%
  select(subid, vocab) %>%
  separate_rows(vocab, sep = ",") %>%
  rename(word = vocab) %>%
  # Remove blanks before building the factor so "" is not kept as a level.
  filter(word != "") %>%
  mutate(word = as.factor(word))

# Count how many rows of long.vocab mention each word, and order the word
# factor by that count so the bar chart is sorted.
vocab.counts <- long.vocab %>%
  group_by(word) %>%
  tally() %>%
  mutate(word = fct_reorder(word, n))

# Bar chart of the per-word counts; x labels are rotated to vertical so the
# words stay legible.
vocab.counts %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  labs(y = "n_know") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust = 1))

Merge all data together

# Combine demographics, practice accuracy and vocabulary totals into one table
# keyed on subid. Full joins keep any subid that appears in at least one of
# the three sources. The word-item columns (positions 8 through 47) are
# dropped from vocab before joining. The run sheet's age_group (suffixed `.x`
# by the first join) is the one kept.
d <- runsheet %>%
  full_join(s.prop.correct, by = "subid") %>%
  full_join(vocab %>% select(-(8:47)), by = "subid") %>%
  select(
    exp, subid, date, time,
    Gender, Age, age_group.x,
    prop_correct, total_words
  ) %>%
  rename(age_group = age_group.x)

#write.csv(d, "nextKids_1_2_combined_other_data.csv")