# Read the CDI: Words & Sentences export and drop every row whose
# mcs_vc_total is "999" (presumably a missing-data code -- TODO confirm).
data_AS_WS <- read.delim(
  "data/ASD CDI data/mci_sentences02.txt",
  header = TRUE,
  sep = "\t",
  dec = "."
) %>%
  filter(mcs_vc_total != "999")

# The first row is kept as a one-column lookup table of column descriptions
# (one row per original column).
description_WS <- as.data.frame(t(data_AS_WS[1, ]))
names(description_WS) <- "description"

# Columns that are not carried forward, listed with their descriptions.
eliminated_WS <- data_AS_WS %>%
  dplyr::select(c(701:709, 780:850)) %>%
  slice(1) %>%
  gather(key = "Column names", value = "Description")
# %>% head(10)

# grammar items (past tense, future, not present, etc.)
description_WS[701:709, ]
# complexity and examples (e.g. longest sentences)
description_WS[780:850, ]
# Use mci_sentences02_id as the distinctive id, alongside the NDAR Global
# Unique Identifier (GUID).
# Keep the identifying columns plus columns 21:779; per the original note,
# complexity items start around column 785, so vocabulary, word endings and
# word forms are what is retained here.
data_raw_AS_WS <- data_AS_WS %>%
  dplyr::select(
    c(
      "mci_sentences02_id", "subjectkey", "interview_age",
      "collection_id", "dataset_id", "interview_date",
      "src_subject_id", "sex", 21:779
    )
  ) %>%
  # Positions are relative to the selection above.
  dplyr::select(-(689:697))
# dplyr::select(-(684:692)) # before adding interview_age -> sex...

# Promote the first row (the column descriptions) to column names, then
# drop that row from the data.
colnames(data_raw_AS_WS) <- as.character(unlist(data_raw_AS_WS[1, ]))
data_raw_AS_WS <- data_raw_AS_WS[-1, ] %>%
  rename(
    id = "mci_sentences02_id",
    GUID = "The NDAR Global Unique Identifier (GUID) for research subject",
    age = "Age in months at the time of the interview/test/sampling/imaging.",
    test_date = "Date on which the interview/genetic test/sampling/imaging/biospecimen was completed. MM/DD/YYYY",
    src_subject_id = "Subject ID how it's defined in lab/project",
    sex = "Sex of the subject"
  ) %>%
  # age arrives as text (or a factor), hence the as.character() round trip.
  mutate(age = as.numeric(as.character(age)))
# Reshape to long format: one row per child x item. The item column name has
# the form "<category>. <definition>" and is split on ". ".
data_clean_AS_WS <- data_raw_AS_WS %>%
  gather(
    key = "definition",
    value = "value",
    -c(id, GUID, age, collection_id, dataset_id, test_date, src_subject_id, sex)
  ) %>%
  separate(definition, into = c("category", "definition"), sep = "\\. ") %>%
  # blank cells become NA in every column
  mutate_all(na_if, "") %>%
  # 0 -> FALSE, any other non-missing value -> TRUE, NA stays NA
  mutate(value = value != 0)
# Per item and age: how many children have the item marked TRUE, how many do
# not, and the proportion marked TRUE.
# NOTE(review): n() counts rows with a missing `value`, so NA responses are
# included in num_false and in the denominator of prop -- confirm intended.
data_all_AS_WS <- data_clean_AS_WS %>%
  group_by(category, definition, age) %>%
  summarise(
    num_true = sum(value, na.rm = TRUE),
    num_false = n() - num_true,
    prop = num_true / (num_true + num_false)
  )
# unique(data_all_AS_WS$category)
# Harmonise CDI:WS categories and item labels.
#
# 1. Drop the four word-form / word-ending categories.
# 2. Map the category descriptions to short snake_case names. Spellings such
#    as "Clothinq", "Outside Thlnqs", "Connectinq Words" and "Helpinq Verbs"
#    are matched verbatim (presumably typos in the description row -- do not
#    "correct" them here or the match will fail).
# 3. Lower-case any category not covered by the map, then relabel items.
#
# The item rules rely on the recoded category names: `category` is
# lower-cased earlier in the same mutate(), so it is already in its final
# form when the `definition` rules are evaluated. case_when() takes the
# first matching rule, so every category-specific rule must stay ABOVE the
# general fallback for the same word.
#
# Change from the previous version: the second `definition == "daddy"` rule
# was unreachable (an identical rule appears earlier) and has been removed.
data_clean_AS_WS <- data_clean_AS_WS %>%
  filter(
    category != "Word forms, noun",
    category != "Word forms, verbs",
    category != "Word endings, nouns",
    category != "Word endings, verbs"
  ) %>%
  mutate(
    category = case_when(
      category == "Sound Effects and Animal Sounds" ~ "sounds",
      category == "Animals (Real or Toy)" ~ "animals",
      category == "Vehicles (Real or Toy)" ~ "vehicles",
      category == "Food and Drink" ~ "food_drink",
      category == "Clothinq" ~ "clothing",
      category == "Body Parts" ~ "body_parts",
      category == "Small Household Items" ~ "household",
      category == "Furniture and Rooms" ~ "furniture_rooms",
      category == "Outside Thlnqs" ~ "outside",
      category == "Action Words" ~ "action_words",
      category == "Places to Go" ~ "places",
      category == "Helping Verbs" ~ "helping_verbs",
      category == "Connectinq Words" ~ "connecting_words",
      category == "Descriptive Words" ~ "descriptive_words",
      category == "Words About Time" ~ "time_words",
      category == "Quantifiers and Articles" ~ "quantifiers",
      category == "Games and Routines" ~ "games_routines",
      category == "Question Words" ~ "question_words",
      category == "Prepositions and Locations" ~ "locations",
      category == "Helpinq Verbs" ~ "helping_verbs",
      TRUE ~ category
    )
  ) %>%
  mutate(
    category = tolower(category),
    definition = case_when(
      definition == "baa" ~ "baa baa",
      definition == "cockadoodle" ~ "cockadoodledoo",
      definition == "quack" ~ "quack quack",
      definition == "uhoh" ~ "uh oh",
      definition == "woof" ~ "woof woof",
      definition == "yum" ~ "yum yum",
      definition == "chicken" & category == "food_drink" ~ "chicken (food)",
      definition == "chicken" ~ "chicken (animal)",
      definition == "fish" & category == "food_drink" ~ "fish (food)",
      definition == "fish" ~ "fish (animal)",
      definition == "playdough" ~ "play dough",
      definition == "vagina" ~ "vagina*",
      definition == "penis" ~ "penis*",
      definition == "frenchfries" ~ "french fries",
      definition == "greenbeans" ~ "green beans",
      definition == "toy" ~ "toy (object)",
      definition == "drink" & category == "action_words" ~ "drink (action)",
      definition == "drink" ~ "drink (beverage)",
      definition == "gasstation" ~ "gas station",
      definition == "orange" & category == "food_drink" ~ "orange (food)",
      definition == "orange" ~ "orange (description)",
      definition == "allgone" ~ "all gone",
      definition == "water" & category == "food_drink" ~ "water (beverage)",
      definition == "water" ~ "water (not beverage)",
      definition == "feet" ~ "foot",
      definition == "callph" ~ "call (on phone)",
      definition == "clean" & category == "action_words" ~ "clean (action)",
      definition == "clean" ~ "clean (description)",
      definition == "owie booboo" ~ "owie/boo boo",
      definition == "dont" ~ "don't",
      definition == "5bowl" ~ "bowl",
      # "can" has no general fallback: outside these two categories it is
      # left unchanged by the final TRUE rule.
      definition == "can" & category == "household" ~ "can (object)",
      definition == "can" & category == "helping_verbs" ~ "can (auxiliary)",
      definition == "rockingchair" ~ "rocking chair",
      definition == "alot" ~ "a lot",
      definition == "buttocks/bottom" ~ "buttocks/bottom*",
      definition == "daddy" ~ "daddy*",
      definition == "childname" ~ "child's own name",
      definition == "washingmachine" ~ "washing machine",
      definition == "try" ~ "try/try to",
      definition == "work" & category == "places" ~ "work (place)",
      definition == "work" ~ "work (action)",
      definition == "giveme five" ~ "give me five!",
      definition == "mommy" ~ "mommy*",
      definition == "grandma" ~ "grandma*",
      definition == "church" ~ "church*",
      definition == "grandpa" ~ "grandpa*",
      definition == "patty cake" ~ "pattycake",
      definition == "dry" & category == "action_words" ~ "dry (action)",
      definition == "dry" ~ "dry (description)",
      definition == "lemme" ~ "lemme/let me",
      definition == "tissklee" ~ "tissue/kleenex",
      definition == "did" ~ "did/did ya",
      definition == "gonna get you" ~ "gonna get you!",
      definition == "peanutbutter" ~ "peanut butter",
      definition == "playpen" ~ "play pen",
      definition == "potatochip" ~ "potato chip",
      definition == "wanna" ~ "wanna/want to",
      definition == "watch" & category == "action_words" ~ "watch (action)",
      definition == "watch" ~ "watch (object)",
      definition == "dress" ~ "dress (object)",
      definition == "gonna" ~ "gonna/going to",
      definition == "gotta" ~ "gotta/got to",
      definition == "hafta" ~ "hafta/have to",
      definition == "highchair" ~ "high chair",
      definition == "lawnmower" ~ "lawn mower",
      definition == "little" ~ "little (description)",
      definition == "petname" ~ "pet's name",
      definition == "so big" ~ "so big!",
      definition == "need" ~ "need/need to",
      definition == "shush" ~ "shh/shush/hush",
      definition == "swing" & category == "action_words" ~ "swing (action)",
      definition == "swing" ~ "swing (object)",
      definition == "slide" & category == "action_words" ~ "slide (action)",
      definition == "slide" ~ "slide (object)",
      TRUE ~ definition
    )
  )
# Persist the cleaned long-format WS data, then print a summary of the
# per-item table.
save(
  data_clean_AS_WS,
  file = "data/ASD_WS.Rdata"
)
summary(data_all_AS_WS)
Note: we found a problem in the NDAR description file – mcg_vc18_back is given the definition “backyard” (creating a duplicate) instead of “back”. Also, “throw” (column 517) was left out of previous semantic network growth analyses.
# Read the CDI: Words & Gestures export and drop every row whose
# mcg_vc_totcom is "999" (presumably a missing-data code -- TODO confirm).
data_AS_WG <- read.delim(
  "data/ASD CDI data/mci_words_gestures01.txt",
  header = TRUE,
  sep = "\t",
  dec = "."
) %>%
  filter(mcg_vc_totcom != "999")
# filter(mcg_vc_totpr != "999") - for production?

# The first row is kept as a one-column lookup table of column descriptions.
description_WG <- as.data.frame(t(data_AS_WG[1, ]))
names(description_WG) <- "description"

# we only kept vocab; these are the columns left out
eliminated_WG <- dplyr::select(data_AS_WG, c(23:58, 454:520))
# Use mci_words_gestures01_id as the distinctive id, alongside the NDAR
# Global Unique Identifier (GUID). Keep the identifying columns, columns
# 59:453, and column 517.
data_raw_AS_WG <- dplyr::select(
  data_AS_WG,
  c(
    "collection_id", "dataset_id", "sex", "mci_words_gestures01_id",
    "subjectkey", "interview_age", 59:453, 517
  )
)

# Promote the first row (the column descriptions) to column names, then
# drop that row from the data.
colnames(data_raw_AS_WG) <- as.character(unlist(data_raw_AS_WG[1, ]))
data_raw_AS_WG <- data_raw_AS_WG[-1, ]

# Columns whose name repeats an earlier column's name (kept for inspection).
AS_WG_duplicated <- data_raw_AS_WG[duplicated(colnames(data_raw_AS_WG))]

# Make the names unique: repeats get the suffixes "_1", "_2", ...
names(data_raw_AS_WG) <- make.unique(names(data_raw_AS_WG), sep = "_")

data_raw_AS_WG <- data_raw_AS_WG %>%
  rename(
    id = "mci_words_gestures01_id",
    GUID = "The NDAR Global Unique Identifier (GUID) for research subject",
    age = "Age in months at the time of the interview/test/sampling/imaging.",
    house = "MacArthur Words and Gestures: Vocabulary Checklist: House",
    sex = "Sex of the subject"
  ) %>%
  # age arrives as text (or a factor), hence the as.character() round trip.
  mutate(age = as.numeric(as.character(age)))
# Reshape to long format: one row per child x item.
data_clean_AS_WG <- data_raw_AS_WG %>%
  gather(
    key = "definition",
    value = "value",
    -c(id, GUID, age, sex, dataset_id, collection_id)
  ) %>%
  # separate(definition, c("category","definition"),sep = "\\. ") %>%
  # blank cells become NA in every column
  mutate_all(na_if, "") %>%
  # 0 -> FALSE, any other non-missing value -> TRUE, NA stays NA
  mutate(value = value != 0)
# Per item and age: how many children have the item marked TRUE, how many do
# not, and the proportion marked TRUE.
# NOTE(review): n() counts rows with a missing `value`, so NA responses are
# included in num_false and in the denominator of prop -- confirm intended.
data_all_AS_WG <- data_clean_AS_WG %>%
  group_by(definition, age) %>%
  summarise(
    num_true = sum(value, na.rm = TRUE),
    num_false = n() - num_true,
    prop = num_true / (num_true + num_false)
  )
# Relabel CDI:WG items. Names ending in "_1" are the second occurrence of a
# repeated column name, as produced by make.unique(..., sep = "_").
# Items that are not listed in the lookup keep their current label.
data_clean_AS_WG <- local({
  relabel <- c(
    "bye or bye bye" = "bye",
    "chicken" = "chicken (animal)",
    "chicken_1" = "chicken (food)",
    "peek-a-boo" = "peekaboo",
    "water" = "water (beverage)",
    "water_1" = "water (not beverage)",
    "church" = "church*",
    "clean" = "clean (action)",
    "clean_1" = "clean (description)",
    "daddy" = "daddy*",
    "dress" = "dress (object)",
    "towl" = "towel",
    "grandpa" = "grandpa*",
    "grandma" = "grandma*",
    "mommy" = "mommy*",
    "owie/ boo boo" = "owie/boo boo",
    "little" = "little (description)",
    "drink" = "drink (beverage)",
    "drink_1" = "drink (action)",
    "dry" = "dry (description)",
    "fire truck" = "firetruck",
    "fish" = "fish (animal)",
    "fish_1" = "fish (food)",
    "toy" = "toy (object)",
    "teddy bear" = "teddybear",
    "swing" = "swing (object)",
    "swing_1" = "swing (action)",
    "work" = "work (place)",
    "orange" = "orange (food)",
    "patty cake" = "pattycake",
    "slide" = "slide (object)",
    "watch" = "watch (object)",
    "watch_1" = "watch (action)",
    "backyard_1" = "back" # "mcg_vc18_back"
  )
  # Look each label up by name; unmatched (or missing) labels yield NA and
  # fall back to the original value.
  data_clean_AS_WG %>%
    mutate(definition = coalesce(unname(relabel[definition]), definition))
})
# Persist the cleaned long-format WG data, then print a summary of the
# per-item table.
save(
  data_clean_AS_WG,
  file = "data/ASD_WG.Rdata"
)
summary(data_all_AS_WG)
We need to consider which ASD studies are relevant to include (e.g., some may be from more severe cases than others), as well as which have oddities (e.g., duplicate/mismatched subject IDs). At first glance at the summary by dataset_id, a few of the datasets have production scores that are all 0 (e.g., dataset_id = c(9137, 17935, 21697, 17999, 17151)). Are these true 0 CDI scores, or are the CDI data for these studies somehow missing? As these studies comprise 1170 participants (of our 4488 total ASD sample), it is important to know what’s going on, and whether or not they should be excluded. I also note that studies 17935 and 21697 suspiciously have the same age range, mean age, and number of subjects – are these 194 subjects duplicated under different dataset_ids?
For now, I remove all of the datasets without any non-zero production scores.
# Reload the cleaned long-format data saved earlier.
load("data/ASD_WS.Rdata") # data_clean_AS_WS
load("data/ASD_WG.Rdata") # data_clean_AS_WG

# One row per WS administration: number of items marked TRUE.
asd_ws <- data_clean_AS_WS %>%
  group_by(id, age, sex, dataset_id) %>%
  summarise(production = sum(value, na.rm = TRUE))
## `summarise()` has grouped output by 'id', 'age', 'sex'. You can override using
## the `.groups` argument.

# One row per WG administration: number of items marked TRUE.
asd_wg <- data_clean_AS_WG %>%
  group_by(id, age) %>%
  summarise(production = sum(value, na.rm = TRUE))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
# remove extreme ages (currently disabled)
# data_clean_AS_WS <- data_clean_AS_WS %>%
#   filter(age>=12, age<=48)
# data_clean_AS_WG <- data_clean_AS_WG %>%
#   filter(age>=8, age<=36)

# Per-dataset overview of the WS data: age range, mean age, number of
# administrations and mean production score, largest datasets first.
data_clean_AS_WS %>%
  group_by(id, age, sex, dataset_id) %>%
  summarise(production = sum(value, na.rm = TRUE)) %>%
  group_by(dataset_id) %>%
  summarise(
    min_age = min(age),
    max_age = max(age),
    mean_age = mean(age),
    n = n(),
    mean_prod = mean(production)
  ) %>%
  arrange(desc(n)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 1)
## `summarise()` has grouped output by 'id', 'age', 'sex'. You can override using
## the `.groups` argument.
dataset_id | min_age | max_age | mean_age | n | mean_prod |
---|---|---|---|---|---|
9137 | 1 | 48 | 32.7 | 657 | 0.0 |
17679 | 12 | 39 | 25.9 | 467 | 304.7 |
15459 | 12 | 39 | 25.4 | 445 | 293.5 |
14572 | 12 | 39 | 25.4 | 435 | 292.4 |
12399 | 12 | 39 | 25.3 | 399 | 285.4 |
11395 | 12 | 38 | 24.5 | 320 | 276.9 |
10564 | 12 | 37 | 20.0 | 314 | 164.0 |
11014 | 17 | 122 | 58.0 | 256 | 433.3 |
11259 | 12 | 38 | 21.9 | 205 | 222.3 |
17935 | 17 | 34 | 22.4 | 194 | 0.0 |
21697 | 17 | 34 | 22.4 | 194 | 0.0 |
9961 | 12 | 26 | 18.6 | 125 | 125.4 |
18066 | 23 | 60 | 30.1 | 115 | 375.9 |
17999 | 17 | 26 | 19.9 | 113 | 0.0 |
7881 | 23 | 47 | 29.6 | 90 | 78.4 |
9960 | 12 | 20 | 14.8 | 53 | 59.0 |
13475 | 19 | 64 | 36.5 | 37 | 370.5 |
12608 | 19 | 64 | 37.1 | 35 | 380.0 |
9959 | 12 | 14 | 12.5 | 22 | 10.8 |
17151 | 18 | 24 | 21.0 | 12 | 0.0 |
# Drop the datasets whose mean production in the table above is 0.
bad_ws_datasets <- c(9137, 17935, 21697, 17999, 17151)
asd_ws <- asd_ws %>%
  filter(!(dataset_id %in% bad_ws_datasets))
# dim(subset(asd_ws, production==0)) # now only 158/3318 children aren't producing any words
Note that there are many CDI:WS administrations for ASD children outside the intended age range (16-30 months): 352 children aged <16 months (mean production = 22 words), and 894 children aged >30 months (mean production = 476 words).
Similarly, there are many CDI:WG administrations for ASD children outside the intended age range (8-16 months): 12 children aged <12 months (mean production = 2 words), and 4354 children aged >16 months (mean production = 59 words).
We will constrain our analysis to those children close to the intended age ranges, extending a wider margin for older children to capture the attenuated language learning of children with ASD. Thus, for the CDI:WS we include 3148 children aged 12-48 months (removing only 5.1% of the data). For the CDI:WG we include 7862 children aged 8-36 months (removing only 7.4% of the data).
## `summarise()` has grouped output by 'data_id', 'age'. You can override using
## the `.groups` argument.
Production sumscores on the CDI:WS for TD vs. ASD children.
9 typically-developing (TD) children were removed from the wordbank data (5520 total) due to their not yet producing any words. 158 children with ASD were removed from the NDAR data (3318 total) due to their not yet producing any words.
# Fit the grouped model on the WS production matrix, cache it on disk and
# read it back.
# NOTE(review): the file is written with save() and read with load(), i.e. it
# is in RData format despite the ".Rds" extension; readRDS() will not read it.
mod_dev_group <- fit_mod_intuitive(d_mat, d_group)
save(mod_dev_group, file = "data/prodWS_IRT_model.Rds")
load("data/prodWS_IRT_model.Rds") # 12-48 mos ASD
# plot_glimmer(mod_dev_group, colnames(d_mat), colnames(d_mat),
#              plotName="GLIMMER_asd_prodWS_age12-48mos")
# load("data/prodWS_IRT_model_16-48mos.Rds")
# plot_glimmer(mod_dev_group, colnames(d_mat), colnames(d_mat),
#              plotName="GLIMMER_asd_prodWS_age16-48mos")
# doesn't make a difference whether we exclude 12-15 month-old ASD kids or not

# Per-item parameters for the two groups.
mm_asd <- extract_group_df(mod_dev_group, groups = c("TD", "ASD"))
## Joining, by = c("a1", "definition")
# dif_hist <- item_difficulty_difference_histogram(mm_asd)

# Extrema: items whose absolute difficulty difference is above
# median + 2 * SD of the absolute differences.
thresh <- with(mm_asd, median(d_diff_abs) + 2 * sd(d_diff_abs))
big_dif <- mm_asd[which(mm_asd$d_diff_abs > thresh), ]

dif_histn <- item_difficulty_difference_histogram(mm_asd, withNormal = TRUE)
print(dif_histn)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The majority of items are easier for TD than for ASD children. We examine the extrema: items whose absolute difficulty difference is more than 2 standard deviations above the median absolute difference (|d_diff| > 3.21; red dotted lines in histogram). These 33 extrema are listed below.
# Add the columns of wb_items (the table below uses its `category`) to the
# extreme items. The join key is stated explicitly instead of being inferred
# from the shared column names, so an extra shared column added to either
# table later cannot silently change the join.
big_dif <- big_dif %>%
  left_join(wb_items, by = "definition")
# (the key was previously inferred as "definition")

# Extrema sorted by signed difficulty difference, with the group difficulty
# columns relabelled as d_TD and d_ASD and the a1/group columns dropped.
big_dif %>%
  arrange(d_diff) %>%
  relocate(category, definition) %>%
  rename(d_TD = d_g1, d_ASD = d_g2) %>%
  dplyr::select(-a1, -group1, -group2) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 2)
category | definition | d_TD | d_ASD | d_diff | d_diff_abs |
---|---|---|---|---|---|
people | mommy* | 6.68 | 0.62 | -6.06 | 6.06 |
people | daddy* | 6.46 | 1.94 | -4.52 | 4.52 |
people | baby | 3.85 | 0.27 | -3.58 | 3.58 |
action_words | cover | -1.90 | 1.32 | 3.22 | 3.22 |
places | playground | -1.73 | 1.51 | 3.24 | 3.24 |
people | girl | 0.09 | 3.40 | 3.31 | 3.31 |
action_words | give | -0.83 | 2.50 | 3.34 | 3.34 |
action_words | pretend | -3.09 | 0.25 | 3.34 | 3.34 |
animals | penguin | -1.25 | 2.14 | 3.39 | 3.39 |
action_words | pick | -2.00 | 1.42 | 3.42 | 3.42 |
action_words | hurry | -1.56 | 1.86 | 3.43 | 3.43 |
action_words | listen | -1.90 | 1.53 | 3.43 | 3.43 |
action_words | make | -1.35 | 2.09 | 3.44 | 3.44 |
people | aunt | -0.72 | 2.75 | 3.47 | 3.47 |
games_routines | go potty | 0.70 | 4.22 | 3.52 | 3.52 |
action_words | chase | -1.86 | 1.69 | 3.55 | 3.55 |
people | person | -3.74 | -0.06 | 3.68 | 3.68 |
places | movie | -1.46 | 2.31 | 3.78 | 3.78 |
games_routines | so big! | -1.01 | 2.83 | 3.84 | 3.84 |
places | gas station | -2.25 | 1.62 | 3.88 | 3.88 |
people | babysitter | -3.43 | 0.48 | 3.92 | 3.92 |
people | uncle | -0.89 | 3.09 | 3.99 | 3.99 |
furniture_rooms | basement | -3.65 | 0.37 | 4.02 | 4.02 |
places | woods | -3.73 | 0.29 | 4.02 | 4.02 |
action_words | rip | -3.10 | 0.97 | 4.07 | 4.07 |
action_words | hate | -4.32 | -0.17 | 4.15 | 4.15 |
games_routines | breakfast | -0.03 | 4.15 | 4.18 | 4.18 |
games_routines | turn around | -1.02 | 3.25 | 4.27 | 4.27 |
action_words | dump | -2.48 | 2.02 | 4.50 | 4.50 |
action_words | skate | -3.07 | 1.46 | 4.53 | 4.53 |
people | nurse | -3.59 | 1.50 | 5.09 | 5.09 |
people | man | -0.57 | 4.59 | 5.15 | 5.15 |
people | cowboy | -3.07 | 3.85 | 6.92 | 6.92 |
# Number of extreme items per category, in increasing order of frequency.
kable(
  sort(table(big_dif$category)),
  col.names = c("Category", "Frequency")
)
Category | Frequency |
---|---|
animals | 1 |
furniture_rooms | 1 |
games_routines | 4 |
places | 4 |
people | 11 |
action_words | 12 |
The extrema are mostly people and action words.
# Per-dataset overview of the WG data: age range, mean age, number of
# administrations and mean number of items marked TRUE, largest datasets
# first.
data_clean_AS_WG %>%
  group_by(id, age, sex, dataset_id) %>%
  summarise(comprehension = sum(value, na.rm = TRUE)) %>%
  group_by(dataset_id) %>%
  summarise(
    min_age = min(age),
    max_age = max(age),
    mean_age = mean(age),
    n = n(),
    mean_comp = mean(comprehension)
  ) %>%
  arrange(desc(n)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 1)
## `summarise()` has grouped output by 'id', 'age', 'sex'. You can override using
## the `.groups` argument.
dataset_id | min_age | max_age | mean_age | n | mean_comp |
---|---|---|---|---|---|
12095 | 17 | 22 | 18.0 | 918 | 0.0 |
9138 | 3 | 48 | 22.9 | 876 | 0.0 |
14747 | 11 | 38 | 18.3 | 706 | 151.3 |
16641 | 11 | 38 | 18.3 | 706 | 151.3 |
10754 | 0 | 45 | 17.8 | 420 | 0.0 |
11574 | 0 | 45 | 17.8 | 420 | 0.0 |
10564 | 8 | 20 | 11.8 | 354 | 48.4 |
11014 | 13 | 132 | 57.5 | 290 | 185.3 |
8703 | 8 | 63 | 38.0 | 283 | 0.0 |
8667 | 11 | 48 | 27.5 | 268 | 0.0 |
12399 | 8 | 20 | 11.6 | 226 | 46.6 |
14572 | 8 | 20 | 11.6 | 226 | 46.6 |
15459 | 8 | 20 | 11.6 | 226 | 46.6 |
17679 | 8 | 20 | 11.6 | 226 | 46.6 |
11395 | 8 | 20 | 11.6 | 223 | 46.8 |
18066 | 9 | 21 | 13.0 | 216 | 0.0 |
14458 | 7 | 21 | 12.9 | 208 | 0.0 |
11259 | 8 | 20 | 11.7 | 201 | 46.2 |
17999 | 11 | 20 | 15.1 | 196 | 0.0 |
12942 | 7 | 21 | 12.6 | 194 | 0.0 |
11717 | 7 | 21 | 12.1 | 163 | 0.0 |
9961 | 8 | 20 | 12.1 | 162 | 48.2 |
8368 | 10 | 18 | 13.7 | 138 | 0.0 |
17935 | 12 | 22 | 16.2 | 112 | 0.0 |
8176 | 15 | 30 | 24.3 | 88 | 0.0 |
8370 | 15 | 30 | 24.3 | 88 | 0.0 |
9960 | 7 | 20 | 13.5 | 69 | 119.5 |
16640 | 12 | 29 | 20.9 | 61 | 205.6 |
18941 | 8 | 19 | 12.0 | 50 | 38.6 |
9959 | 7 | 14 | 10.8 | 44 | 59.6 |
8535 | 8 | 60 | 32.4 | 20 | 0.0 |
9507 | 49 | 85 | 68.6 | 20 | 0.0 |
12891 | 27 | 53 | 35.2 | 18 | 0.0 |
18703 | 59 | 101 | 76.1 | 18 | 171.2 |
10971 | 60 | 173 | 86.2 | 13 | 131.2 |
10335 | 60 | 109 | 77.7 | 10 | 136.6 |
11805 | 11 | 14 | 12.3 | 10 | 48.6 |
12820 | 12 | 12 | 12.0 | 7 | 0.0 |
9777 | 62 | 101 | 76.6 | 7 | 122.7 |
12790 | 74 | 124 | 106.3 | 3 | 0.0 |
8532 | 24 | 24 | 24.0 | 1 | 0.0 |
8853 | 12 | 12 | 12.0 | 1 | 10.0 |
8912 | 42 | 42 | 42.0 | 1 | 0.0 |
8922 | 22 | 22 | 22.0 | 1 | 0.0 |
Datasets with all 0 comprehension scores: c(12095, 9138, 10754, 11574, 8703, 8667, 18066, 14458, 17999, 12942, 11717, 8368, 17935, 8176, 8370, 8535, 9507, 12891, 12820, 12790, 8532, 8912, 8922). Datasets 10754 and 11574 have identical age ranges, mean ages, and Ns, as do 8176 and 8370. Datasets 16641 and 14747 have identical age ranges, mean ages, Ns, and average comprehension scores. After removing all of the 0-comprehension datasets, only 3829 participants remain.
# Drop the datasets whose mean comprehension in the table above is 0.
bad_asd_wg_datasets <- c(
  12095, 9138, 10754, 11574, 8703, 8667, 18066, 14458, 17999, 12942, 11717,
  8368, 17935, 8176, 8370, 8535, 9507, 12891, 12820, 12790, 8532, 8912, 8922
)
data_clean_AS_WG <- data_clean_AS_WG %>%
  filter(!(dataset_id %in% bad_asd_wg_datasets))
## `summarise()` has grouped output by 'data_id', 'age'. You can override using
## the `.groups` argument.
241 typically-developing (TD) children were removed from the wordbank data (5520 total) due to their not yet producing any words. 1058 children with ASD were removed from the NDAR data (3318 total) due to their not yet producing any words.
Production sumscores on the CDI:WG for TD vs. ASD children.
# Fit the grouped model on the WG matrix, cache it on disk and read it back.
# This reuses the name `mod_dev_group`, replacing any model already held
# under that name in the session.
# NOTE(review): the file is written with save() and read with load(), i.e. it
# is in RData format despite the ".Rds" extension; readRDS() will not read it.
mod_dev_group <- fit_mod_intuitive(d_mat_wg, d_group)
save(mod_dev_group, file = "data/prodWG_IRT_model.Rds")
load("data/prodWG_IRT_model.Rds") # 8-36mo ASD
# plot_glimmer(mod_dev_group, colnames(d_mat_wg), colnames(d_mat_wg),
#              plotName="GLIMMER_asd_prodWG_age8-36mos")

# Per-item parameters for the two groups.
mm_asd_wg <- extract_group_df(mod_dev_group, groups = c("TD", "ASD"))
## Joining, by = c("a1", "definition")
# dif_hist <- item_difficulty_difference_histogram(mm_asd)

# Extrema: items whose absolute difficulty difference is above
# median + 2 * SD of the absolute differences.
thresh_wg <- with(mm_asd_wg, median(d_diff_abs) + 2 * sd(d_diff_abs))
big_dif_wg <- mm_asd_wg[which(mm_asd_wg$d_diff_abs > thresh_wg), ]

dif_histn_wg <- item_difficulty_difference_histogram(mm_asd_wg, withNormal = TRUE)
print(dif_histn_wg)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The majority of the items are much easier for TD than for ASD children. We examine the extrema: items whose absolute difficulty difference is more than 2 standard deviations above the median absolute difference (|d_diff| > 7.88; red dotted lines in histogram). These 3 extrema are listed below.
# Add the columns of wg_items (the table below uses its `category`) to the
# extreme items. The join key is stated explicitly instead of being inferred
# from the shared column names, so an extra shared column added to either
# table later cannot silently change the join.
big_dif_wg <- big_dif_wg %>%
  left_join(wg_items, by = "definition")
# (the key was previously inferred as "definition")

# Extrema sorted by signed difficulty difference, with the group difficulty
# columns relabelled as d_TD and d_ASD and the a1/group columns dropped.
big_dif_wg %>%
  arrange(d_diff) %>%
  relocate(category, definition) %>%
  rename(d_TD = d_g1, d_ASD = d_g2) %>%
  dplyr::select(-a1, -group1, -group2) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 2)
category | definition | d_TD | d_ASD | d_diff | d_diff_abs |
---|---|---|---|---|---|
descriptive_words | old | -7.79 | 0.25 | 8.04 | 8.04 |
pronouns | her | -7.60 | 0.45 | 8.04 | 8.04 |
question_words | when | -8.32 | 0.00 | 8.32 | 8.32 |