Clean ASD CDI:WS data

# Read the tab-delimited CDI:WS export and drop rows whose mcs_vc_total is "999"
# (presumably a missing-data code -- confirm against the NDAR data dictionary).
data_AS_WS <- read.delim(
  "data/ASD CDI data/mci_sentences02.txt",
  header = TRUE,
  sep = "\t",
  dec = "."
)
data_AS_WS <- filter(data_AS_WS, mcs_vc_total != "999")

# The first row of the export describes each column; transpose it into a
# one-column lookup table whose row names are the original column names.
description_WS <- as.data.frame(t(data_AS_WS[1, ]))
names(description_WS) <- "description"


# Columns we discard, listed in long form as (column name, description) pairs
# taken from the description row (row 1) of the export.
eliminated_WS <- gather(
  slice(dplyr::select(data_AS_WS, c(701:709, 780:850)), 1),
  key = "Column names",
  value = "Description"
)
# (append `%>% head(10)` to preview only the first few entries)

# Grammar items (past tense, future, not present, etc.)
description_WS[701:709, ]
# Complexity and examples (e.g. longest sentences)
description_WS[780:850, ]

# mci_sentences02_id is used as the distinctive row id and subjectkey is the
# NDAR Global Unique Identifier. Columns 21:779 hold the vocabulary, word-ending
# and word-form items (author's note: complexity starts from 785).
data_raw_AS_WS <- dplyr::select(
  data_AS_WS,
  c(
    "mci_sentences02_id", "subjectkey", "interview_age",
    "collection_id", "dataset_id", "interview_date",
    "src_subject_id", "sex", 21:779
  )
)
# Drop positions 689:697 of the reduced frame (the range was 684:692 before
# interview_age ... sex were added to the selection above).
# NOTE(review): presumably these are the grammar items at 701:709 of the raw
# export -- confirm.
data_raw_AS_WS <- dplyr::select(data_raw_AS_WS, -(689:697))

# Use the description row as the column names, then remove it from the data.
colnames(data_raw_AS_WS) <- as.character(unlist(data_raw_AS_WS[1, ]))
data_raw_AS_WS <- data_raw_AS_WS[-1, ]

# Give the identifier/demographic columns short names (their current names are
# the long descriptions promoted from the first row of the export).
data_raw_AS_WS <- rename(
  data_raw_AS_WS,
  id = "mci_sentences02_id",
  GUID = "The NDAR Global Unique Identifier (GUID) for research subject",
  age = "Age in months at the time of the interview/test/sampling/imaging.",
  test_date = "Date on which the interview/genetic test/sampling/imaging/biospecimen was completed. MM/DD/YYYY",
  src_subject_id = "Subject ID how it's defined in lab/project",
  sex = "Sex of the subject"
)

# Age was not read as a number; go through character so that a factor (if any)
# is converted by label rather than by level code.
data_raw_AS_WS <- mutate(data_raw_AS_WS, age = as.numeric(as.character(age)))

# Reshape to long format: one row per child x item. The item column names have
# the form "<category>. <definition>", which is split into two columns.
data_clean_AS_WS <- data_raw_AS_WS %>%
  gather(key = "definition", value = "value",
         -c(id, GUID, age, collection_id, dataset_id, test_date,
            src_subject_id, sex)) %>%
  separate(definition, c("category", "definition"), sep = "\\. ") %>%
  # Blank strings become NA. Only text-like columns are touched: applying
  # na_if(x, "") to every column (the previous mutate_all() call) fails on the
  # numeric `age` column under dplyr >= 1.1, where na_if() type-checks `y`.
  mutate(across(
    where(function(col) is.character(col) || is.factor(col)),
    function(col) replace(col, !is.na(col) & col == "", NA)
  )) %>%
  # 0 -> FALSE, any other non-missing response -> TRUE, NA stays NA.
  mutate(value = value != 0)

# Per category x item x age: count of TRUE responses, the rest of the group
# (n() also counts NA responses, so they land in num_false), and the share of
# TRUE responses among all rows of the group.
data_all_AS_WS <- summarise(
  group_by(data_clean_AS_WS, category, definition, age),
  num_true = sum(value, na.rm = TRUE),
  num_false = n() - num_true,
  prop = num_true / n()
)

# unique(data_all_AS_WS$category)

# Keep only vocabulary items and normalise the labels:
#   1. drop the four grammar sections (word forms / word endings);
#   2. map the long CDI:WS section titles to short snake_case category names
#      (unmapped titles are kept and lower-cased);
#   3. rewrite item labels, disambiguating homonyms by their category.
# Some section titles are matched with apparent typos ("Clothinq",
# "Outside Thlnqs", "Connectinq Words", "Helpinq Verbs") -- presumably that is
# how they are spelled in the source descriptions, so they are kept verbatim.
data_clean_AS_WS <- data_clean_AS_WS %>%
  # Rows with a missing category are dropped too, exactly as a chain of
  # `category != ...` conditions would do.
  filter(
    !is.na(category),
    !category %in% c("Word forms, noun",
                     "Word forms, verbs",
                     "Word endings, nouns",
                     "Word endings, verbs")
  ) %>%
  mutate(category = case_when(
    category == "Sound Effects and Animal Sounds" ~ "sounds",
    category == "Animals (Real or Toy)" ~ "animals",
    category == "Vehicles (Real or Toy)" ~ "vehicles",
    category == "Food and Drink" ~ "food_drink",
    category == "Clothinq" ~ "clothing",
    category == "Body Parts" ~ "body_parts",
    category == "Small Household Items" ~ "household",
    category == "Furniture and Rooms" ~ "furniture_rooms",
    category == "Outside Thlnqs" ~ "outside",
    category == "Action Words" ~ "action_words",
    category == "Places to Go" ~ "places",
    category == "Helping Verbs" ~ "helping_verbs",
    category == "Connectinq Words" ~ "connecting_words",
    category == "Descriptive Words" ~ "descriptive_words",
    category == "Words About Time" ~ "time_words",
    category == "Quantifiers and Articles" ~ "quantifiers",
    category == "Games and Routines" ~ "games_routines",
    category == "Question Words" ~ "question_words",
    category == "Prepositions and Locations" ~ "locations",
    category == "Helpinq Verbs" ~ "helping_verbs",
    TRUE ~ category
  )) %>%
  mutate(category = tolower(category)) %>%
  # Branch order matters for homonyms: the category-specific branch must come
  # before the catch-all branch for the same word.
  mutate(definition = case_when(
    definition == "baa" ~ "baa baa",
    definition == "cockadoodle" ~ "cockadoodledoo",
    definition == "quack" ~ "quack quack",
    definition == "uhoh" ~ "uh oh",
    definition == "woof" ~ "woof woof",
    definition == "yum" ~ "yum yum",
    definition == "chicken" & category == "food_drink" ~ "chicken (food)",
    definition == "chicken" ~ "chicken (animal)",
    definition == "fish" & category == "food_drink" ~ "fish (food)",
    definition == "fish" ~ "fish (animal)",
    definition == "playdough" ~ "play dough",
    definition == "vagina" ~ "vagina*",
    definition == "penis" ~ "penis*",
    definition == "frenchfries" ~ "french fries",
    definition == "greenbeans" ~ "green beans",
    definition == "toy" ~ "toy (object)",
    definition == "drink" & category == "action_words" ~ "drink (action)",
    definition == "drink" ~ "drink (beverage)",
    definition == "gasstation" ~ "gas station",
    definition == "orange" & category == "food_drink" ~ "orange (food)",
    definition == "orange" ~ "orange (description)",
    definition == "allgone" ~ "all gone",
    definition == "water" & category == "food_drink" ~ "water (beverage)",
    definition == "water" ~ "water (not beverage)",
    definition == "feet" ~ "foot",
    definition == "callph" ~ "call (on phone)",
    definition == "clean" & category == "action_words" ~ "clean (action)",
    definition == "clean" ~ "clean (description)",
    definition == "owie  booboo" ~ "owie/boo boo",
    definition == "dont" ~ "don't",
    definition == "5bowl" ~ "bowl",
    definition == "can" & category == "household" ~ "can (object)",
    definition == "can" & category == "helping_verbs" ~ "can (auxiliary)",
    definition == "rockingchair" ~ "rocking chair",
    definition == "alot" ~ "a lot",
    definition == "buttocks/bottom" ~ "buttocks/bottom*",
    definition == "daddy" ~ "daddy*",
    definition == "childname" ~ "child's own name",
    definition == "washingmachine" ~ "washing machine",
    definition == "try" ~ "try/try to",
    definition == "work" & category == "places" ~ "work (place)",
    definition == "work" ~ "work (action)",
    definition == "giveme five" ~ "give me five!",
    definition == "mommy" ~ "mommy*",
    definition == "grandma" ~ "grandma*",
    definition == "church" ~ "church*",
    definition == "grandpa" ~ "grandpa*",
    definition == "patty cake" ~ "pattycake",
    definition == "dry" & category == "action_words" ~ "dry (action)",
    definition == "dry" ~ "dry (description)",
    definition == "lemme" ~ "lemme/let me",
    definition == "tissklee" ~ "tissue/kleenex",
    definition == "did" ~ "did/did ya",
    definition == "gonna get  you" ~ "gonna get you!",
    definition == "peanutbutter" ~ "peanut butter",
    definition == "playpen" ~ "play pen",
    definition == "potatochip" ~ "potato chip",
    definition == "wanna" ~ "wanna/want to",
    definition == "watch" & category == "action_words" ~ "watch (action)",
    definition == "watch" ~ "watch (object)",
    definition == "dress" ~ "dress (object)",
    definition == "gonna" ~ "gonna/going to",
    definition == "gotta" ~ "gotta/got to",
    definition == "hafta" ~ "hafta/have to",
    definition == "highchair" ~ "high chair",
    definition == "lawnmower" ~ "lawn mower",
    definition == "little" ~ "little (description)",
    definition == "petname" ~ "pet's name",
    definition == "so big" ~ "so big!",
    definition == "need" ~ "need/need to",
    definition == "shush" ~ "shh/shush/hush",
    definition == "swing" & category == "action_words" ~ "swing (action)",
    definition == "swing" ~ "swing (object)",
    definition == "slide" & category == "action_words" ~ "slide (action)",
    definition == "slide" ~ "slide (object)",
    TRUE ~ definition
  ))

# Store the cleaned long-format WS data for the analysis sections.
save(data_clean_AS_WS, file = "data/ASD_WS.Rdata")

# Overview of the per-item aggregates.
summary(data_all_AS_WS)

Clean ASD CDI:WG data

Note: found a problem in the NDAR description file – mcg_vc18_back is given the definition “backyard” (making a duplicate) instead of “back”. Also, “throw” (col 517) was left out of previous semantic network growth analyses.

# Read the tab-delimited CDI:WG export and drop rows whose mcg_vc_totcom is
# "999" (presumably a missing-data code -- confirm).
data_AS_WG <- read.delim(
  "data/ASD CDI data/mci_words_gestures01.txt",
  header = TRUE,
  sep = "\t",
  dec = "."
)
data_AS_WG <- filter(data_AS_WG, mcg_vc_totcom != "999")
# filter(mcg_vc_totpr != "999") - for production?

# The first row of the export describes each column; transpose it into a
# one-column lookup table whose row names are the original column names.
description_WG <- as.data.frame(t(data_AS_WG[1, ]))
names(description_WG) <- "description"

# Only vocabulary items are kept; these are the column ranges outside the
# retained vocabulary block (column 517 lies in this range but is re-added
# explicitly in the selection below).
eliminated_WG <- dplyr::select(data_AS_WG, c(23:58, 454:520))

# mci_words_gestures01_id is used as the distinctive row id and subjectkey is
# the NDAR Global Unique Identifier.
data_raw_AS_WG <- dplyr::select(
  data_AS_WG,
  c(
    "collection_id", "dataset_id", "sex", "mci_words_gestures01_id",
    "subjectkey", "interview_age", 59:453, 517
  )
)

# Use the description row as the column names, then remove it from the data.
colnames(data_raw_AS_WG) <- as.character(unlist(data_raw_AS_WG[1, ]))
data_raw_AS_WG <- data_raw_AS_WG[-1, ]

# Columns whose name repeats that of an earlier column (kept for inspection).
AS_WG_duplicated <- data_raw_AS_WG[duplicated(names(data_raw_AS_WG))]

# Disambiguate repeated names: later occurrences get the suffix _1, _2, ...
colnames(data_raw_AS_WG) <- make.unique(colnames(data_raw_AS_WG), sep = "_")


# Give the identifier/demographic columns short names, and shorten the one item
# column that carries its full checklist title ("... Vocabulary Checklist: House").
data_raw_AS_WG <- rename(
  data_raw_AS_WG,
  id = "mci_words_gestures01_id",
  GUID = "The NDAR Global Unique Identifier (GUID) for research subject",
  age = "Age in months at the time of the interview/test/sampling/imaging.",
  house = "MacArthur Words and Gestures: Vocabulary Checklist: House",
  sex = "Sex of the subject"
)

# Age was not read as a number; go through character so that a factor (if any)
# is converted by label rather than by level code.
data_raw_AS_WG <- mutate(data_raw_AS_WG, age = as.numeric(as.character(age)))


# Reshape to long format: one row per child x item.
data_clean_AS_WG <- data_raw_AS_WG %>%
  gather(key = "definition", value = "value",
         -c(id, GUID, age, sex, dataset_id, collection_id)) %>%
  #separate(definition, c("category","definition"),sep = "\\. ") %>%
  # Blank strings become NA. Only text-like columns are touched: applying
  # na_if(x, "") to every column (the previous mutate_all() call) fails on the
  # numeric `age` column under dplyr >= 1.1, where na_if() type-checks `y`.
  mutate(across(
    where(function(col) is.character(col) || is.factor(col)),
    function(col) replace(col, !is.na(col) & col == "", NA)
  )) %>%
  # 0 -> FALSE, any other non-missing response -> TRUE, NA stays NA.
  mutate(value = value != 0)

# Per item x age: count of TRUE responses, the rest of the group (n() also
# counts NA responses, so they land in num_false), and the share of TRUE
# responses among all rows of the group.
data_all_AS_WG <- summarise(
  group_by(data_clean_AS_WG, definition, age),
  num_true = sum(value, na.rm = TRUE),
  num_false = n() - num_true,
  prop = num_true / n()
)

# Rewrite WG item labels. Names ending in "_1" are the second occurrence of a
# repeated column name (suffix added by make.unique()), i.e. the other sense of
# a homonym. Labels without an entry in the table are left unchanged.
data_clean_AS_WG <- local({
  relabel <- c(
    "bye or bye bye" = "bye",
    "chicken"        = "chicken (animal)",
    "chicken_1"      = "chicken (food)",
    "peek-a-boo"     = "peekaboo",
    "water"          = "water (beverage)",
    "water_1"        = "water (not beverage)",
    "church"         = "church*",
    "clean"          = "clean (action)",
    "clean_1"        = "clean (description)",
    "daddy"          = "daddy*",
    "dress"          = "dress (object)",
    "towl"           = "towel",
    "grandpa"        = "grandpa*",
    "grandma"        = "grandma*",
    "mommy"          = "mommy*",
    "owie/ boo boo"  = "owie/boo boo",
    "little"         = "little (description)",
    "drink"          = "drink (beverage)",
    "drink_1"        = "drink (action)",
    "dry"            = "dry (description)",
    "fire truck"     = "firetruck",
    "fish"           = "fish (animal)",
    "fish_1"         = "fish (food)",
    "toy"            = "toy (object)",
    "teddy bear"     = "teddybear",
    "swing"          = "swing (object)",
    "swing_1"        = "swing (action)",
    "work"           = "work (place)",
    "orange"         = "orange (food)",
    "patty cake"     = "pattycake",
    "slide"          = "slide (object)",
    "watch"          = "watch (object)",
    "watch_1"        = "watch (action)",
    "backyard_1"     = "back" # "mcg_vc18_back"
  )
  # Look each label up by name; a miss yields NA, which coalesce() replaces
  # with the original label.
  mutate(
    data_clean_AS_WG,
    definition = coalesce(unname(relabel[definition]), definition)
  )
})


# Store the cleaned long-format WG data for the analysis sections.
save(data_clean_AS_WG, file = "data/ASD_WG.Rdata")

# Overview of the per-item aggregates.
summary(data_all_AS_WG)

Summary of the WS ASD studies

We need to consider which ASD studies are relevant to include (e.g., some may be from more severe cases than others), as well as which have oddities (e.g., duplicate/mismatched subject IDs). At first glance, the summary by dataset_id shows that a few of the datasets have all-zero production scores (dataset_id = c(9137, 17935, 21697, 17999, 17151)). Are these true 0 CDI scores, or are the CDI data for these studies somehow missing? As these studies comprise 1170 participants (of our 4488 total ASD sample), it is important to know what’s going on, and whether or not they should be excluded. I also note that studies 17935 and 21697 suspiciously have the same age range, mean_age, and number of subjects – are these 194 subjects duplicated under different dataset_ids?

For now, I remove all of the datasets without any non-zero production scores.

# Restore the cleaned long-format data saved by the cleaning sections.
load("data/ASD_WS.Rdata") # provides data_clean_AS_WS
load("data/ASD_WG.Rdata") # provides data_clean_AS_WG

# One row per WS administration: number of items marked TRUE.
asd_ws <- summarise(
  group_by(data_clean_AS_WS, id, age, sex, dataset_id),
  production = sum(value, na.rm = TRUE)
)
## `summarise()` has grouped output by 'id', 'age', 'sex'. You can override using
## the `.groups` argument.
# One row per WG administration (id x age): number of items marked TRUE.
asd_wg <- summarise(
  group_by(data_clean_AS_WG, id, age),
  production = sum(value, na.rm = TRUE)
)
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
# remove extreme ages
#data_clean_AS_WS <- data_clean_AS_WS %>%
#  filter(age>=12, age<=48)

#data_clean_AS_WG <- data_clean_AS_WG %>%
#  filter(age>=8, age<=36)


# Per-dataset overview of the WS data: age range, mean age, number of
# administrations and mean production score, largest datasets first.
data_clean_AS_WS %>%
  group_by(id, age, sex, dataset_id) %>%
  summarise(production = sum(value, na.rm = TRUE)) %>%
  group_by(dataset_id) %>%
  summarise(
    min_age = min(age),
    max_age = max(age),
    mean_age = mean(age),
    n = n(),
    mean_prod = mean(production)
  ) %>%
  arrange(desc(n)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 1)
## `summarise()` has grouped output by 'id', 'age', 'sex'. You can override using
## the `.groups` argument.
dataset_id min_age max_age mean_age n mean_prod
9137 1 48 32.7 657 0.0
17679 12 39 25.9 467 304.7
15459 12 39 25.4 445 293.5
14572 12 39 25.4 435 292.4
12399 12 39 25.3 399 285.4
11395 12 38 24.5 320 276.9
10564 12 37 20.0 314 164.0
11014 17 122 58.0 256 433.3
11259 12 38 21.9 205 222.3
17935 17 34 22.4 194 0.0
21697 17 34 22.4 194 0.0
9961 12 26 18.6 125 125.4
18066 23 60 30.1 115 375.9
17999 17 26 19.9 113 0.0
7881 23 47 29.6 90 78.4
9960 12 20 14.8 53 59.0
13475 19 64 36.5 37 370.5
12608 19 64 37.1 35 380.0
9959 12 14 12.5 22 10.8
17151 18 24 21.0 12 0.0
# Exclude the datasets in which no child has a non-zero production score.
bad_ws_datasets <- c(9137, 17935, 21697, 17999, 17151)
asd_ws <- filter(asd_ws, !(dataset_id %in% bad_ws_datasets))
# dim(subset(asd_ws, production==0)) # now only 158/3318 children aren't producing any words

Note that there are many CDI:WS administrations for ASD children outside the intended age range (16-30 months): 352 children aged <16 months (mean production = 22 words), and 894 children aged >30 months (mean production = 476 words).

Similarly, there are many CDI:WG administrations for ASD children outside the intended age range (8-16 months): 12 children aged <8 months (mean production = 2 words), and 4354 children aged >16 months (mean production = 59 words).

We will constrain our analysis to those children close to the intended age ranges, extending a wider margin for older children to capture the attenuated language learning of children with ASD. Thus, for the CDI:WS we include 3148 children aged 12-48 months (removing only 5.1% of the data). For the CDI:WG we include 7862 children aged 8-36 months (removing only 7.4% of the data).

Load Typically-Developing Data

## `summarise()` has grouped output by 'data_id', 'age'. You can override using
## the `.groups` argument.

Production vs. Age

Production sumscores on the CDI:WS for TD vs. ASD children.

Production sumscores on the CDI:WS for TD vs. ASD children.

DIF Analysis for WS data

9 typically-developing (TD) children were removed from the wordbank data (5520 total) due to their not yet producing any words. 158 children with ASD were removed from the NDAR data (3318 total) due to their not yet producing any words.

# Fit the grouped model on the WS response matrix and cache it on disk.
# NOTE(review): d_mat, d_group and fit_mod_intuitive() are defined outside this
# section.
mod_dev_group <- fit_mod_intuitive(d_mat, d_group)

# The file is written with save() (despite the .Rds extension), so it has to be
# read back with load(), which restores `mod_dev_group` by name.
save(mod_dev_group, file = "data/prodWS_IRT_model.Rds")
load("data/prodWS_IRT_model.Rds") # 12-48 mos ASD
#plot_glimmer(mod_dev_group, colnames(d_mat), colnames(d_mat),
#             plotName="GLIMMER_asd_prodWS_age12-48mos")

#load("data/prodWS_IRT_model_16-48mos.Rds")
#plot_glimmer(mod_dev_group, colnames(d_mat), colnames(d_mat),
#             plotName="GLIMMER_asd_prodWS_age16-48mos")
# doesn't make a difference whether we exclude 12-15 month-old ASD kids or not


# Per-item parameters for the two groups, labelled TD and ASD.
mm_asd <- extract_group_df(mod_dev_group, groups = c("TD", "ASD"))
## Joining, by = c("a1", "definition")
#dif_hist <- item_difficulty_difference_histogram(mm_asd)

# Extreme items: absolute difficulty difference above
# median + 2 * SD of the absolute differences.
thresh <- with(mm_asd, median(d_diff_abs) + 2 * sd(d_diff_abs))
big_dif <- mm_asd[which(mm_asd$d_diff_abs > thresh), ]

dif_histn <- item_difficulty_difference_histogram(mm_asd, withNormal = TRUE)
print(dif_histn)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The majority of items are easier for TD than for ASD children. We examine the extrema: items whose absolute difficulty difference exceeds the median absolute difference by more than 2 standard deviations (threshold |d_diff| = 3.21; red dotted lines in histogram). These 33 extrema are listed below.

# Attach the item metadata from wb_items (natural join on the shared columns).
big_dif <- left_join(big_dif, wb_items)
## Joining, by = "definition"
# Table of the extreme items, ordered by signed difficulty difference, with the
# per-group difficulty columns labelled TD / ASD.
big_dif %>%
  arrange(d_diff) %>%
  relocate(category, definition) %>%
  rename(d_TD = d_g1, d_ASD = d_g2) %>%
  dplyr::select(-c(a1, group1, group2)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 2)
category definition d_TD d_ASD d_diff d_diff_abs
people mommy* 6.68 0.62 -6.06 6.06
people daddy* 6.46 1.94 -4.52 4.52
people baby 3.85 0.27 -3.58 3.58
action_words cover -1.90 1.32 3.22 3.22
places playground -1.73 1.51 3.24 3.24
people girl 0.09 3.40 3.31 3.31
action_words give -0.83 2.50 3.34 3.34
action_words pretend -3.09 0.25 3.34 3.34
animals penguin -1.25 2.14 3.39 3.39
action_words pick -2.00 1.42 3.42 3.42
action_words hurry -1.56 1.86 3.43 3.43
action_words listen -1.90 1.53 3.43 3.43
action_words make -1.35 2.09 3.44 3.44
people aunt -0.72 2.75 3.47 3.47
games_routines go potty 0.70 4.22 3.52 3.52
action_words chase -1.86 1.69 3.55 3.55
people person -3.74 -0.06 3.68 3.68
places movie -1.46 2.31 3.78 3.78
games_routines so big! -1.01 2.83 3.84 3.84
places gas station -2.25 1.62 3.88 3.88
people babysitter -3.43 0.48 3.92 3.92
people uncle -0.89 3.09 3.99 3.99
furniture_rooms basement -3.65 0.37 4.02 4.02
places woods -3.73 0.29 4.02 4.02
action_words rip -3.10 0.97 4.07 4.07
action_words hate -4.32 -0.17 4.15 4.15
games_routines breakfast -0.03 4.15 4.18 4.18
games_routines turn around -1.02 3.25 4.27 4.27
action_words dump -2.48 2.02 4.50 4.50
action_words skate -3.07 1.46 4.53 4.53
people nurse -3.59 1.50 5.09 5.09
people man -0.57 4.59 5.15 5.15
people cowboy -3.07 3.85 6.92 6.92
# Number of extreme items per category, in ascending order of frequency.
kable(
  sort(table(big_dif$category)),
  col.names = c("Category", "Frequency")
)
Category Frequency
animals 1
furniture_rooms 1
games_routines 4
places 4
people 11
action_words 12

Mostly people and action words.

Ability vs. Sumscore by Age

CDI:WG Production

Examine ASD WG datasets

# Per-dataset overview of the WG data: age range, mean age, number of
# administrations and mean sum score, largest datasets first.
data_clean_AS_WG %>%
  group_by(id, age, sex, dataset_id) %>%
  summarise(comprehension = sum(value, na.rm = TRUE)) %>%
  group_by(dataset_id) %>%
  summarise(
    min_age = min(age),
    max_age = max(age),
    mean_age = mean(age),
    n = n(),
    mean_comp = mean(comprehension)
  ) %>%
  arrange(desc(n)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 1)
## `summarise()` has grouped output by 'id', 'age', 'sex'. You can override using
## the `.groups` argument.
dataset_id min_age max_age mean_age n mean_comp
12095 17 22 18.0 918 0.0
9138 3 48 22.9 876 0.0
14747 11 38 18.3 706 151.3
16641 11 38 18.3 706 151.3
10754 0 45 17.8 420 0.0
11574 0 45 17.8 420 0.0
10564 8 20 11.8 354 48.4
11014 13 132 57.5 290 185.3
8703 8 63 38.0 283 0.0
8667 11 48 27.5 268 0.0
12399 8 20 11.6 226 46.6
14572 8 20 11.6 226 46.6
15459 8 20 11.6 226 46.6
17679 8 20 11.6 226 46.6
11395 8 20 11.6 223 46.8
18066 9 21 13.0 216 0.0
14458 7 21 12.9 208 0.0
11259 8 20 11.7 201 46.2
17999 11 20 15.1 196 0.0
12942 7 21 12.6 194 0.0
11717 7 21 12.1 163 0.0
9961 8 20 12.1 162 48.2
8368 10 18 13.7 138 0.0
17935 12 22 16.2 112 0.0
8176 15 30 24.3 88 0.0
8370 15 30 24.3 88 0.0
9960 7 20 13.5 69 119.5
16640 12 29 20.9 61 205.6
18941 8 19 12.0 50 38.6
9959 7 14 10.8 44 59.6
8535 8 60 32.4 20 0.0
9507 49 85 68.6 20 0.0
12891 27 53 35.2 18 0.0
18703 59 101 76.1 18 171.2
10971 60 173 86.2 13 131.2
10335 60 109 77.7 10 136.6
11805 11 14 12.3 10 48.6
12820 12 12 12.0 7 0.0
9777 62 101 76.6 7 122.7
12790 74 124 106.3 3 0.0
8532 24 24 24.0 1 0.0
8853 12 12 12.0 1 10.0
8912 42 42 42.0 1 0.0
8922 22 22 22.0 1 0.0

Datasets with all 0 comprehension scores: c(12095, 9138, 10754, 11574, 8703, 8667, 18066, 14458, 17999, 12942, 11717, 8368, 17935, 8176, 8370, 8535, 9507, 12891, 12820, 12790, 8532, 8912, 8922). Datasets 10754 and 11574 have identical age ranges, mean ages, and Ns, as do 8176 and 8370. Datasets 16641 and 14747 have identical age ranges, mean ages, Ns, and average comprehension scores. After removing all of the 0-comprehension datasets, there are only 3829 participants.

# Exclude the WG datasets whose scores are all zero.
bad_asd_wg_datasets <- c(
  12095, 9138, 10754, 11574, 8703, 8667, 18066, 14458, 17999, 12942, 11717,
  8368, 17935, 8176, 8370, 8535, 9507, 12891, 12820, 12790, 8532, 8912, 8922
)
data_clean_AS_WG <- filter(
  data_clean_AS_WG,
  !(dataset_id %in% bad_asd_wg_datasets)
)
## `summarise()` has grouped output by 'data_id', 'age'. You can override using
## the `.groups` argument.

241 typically-developing (TD) children were removed from the wordbank data (5520 total) due to their not yet producing any words. 1058 children with ASD were removed from the NDAR data (3318 total) due to their not yet producing any words.

Production sumscores on the CDI:WG for TD vs. ASD children.

Production sumscores on the CDI:WG for TD vs. ASD children.

# Fit the grouped model on the WG response matrix and cache it on disk.
# NOTE(review): this reuses the name `mod_dev_group`, replacing the WS model
# object; d_mat_wg, d_group and fit_mod_intuitive() are defined outside this
# section.
mod_dev_group <- fit_mod_intuitive(d_mat_wg, d_group)
# Written with save() (despite the .Rds extension), hence read with load().
save(mod_dev_group, file = "data/prodWG_IRT_model.Rds")
load("data/prodWG_IRT_model.Rds") # 8-36mo ASD
#plot_glimmer(mod_dev_group, colnames(d_mat_wg), colnames(d_mat_wg),
#             plotName="GLIMMER_asd_prodWG_age8-36mos")


# Per-item parameters for the two groups, labelled TD and ASD.
mm_asd_wg <- extract_group_df(mod_dev_group, groups = c("TD", "ASD"))
## Joining, by = c("a1", "definition")
#dif_hist <- item_difficulty_difference_histogram(mm_asd)

# Extreme items: absolute difficulty difference above
# median + 2 * SD of the absolute differences.
thresh_wg <- with(mm_asd_wg, median(d_diff_abs) + 2 * sd(d_diff_abs))
big_dif_wg <- mm_asd_wg[which(mm_asd_wg$d_diff_abs > thresh_wg), ]

dif_histn_wg <- item_difficulty_difference_histogram(mm_asd_wg, withNormal = TRUE)
print(dif_histn_wg)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The majority of the items are much easier for TD than for ASD children. We examine the extrema: items whose absolute difficulty difference exceeds the median absolute difference by more than 2 standard deviations (threshold |d_diff| = 7.88; red dotted lines in histogram). These 3 extrema are listed below.

# Attach the item metadata from wg_items (natural join on the shared columns).
big_dif_wg <- left_join(big_dif_wg, wg_items)
## Joining, by = "definition"
# Table of the extreme WG items, ordered by signed difficulty difference, with
# the per-group difficulty columns labelled TD / ASD.
big_dif_wg %>%
  arrange(d_diff) %>%
  relocate(category, definition) %>%
  rename(d_TD = d_g1, d_ASD = d_g2) %>%
  dplyr::select(-c(a1, group1, group2)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 2)
category definition d_TD d_ASD d_diff d_diff_abs
descriptive_words old -7.79 0.25 8.04 8.04
pronouns her -7.60 0.45 8.04 8.04
question_words when -8.32 0.00 8.32 8.32