Clean ASD CDI:WS data

# Read the tab-delimited NDAR CDI:WS export, then discard rows whose vocabulary
# total (mcs_vc_total) equals "999" (presumably a missing-data code -- confirm).
data_AS_WS <- read.delim(
  "data/ASD CDI data/mci_sentences02.txt",
  header = TRUE, sep = "\t", dec = "."
)
data_AS_WS <- filter(data_AS_WS, mcs_vc_total != "999")

# Row 1 of the export holds the textual description of every column; transpose
# it into a one-column lookup table with one row per original column.
description_WS <- as.data.frame(t(data_AS_WS[1, ]))
names(description_WS) <- "description"


# Columns that are left out of the analysis, listed as
# (column name, description) pairs taken from the description row.
eliminated_WS <- data_AS_WS %>%
  slice(1) %>%
  dplyr::select(701:709, 780:850) %>%
  gather(key = "Column names", value = "Description")

# Descriptions of the grammar items (past tense, future, not present, etc.)
description_WS[701:709, ]
# Descriptions of the complexity items and examples (e.g. longest sentences)
description_WS[780:850, ]

# mci_sentences02_id serves as the distinctive record id, alongside the NDAR
# Global Unique Identifier. Item columns 21:779 are kept: per the original
# note, the complexity section starts around column 785, so this retains
# vocabulary, word endings and word forms.
data_raw_AS_WS <- data_AS_WS %>%
  dplyr::select(
    "mci_sentences02_id", "subjectkey", "interview_age",
    "collection_id", "dataset_id", "interview_date",
    "src_subject_id", "sex", 21:779
  ) %>%
  # Positions below refer to the re-ordered selection above (they were 684:692
  # before interview_age ... sex were added to the identifier columns).
  dplyr::select(-(689:697))

# Promote the description row to column names, then drop it from the data.
names(data_raw_AS_WS) <- as.character(unlist(data_raw_AS_WS[1, ]))
data_raw_AS_WS <- data_raw_AS_WS[-1, ]

# Give the identifier columns short names. The right-hand sides are the
# description strings that were promoted to column names.
data_raw_AS_WS <- rename(
  data_raw_AS_WS,
  id = "mci_sentences02_id",
  GUID = "The NDAR Global Unique Identifier (GUID) for research subject",
  age = "Age in months at the time of the interview/test/sampling/imaging.",
  test_date = "Date on which the interview/genetic test/sampling/imaging/biospecimen was completed. MM/DD/YYYY",
  src_subject_id = "Subject ID how it's defined in lab/project",
  sex = "Sex of the subject"
)

# Age is not numeric at this point; convert it (via character) to a number.
data_raw_AS_WS <- mutate(data_raw_AS_WS, age = as.numeric(as.character(age)))

# Reshape to long format: one row per (administration, item).
#   - every non-identifier column becomes a (definition, value) pair;
#   - the column description is split at the first ". " into the item's
#     category and its definition;
#   - blank strings become NA;
#   - value becomes logical: FALSE when the response is 0, TRUE for any other
#     response, NA when the response was blank.
data_clean_AS_WS <- data_raw_AS_WS %>%
  gather(
    key = "definition", value = "value",
    -c(id, GUID, age, collection_id, dataset_id, test_date, src_subject_id, sex)
  ) %>%
  separate(definition, c("category", "definition"), sep = "\\. ") %>%
  # Only character columns can contain "". Applying na_if(x, "") to the
  # numeric `age` column (as mutate_all did) fails under dplyr >= 1.1, which
  # casts `y` to the type of `x`.
  # NOTE(review): assumes text columns are character (the read.delim default
  # on R >= 4.0); factor columns would be skipped.
  mutate(across(where(is.character), ~ na_if(.x, ""))) %>%
  mutate(value = value != 0)

# Per item (category + definition) and age: how many administrations marked it
# TRUE, how many did not, and the proportion marked TRUE. Missing responses
# are excluded from num_true but still counted in n(), so they fall into
# num_false and lower prop.
data_all_AS_WS <- summarise(
  group_by(data_clean_AS_WS, category, definition, age),
  num_true = sum(value, na.rm = TRUE),
  num_false = n() - num_true,
  prop = num_true / n()
)

# unique(data_all_AS_WS$category)

# Restrict the long table to vocabulary items and normalise its labels.
#   1. Drop the word-form / word-ending sections.
#   2. Map the category labels found in the description row to short
#      snake_case names (several labels are misspelled there, e.g. "Clothinq",
#      "Outside Thlnqs", "Helpinq Verbs"; they are matched as-is).
#   3. Rewrite item definitions to a canonical spelling, disambiguating words
#      that occur in more than one category by the (already recoded) category.
#      Presumably the targets follow the Wordbank item definitions -- confirm.
# NOTE(review): "Word forms, noun" is singular while the other three section
# names are plural -- verify against unique(data_all_AS_WS$category).
data_clean_AS_WS <- data_clean_AS_WS %>%
  filter(
    category != "Word forms, noun",
    category != "Word forms, verbs",
    category != "Word endings, nouns",
    category != "Word endings, verbs"
  ) %>%
  mutate(category = case_when(
    category == "Sound Effects and Animal Sounds" ~ "sounds",
    category == "Animals (Real or Toy)" ~ "animals",
    category == "Vehicles (Real or Toy)" ~ "vehicles",
    category == "Food and Drink" ~ "food_drink",
    category == "Clothinq" ~ "clothing",
    category == "Body Parts" ~ "body_parts",
    category == "Small Household Items" ~ "household",
    category == "Furniture and Rooms" ~ "furniture_rooms",
    category == "Outside Thlnqs" ~ "outside",
    category == "Action Words" ~ "action_words",
    category == "Places to Go" ~ "places",
    category == "Helping Verbs" ~ "helping_verbs",
    category == "Connectinq Words" ~ "connecting_words",
    category == "Descriptive Words" ~ "descriptive_words",
    category == "Words About Time" ~ "time_words",
    category == "Quantifiers and Articles" ~ "quantifiers",
    category == "Games and Routines" ~ "games_routines",
    category == "Question Words" ~ "question_words",
    category == "Prepositions and Locations" ~ "locations",
    category == "Helpinq Verbs" ~ "helping_verbs",
    TRUE ~ category
  )) %>%
  mutate(
    # Categories without an explicit mapping above are simply lower-cased.
    category = tolower(category),
    # Branch order matters: a category-specific branch must precede the
    # generic branch for the same word.
    definition = case_when(
      definition == "baa" ~ "baa baa",
      definition == "cockadoodle" ~ "cockadoodledoo",
      definition == "quack" ~ "quack quack",
      definition == "uhoh" ~ "uh oh",
      definition == "woof" ~ "woof woof",
      definition == "yum" ~ "yum yum",
      definition == "chicken" & category == "food_drink" ~ "chicken (food)",
      definition == "chicken" ~ "chicken (animal)",
      definition == "fish" & category == "food_drink" ~ "fish (food)",
      definition == "fish" ~ "fish (animal)",
      definition == "playdough" ~ "play dough",
      definition == "vagina" ~ "vagina*",
      definition == "penis" ~ "penis*",
      definition == "frenchfries" ~ "french fries",
      definition == "greenbeans" ~ "green beans",
      definition == "toy" ~ "toy (object)",
      definition == "drink" & category == "action_words" ~ "drink (action)",
      definition == "drink" ~ "drink (beverage)",
      definition == "gasstation" ~ "gas station",
      definition == "orange" & category == "food_drink" ~ "orange (food)",
      definition == "orange" ~ "orange (description)",
      definition == "allgone" ~ "all gone",
      definition == "water" & category == "food_drink" ~ "water (beverage)",
      definition == "water" ~ "water (not beverage)",
      definition == "feet" ~ "foot",
      definition == "callph" ~ "call (on phone)",
      definition == "clean" & category == "action_words" ~ "clean (action)",
      definition == "clean" ~ "clean (description)",
      definition == "owie  booboo" ~ "owie/boo boo",
      definition == "dont" ~ "don't",
      definition == "5bowl" ~ "bowl",
      definition == "can" & category == "household" ~ "can (object)",
      definition == "can" & category == "helping_verbs" ~ "can (auxiliary)",
      definition == "rockingchair" ~ "rocking chair",
      definition == "alot" ~ "a lot",
      definition == "buttocks/bottom" ~ "buttocks/bottom*",
      definition == "daddy" ~ "daddy*",
      definition == "childname" ~ "child's own name",
      definition == "washingmachine" ~ "washing machine",
      definition == "try" ~ "try/try to",
      definition == "work" & category == "places" ~ "work (place)",
      definition == "work" ~ "work (action)",
      definition == "giveme five" ~ "give me five!",
      definition == "mommy" ~ "mommy*",
      definition == "grandma" ~ "grandma*",
      definition == "church" ~ "church*",
      # (a second, unreachable "daddy" branch used to sit here)
      definition == "grandpa" ~ "grandpa*",
      definition == "patty cake" ~ "pattycake",
      definition == "dry" & category == "action_words" ~ "dry (action)",
      definition == "dry" ~ "dry (description)",
      definition == "lemme" ~ "lemme/let me",
      definition == "tissklee" ~ "tissue/kleenex",
      definition == "did" ~ "did/did ya",
      definition == "gonna get  you" ~ "gonna get you!",
      definition == "peanutbutter" ~ "peanut butter",
      definition == "playpen" ~ "play pen",
      definition == "potatochip" ~ "potato chip",
      definition == "wanna" ~ "wanna/want to",
      definition == "watch" & category == "action_words" ~ "watch (action)",
      definition == "watch" ~ "watch (object)",
      definition == "dress" ~ "dress (object)",
      definition == "gonna" ~ "gonna/going to",
      definition == "gotta" ~ "gotta/got to",
      definition == "hafta" ~ "hafta/have to",
      definition == "highchair" ~ "high chair",
      definition == "lawnmower" ~ "lawn mower",
      definition == "little" ~ "little (description)",
      definition == "petname" ~ "pet's name",
      definition == "so big" ~ "so big!",
      definition == "need" ~ "need/need to",
      definition == "shush" ~ "shh/shush/hush",
      definition == "swing" & category == "action_words" ~ "swing (action)",
      definition == "swing" ~ "swing (object)",
      definition == "slide" & category == "action_words" ~ "slide (action)",
      definition == "slide" ~ "slide (object)",
      TRUE ~ definition
    )
  )

# Persist the cleaned long-format WS table (restored later with load()).
save(list = "data_clean_AS_WS", file = "data/ASD_WS.Rdata")

# Quick look at the per-item / per-age summary.
summary(data_all_AS_WS)

Clean ASD CDI:WG data

Note: found a problem in the NDAR description file – mcg_vc18_back is given the definition “backyard” (making a duplicate) instead of “back”. Also, “throw” (col 517) was left out of previous semantic network growth analyses.

# Read the tab-delimited NDAR CDI:WG export, then discard rows whose
# comprehension total (mcg_vc_totcom) equals "999" (presumably a missing-data
# code -- confirm).
data_AS_WG <- read.delim(
  "data/ASD CDI data/mci_words_gestures01.txt",
  header = TRUE, sep = "\t", dec = "."
)
data_AS_WG <- filter(data_AS_WG, mcg_vc_totcom != "999")
# filter(mcg_vc_totpr != "999") - for production?

# Row 1 of the export holds the textual description of every column; transpose
# it into a one-column lookup table with one row per original column.
description_WG <- as.data.frame(t(data_AS_WG[1, ]))
names(description_WG) <- "description"

# Non-vocabulary columns (only vocabulary is kept for analysis). Column 517
# falls inside this range but is also selected as an item column below.
eliminated_WG <- dplyr::select(data_AS_WG, 23:58, 454:520)

# mci_words_gestures01_id serves as the distinctive record id, alongside the
# NDAR Global Unique Identifier. Item columns 59:453 plus column 517 are kept.
data_raw_AS_WG <- data_AS_WG %>%
  dplyr::select(
    "collection_id", "dataset_id", "sex", "mci_words_gestures01_id",
    "subjectkey", "interview_age", 59:453, 517
  )

# Promote the description row to column names, then drop it from the data.
names(data_raw_AS_WG) <- as.character(unlist(data_raw_AS_WG[1, ]))
data_raw_AS_WG <- data_raw_AS_WG[-1, ]

# Columns whose description repeats that of an earlier column.
AS_WG_duplicated <- data_raw_AS_WG[duplicated(names(data_raw_AS_WG))]

# Disambiguate repeated names by suffixing later occurrences ("_1", "_2", ...).
names(data_raw_AS_WG) <- make.unique(names(data_raw_AS_WG), sep = "_")


# Give the identifier columns (and the one item whose description was not
# reduced to a bare word) short names. The right-hand sides are the
# description strings that were promoted to column names.
data_raw_AS_WG <- rename(
  data_raw_AS_WG,
  id = "mci_words_gestures01_id",
  GUID = "The NDAR Global Unique Identifier (GUID) for research subject",
  age = "Age in months at the time of the interview/test/sampling/imaging.",
  house = "MacArthur Words and Gestures: Vocabulary Checklist: House",
  sex = "Sex of the subject"
)

# Age is not numeric at this point; convert it (via character) to a number.
data_raw_AS_WG <- mutate(data_raw_AS_WG, age = as.numeric(as.character(age)))


# Reshape to long format: one row per (administration, item).
#   - every non-identifier column becomes a (definition, value) pair;
#   - blank strings become NA;
#   - value becomes logical: FALSE when the response is 0, TRUE for any other
#     response, NA when the response was blank.
data_clean_AS_WG <- data_raw_AS_WG %>%
  gather(
    key = "definition", value = "value",
    -c(id, GUID, age, sex, dataset_id, collection_id)
  ) %>%
  # separate(definition, c("category", "definition"), sep = "\\. ") %>%
  # Only character columns can contain "". Applying na_if(x, "") to the
  # numeric `age` column (as mutate_all did) fails under dplyr >= 1.1, which
  # casts `y` to the type of `x`.
  # NOTE(review): assumes text columns are character (the read.delim default
  # on R >= 4.0); factor columns would be skipped.
  mutate(across(where(is.character), ~ na_if(.x, ""))) %>%
  mutate(value = value != 0)

# Per item and age: how many administrations marked it TRUE, how many did not,
# and the proportion marked TRUE. Missing responses are excluded from num_true
# but still counted in n(), so they fall into num_false and lower prop.
data_all_AS_WG <- summarise(
  group_by(data_clean_AS_WG, definition, age),
  num_true = sum(value, na.rm = TRUE),
  num_false = n() - num_true,
  prop = num_true / n()
)

# Rewrite WG item definitions to a canonical spelling. Every branch tests a
# distinct literal, so the branches are mutually exclusive and are grouped
# here by purpose rather than by original order.
data_clean_AS_WG <- mutate(
  data_clean_AS_WG,
  definition = case_when(
    # Homonyms: the "_1" suffix was added when duplicate column names were
    # made unique, so the bare name is the first occurrence of the word.
    definition == "chicken" ~ "chicken (animal)",
    definition == "chicken_1" ~ "chicken (food)",
    definition == "fish" ~ "fish (animal)",
    definition == "fish_1" ~ "fish (food)",
    definition == "water" ~ "water (beverage)",
    definition == "water_1" ~ "water (not beverage)",
    definition == "drink" ~ "drink (beverage)",
    definition == "drink_1" ~ "drink (action)",
    definition == "clean" ~ "clean (action)",
    definition == "clean_1" ~ "clean (description)",
    definition == "swing" ~ "swing (object)",
    definition == "swing_1" ~ "swing (action)",
    definition == "watch" ~ "watch (object)",
    definition == "watch_1" ~ "watch (action)",
    # The description for mcg_vc18_back reads "backyard", so it shows up as
    # the duplicate "backyard_1".
    definition == "backyard_1" ~ "back",
    # Words given an explicit sense label.
    definition == "dress" ~ "dress (object)",
    definition == "little" ~ "little (description)",
    definition == "dry" ~ "dry (description)",
    definition == "toy" ~ "toy (object)",
    definition == "work" ~ "work (place)",
    definition == "orange" ~ "orange (food)",
    definition == "slide" ~ "slide (object)",
    # Items marked with a trailing asterisk.
    definition == "church" ~ "church*",
    definition == "daddy" ~ "daddy*",
    definition == "mommy" ~ "mommy*",
    definition == "grandma" ~ "grandma*",
    definition == "grandpa" ~ "grandpa*",
    # Spelling / spacing normalisation.
    definition == "bye or bye bye" ~ "bye",
    definition == "peek-a-boo" ~ "peekaboo",
    definition == "towl" ~ "towel",
    definition == "owie/ boo boo" ~ "owie/boo boo",
    definition == "fire truck" ~ "firetruck",
    definition == "teddy bear" ~ "teddybear",
    definition == "patty cake" ~ "pattycake",
    TRUE ~ definition
  )
)


# Persist the cleaned long-format WG table (restored later with load()).
save(list = "data_clean_AS_WG", file = "data/ASD_WG.Rdata")

# Quick look at the per-item / per-age summary.
summary(data_all_AS_WG)

Summary of the WS ASD studies

We need to consider which ASD studies are relevant to include (e.g., some may be from more severe cases than others), as well as which have oddities (e.g., duplicate/mismatched subject IDs). At first glance at the summary by dataset_id, a few of the datasets have all 0 production scores (e.g., dataset_id = c(9137, 17935, 21697, 17999, 17151)). Are these true 0 CDI scores, or are the CDI data for these studies somehow missing? As these studies comprise 1170 participants (of our 4488 total ASD sample), it is important to know what’s going on, and whether or not they should be excluded. I also note that studies 17935 and 21697 suspiciously have the same age range, mean_age, and number of subjects – are these 194 subjects duplicated under different dataset_ids?

For now, I remove all of the datasets without any non-zero production scores.

# Restore the cleaned long-format tables.
load("data/ASD_WS.Rdata") # data_clean_AS_WS
load("data/ASD_WG.Rdata") # data_clean_AS_WG

# One row per CDI:WS administration: number of items marked TRUE.
# `.groups = "drop_last"` keeps the result grouped by id, age and sex, which
# is what summarise() did by default (while printing a message about it).
asd_ws <- data_clean_AS_WS %>%
  group_by(id, age, sex, dataset_id) %>%
  summarise(production = sum(value, na.rm = TRUE), .groups = "drop_last")

# One row per CDI:WG administration: number of items marked TRUE (result stays
# grouped by id, as before).
# NOTE(review): unlike asd_ws this does not carry sex / dataset_id, so it
# cannot be filtered by dataset_id later -- confirm this is intended.
# NOTE(review): the column is called `production`, but the WG export was
# filtered on the comprehension total and the same sum is labelled
# `comprehension` in the per-dataset table -- confirm which is meant.
asd_wg <- data_clean_AS_WG %>%
  group_by(id, age) %>%
  summarise(production = sum(value, na.rm = TRUE), .groups = "drop_last")

# remove extreme ages
#data_clean_AS_WS <- data_clean_AS_WS %>%
#  filter(age>=12, age<=48)

#data_clean_AS_WG <- data_clean_AS_WG %>%
#  filter(age>=8, age<=36)


# Per-dataset overview of the WS data: age range, mean age, number of
# administrations and mean production score, largest datasets first.
data_clean_AS_WS %>%
  group_by(id, age, sex, dataset_id) %>%
  # The grouping is replaced by group_by(dataset_id) right below, so dropping
  # it here changes nothing except silencing the summarise() message.
  summarise(production = sum(value, na.rm = TRUE), .groups = "drop") %>%
  group_by(dataset_id) %>%
  summarise(
    min_age = min(age), max_age = max(age), mean_age = mean(age),
    n = n(), mean_prod = mean(production)
  ) %>%
  arrange(desc(n)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 1)

dataset_id	min_age	max_age	mean_age	n	mean_prod
9137	1	48	32.7	657	0.0
17679	12	39	25.9	467	304.7
15459	12	39	25.4	445	293.5
14572	12	39	25.4	435	292.4
12399	12	39	25.3	399	285.4
11395	12	38	24.5	320	276.9
10564	12	37	20.0	314	164.0
11014	17	122	58.0	256	433.3
11259	12	38	21.9	205	222.3
17935	17	34	22.4	194	0.0
21697	17	34	22.4	194	0.0
9961	12	26	18.6	125	125.4
18066	23	60	30.1	115	375.9
17999	17	26	19.9	113	0.0
7881	23	47	29.6	90	78.4
9960	12	20	14.8	53	59.0
13475	19	64	36.5	37	370.5
12608	19	64	37.1	35	380.0
9959	12	14	12.5	22	10.8
17151	18	24	21.0	12	0.0

# Datasets whose mean production is 0 in the per-dataset table; exclude them.
bad_ws_datasets <- c(9137, 17935, 21697, 17999, 17151)
asd_ws <- filter(asd_ws, !(dataset_id %in% bad_ws_datasets))
# dim(subset(asd_ws, production==0)) # now only 158/3318 children aren't producing any words

Note that there are many CDI:WS administrations for ASD children outside the intended age range (16-30 months): 352 children aged <16 months (mean production = 22 words), and 894 children aged >30 months (mean production = 476 words).

Similarly, there are many CDI:WG administrations for ASD children outside the intended age range (8-16 months): 12 children aged <12 months (mean production = 2 words), and 4354 children aged >16 months (mean production = 59 words).

We will constrain our analysis to those children close to the intended age ranges, extending a wider margin for older children to capture the attenuated language learning of children with ASD. Thus, for the CDI:WS we include 3148 children aged 12-48 months (removing only 5.1% of the data). For the CDI:WG we include 7862 children aged 8-36 months (removing only 7.4% of the data).

Load Typically-Developing Data

## `summarise()` has grouped output by 'data_id', 'age'. You can override using
## the `.groups` argument.

Production vs. Age

Production sumscores on the CDI:WS for TD vs. ASD children.

DIF Analysis for WS data

9 typically-developing (TD) children were removed from the wordbank data (5520 total) due to their not yet producing any words. 158 children with ASD were removed from the NDAR data (3318 total) due to their not yet producing any words.

# Fit the grouped model on the WS response matrix and cache it on disk.
mod_dev_group <- fit_mod_intuitive(d_mat, d_group)

# The file is written with save() despite its .Rds extension, so it has to be
# restored with load(), not readRDS().
save(list = "mod_dev_group", file = "data/prodWS_IRT_model.Rds")

# Restore the cached WS model (12-48 mos ASD) into `mod_dev_group`.
load(file = "data/prodWS_IRT_model.Rds")
# plot_glimmer(mod_dev_group, colnames(d_mat), colnames(d_mat),
#              plotName="GLIMMER_asd_prodWS_age12-48mos")

# Alternative fit restricted to 16-48 months; per the original note it makes
# no difference whether the 12-15 month-old ASD kids are excluded or not.
# load("data/prodWS_IRT_model_16-48mos.Rds")
# plot_glimmer(mod_dev_group, colnames(d_mat), colnames(d_mat),
#              plotName="GLIMMER_asd_prodWS_age16-48mos")

# Per-item parameters for the two groups (TD vs. ASD) in one data frame.
mm_asd <- extract_group_df(mod_dev_group, groups = c("TD", "ASD"))

## Joining, by = c("a1", "definition")

# dif_hist <- item_difficulty_difference_histogram(mm_asd)

# Flag items whose absolute TD-vs-ASD difficulty difference lies more than two
# standard deviations above the median absolute difference.
thresh <- median(mm_asd$d_diff_abs) + 2 * sd(mm_asd$d_diff_abs)
# which() drops rows where the comparison is NA instead of returning NA rows.
big_dif <- mm_asd[which(mm_asd$d_diff_abs > thresh), ]

# Histogram of the difficulty differences (TRUE spelled out: `T` can be
# reassigned).
dif_histn <- item_difficulty_difference_histogram(mm_asd, withNormal = TRUE)
print(dif_histn)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The majority of items are easier for TD than for ASD children. We examine the extrema: items whose absolute difficulty difference exceeds the median absolute difference by more than 2 standard deviations (|d_diff| > 3.21; red dotted lines in histogram). These 33 extrema are listed below.

# Attach item metadata (e.g. category) to the flagged items. The join key is
# spelled out so that a column later shared by both tables cannot silently
# change the match.
big_dif <- left_join(big_dif, wb_items, by = "definition")

# Table of flagged items, ordered by signed difficulty difference.
big_dif %>%
  arrange(d_diff) %>%
  relocate(category, definition) %>%
  rename(d_TD = d_g1, d_ASD = d_g2) %>%
  dplyr::select(-a1, -group1, -group2) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 2)

category	definition	d_TD	d_ASD	d_diff	d_diff_abs
people	mommy*	6.68	0.62	-6.06	6.06
people	daddy*	6.46	1.94	-4.52	4.52
people	baby	3.85	0.27	-3.58	3.58
action_words	cover	-1.90	1.32	3.22	3.22
places	playground	-1.73	1.51	3.24	3.24
people	girl	0.09	3.40	3.31	3.31
action_words	give	-0.83	2.50	3.34	3.34
action_words	pretend	-3.09	0.25	3.34	3.34
animals	penguin	-1.25	2.14	3.39	3.39
action_words	pick	-2.00	1.42	3.42	3.42
action_words	hurry	-1.56	1.86	3.43	3.43
action_words	listen	-1.90	1.53	3.43	3.43
action_words	make	-1.35	2.09	3.44	3.44
people	aunt	-0.72	2.75	3.47	3.47
games_routines	go potty	0.70	4.22	3.52	3.52
action_words	chase	-1.86	1.69	3.55	3.55
people	person	-3.74	-0.06	3.68	3.68
places	movie	-1.46	2.31	3.78	3.78
games_routines	so big!	-1.01	2.83	3.84	3.84
places	gas station	-2.25	1.62	3.88	3.88
people	babysitter	-3.43	0.48	3.92	3.92
people	uncle	-0.89	3.09	3.99	3.99
furniture_rooms	basement	-3.65	0.37	4.02	4.02
places	woods	-3.73	0.29	4.02	4.02
action_words	rip	-3.10	0.97	4.07	4.07
action_words	hate	-4.32	-0.17	4.15	4.15
games_routines	breakfast	-0.03	4.15	4.18	4.18
games_routines	turn around	-1.02	3.25	4.27	4.27
action_words	dump	-2.48	2.02	4.50	4.50
action_words	skate	-3.07	1.46	4.53	4.53
people	nurse	-3.59	1.50	5.09	5.09
people	man	-0.57	4.59	5.15	5.15
people	cowboy	-3.07	3.85	6.92	6.92

# Number of flagged items per category, in ascending order of frequency.
kable(
  sort(table(big_dif$category)),
  col.names = c("Category", "Frequency")
)

Category	Frequency
animals	1
furniture_rooms	1
games_routines	4
places	4
people	11
action_words	12

Mostly people and action words.

Ability vs. Sumscore by Age

CDI:WG Production

Examine ASD WG datasets

# Per-dataset overview of the WG data: age range, mean age, number of
# administrations and mean comprehension score, largest datasets first.
data_clean_AS_WG %>%
  group_by(id, age, sex, dataset_id) %>%
  # The grouping is replaced by group_by(dataset_id) right below, so dropping
  # it here changes nothing except silencing the summarise() message.
  summarise(comprehension = sum(value, na.rm = TRUE), .groups = "drop") %>%
  group_by(dataset_id) %>%
  summarise(
    min_age = min(age), max_age = max(age), mean_age = mean(age),
    n = n(), mean_comp = mean(comprehension)
  ) %>%
  arrange(desc(n)) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 1)

dataset_id	min_age	max_age	mean_age	n	mean_comp
12095	17	22	18.0	918	0.0
9138	3	48	22.9	876	0.0
14747	11	38	18.3	706	151.3
16641	11	38	18.3	706	151.3
10754	0	45	17.8	420	0.0
11574	0	45	17.8	420	0.0
10564	8	20	11.8	354	48.4
11014	13	132	57.5	290	185.3
8703	8	63	38.0	283	0.0
8667	11	48	27.5	268	0.0
12399	8	20	11.6	226	46.6
14572	8	20	11.6	226	46.6
15459	8	20	11.6	226	46.6
17679	8	20	11.6	226	46.6
11395	8	20	11.6	223	46.8
18066	9	21	13.0	216	0.0
14458	7	21	12.9	208	0.0
11259	8	20	11.7	201	46.2
17999	11	20	15.1	196	0.0
12942	7	21	12.6	194	0.0
11717	7	21	12.1	163	0.0
9961	8	20	12.1	162	48.2
8368	10	18	13.7	138	0.0
17935	12	22	16.2	112	0.0
8176	15	30	24.3	88	0.0
8370	15	30	24.3	88	0.0
9960	7	20	13.5	69	119.5
16640	12	29	20.9	61	205.6
18941	8	19	12.0	50	38.6
9959	7	14	10.8	44	59.6
8535	8	60	32.4	20	0.0
9507	49	85	68.6	20	0.0
12891	27	53	35.2	18	0.0
18703	59	101	76.1	18	171.2
10971	60	173	86.2	13	131.2
10335	60	109	77.7	10	136.6
11805	11	14	12.3	10	48.6
12820	12	12	12.0	7	0.0
9777	62	101	76.6	7	122.7
12790	74	124	106.3	3	0.0
8532	24	24	24.0	1	0.0
8853	12	12	12.0	1	10.0
8912	42	42	42.0	1	0.0
8922	22	22	22.0	1	0.0

Datasets with all 0 comprehension scores: c(12095, 9138, 10754, 11574, 8703, 8667, 18066, 14458, 17999, 12942, 11717, 8368, 17935, 8176, 8370, 8535, 9507, 12891, 12820, 12790, 8532, 8912, 8922). Datasets 10754 and 11574 have identical age ranges, mean ages, and Ns, as do 8176 and 8370. Datasets 16641 and 14747 have identical age ranges, mean ages, Ns, and average comprehension scores. After removing all of the 0-comprehension datasets, there are only 3829 participants.

# Datasets whose mean comprehension is 0 in the per-dataset table; exclude
# them from the cleaned WG data.
bad_asd_wg_datasets <- c(
  12095, 9138, 10754, 11574, 8703, 8667, 18066, 14458, 17999, 12942, 11717,
  8368, 17935, 8176, 8370, 8535, 9507, 12891, 12820, 12790, 8532, 8912, 8922
)
data_clean_AS_WG <- filter(
  data_clean_AS_WG,
  !(dataset_id %in% bad_asd_wg_datasets)
)

## `summarise()` has grouped output by 'data_id', 'age'. You can override using
## the `.groups` argument.

241 typically-developing (TD) children were removed from the wordbank data (5520 total) due to their not yet producing any words. 1058 children with ASD were removed from the NDAR data (3318 total) due to their not yet producing any words.

Production sumscores on the CDI:WG for TD vs. ASD children.

# Fit the grouped model on the WG response matrix and cache it on disk. This
# reuses (and overwrites) the `mod_dev_group` name used for the WS model.
mod_dev_group <- fit_mod_intuitive(d_mat_wg, d_group)

# The file is written with save() despite its .Rds extension, so it has to be
# restored with load(), not readRDS().
save(list = "mod_dev_group", file = "data/prodWG_IRT_model.Rds")

# Restore the cached WG model (8-36mo ASD) into `mod_dev_group`.
load(file = "data/prodWG_IRT_model.Rds")
# plot_glimmer(mod_dev_group, colnames(d_mat_wg), colnames(d_mat_wg),
#              plotName="GLIMMER_asd_prodWG_age8-36mos")

# Per-item parameters for the two groups (TD vs. ASD) in one data frame.
mm_asd_wg <- extract_group_df(mod_dev_group, groups = c("TD", "ASD"))

## Joining, by = c("a1", "definition")

# dif_hist <- item_difficulty_difference_histogram(mm_asd_wg)

# Flag items whose absolute TD-vs-ASD difficulty difference lies more than two
# standard deviations above the median absolute difference.
thresh_wg <- median(mm_asd_wg$d_diff_abs) + 2 * sd(mm_asd_wg$d_diff_abs)
# which() drops rows where the comparison is NA instead of returning NA rows.
big_dif_wg <- mm_asd_wg[which(mm_asd_wg$d_diff_abs > thresh_wg), ]

# Histogram of the difficulty differences (TRUE spelled out: `T` can be
# reassigned).
dif_histn_wg <- item_difficulty_difference_histogram(mm_asd_wg, withNormal = TRUE)
print(dif_histn_wg)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The majority of the items are much easier for TD than for ASD children. We examine the extrema: items whose absolute difficulty difference exceeds the median absolute difference by more than 2 standard deviations (|d_diff| > 7.88; red dotted lines in histogram). These 3 extrema are listed below.

# Attach item metadata (e.g. category) to the flagged items. The join key is
# spelled out so that a column later shared by both tables cannot silently
# change the match.
big_dif_wg <- left_join(big_dif_wg, wg_items, by = "definition")

# Table of flagged items, ordered by signed difficulty difference.
big_dif_wg %>%
  arrange(d_diff) %>%
  relocate(category, definition) %>%
  rename(d_TD = d_g1, d_ASD = d_g2) %>%
  dplyr::select(-a1, -group1, -group2) %>%
  kable(format = "html", table.attr = "style='width:50%;'", digits = 2)

category	definition	d_TD	d_ASD	d_diff	d_diff_abs
descriptive_words	old	-7.79	0.25	8.04	8.04
pronouns	her	-7.60	0.45	8.04	8.04
question_words	when	-8.32	0.00	8.32	8.32

ASD vs. TD DIF

George

2022-04-06