library(wordbankr) # WB data
library(tidyverse) # tidy
library(mirt) # IRT models
library(ltm) # more IRT functions
library(psych) # some psychometric stuff (tests of dimensionality)
library(Gifi) # some more psychometric stuff (tests of dimensionality)
library(knitr) # some formatting, tables, etc
library(patchwork) # combining plots.
library(GGally) # More plotting options.
library(lordif) # differential item functioning.
In our meeting on 01/09/2021, we discussed the possibility that our sample is a mixture of two kinds of children: grammatical kids and pre-grammatical kids. We said one way to remove the pre-grammatical kids would be to exclude kids who have a 0 on the variable indicating whether children combine words and on all of the morphological variables. One concern is what to do about kids who are not combining words but are using ing, ed, or s; it’s not clear whether these kids are pre-grammatical or not.
Download data from Word Bank:
Inst <- get_instrument_data(language="English (American)", form="WS")
Admin <- get_administration_data(language="English (American)", form="WS")
Item <- get_item_data(language="English (American)", form = "WS")
Complex <- Admin %>%
full_join(.,Inst, by="data_id") %>%
full_join(., Item, by="num_item_id") %>%
filter(longitudinal==FALSE) %>%
filter(type == "combine" |
type == "complexity" |
type == "word_endings" |
type== "word_forms_nouns" |
type == "word_forms_verbs" |
type == "word_endings_nouns" |
type == "word_endings_verbs"
) %>%
mutate(
out = ifelse(value=="complex" | value=="sometimes" | value=="produces", yes=1,
no = ifelse(value=="often", yes=2, no =0))
)
As a quick sanity check, let’s see which items have ordinal data (to make sure the above code worked correctly).
Complex %>%
filter(out==2) %>%
group_by(definition) %>%
count()
Ok, it’s only the combine variable and the word-endings variables – that’s right.
Let’s see how many missing values there are.
Complex %>%
filter(is.na(out)) %>%
group_by(definition) %>%
count()
Looks like 1426 participants don’t have item-level complexity scores. A further 2 don’t have word forms, and 1 doesn’t have word endings.
Complex$complexity_category <- ifelse(Complex$complexity_category == "", yes=Complex$type, no=Complex$complexity_category) # items outside the complexity section have an empty category; fill it with the item type
Complex %>%
filter(!is.na(out)) %>%
group_by(definition) %>%
summarise(
mean=mean(out),
sd=sd(out),
sum=sum(out),
category = first(complexity_category)
) %>%
arrange(category) %>%
kable(caption="Means and SDs for Each Item (Arranged by Category)")
| definition | mean | sd | sum | category |
|---|---|---|---|---|
| has your child begun to combine word yet? | 1.2833572 | 0.7960586 | 3578 | combine |
| baby blanket / baby’s blanket | 0.3873745 | 0.4872378 | 1080 | morphology |
| daddy car / daddy’s car | 0.4451220 | 0.4970684 | 1241 | morphology |
| daddy pick me up / daddy picked me up | 0.1743185 | 0.3794512 | 486 | morphology |
| doggie kiss me / doggie kissed me | 0.1879484 | 0.3907410 | 524 | morphology |
| I fall down / I fell down | 0.1807747 | 0.3849005 | 504 | morphology |
| I make tower / I making tower | 0.2360115 | 0.4247055 | 658 | morphology |
| kitty go away / kitty went away | 0.1280488 | 0.3342041 | 357 | morphology |
| kitty sleep / kitty sleeping | 0.4192970 | 0.4935326 | 1169 | morphology |
| more cookie / more cookies | 0.3758967 | 0.4844404 | 1048 | morphology |
| these my tooth / these my teeth | 0.4533716 | 0.4979103 | 1264 | morphology |
| two foot / two feet | 0.4214491 | 0.4938798 | 1175 | morphology |
| two shoe / two shoes | 0.4422525 | 0.4967431 | 1233 | morphology |
| baby crying / baby crying cuz she’s sad | 0.1599713 | 0.3666452 | 446 | syntax |
| baby crying / baby is crying | 0.1624821 | 0.3689586 | 453 | syntax |
| baby want eat / baby want to eat | 0.2650646 | 0.4414468 | 739 | syntax |
| coffee hot / that coffee hot | 0.1929699 | 0.3947004 | 538 | syntax |
| cookie mommy / cookie for mommy | 0.2413917 | 0.4280040 | 673 | syntax |
| doggie table / doggie on table | 0.3511478 | 0.4774147 | 979 | syntax |
| don’t read book / don’t want you read that book | 0.1818508 | 0.3857907 | 507 | syntax |
| go bye-bye / wanna go bye-bye | 0.2679340 | 0.4429625 | 747 | syntax |
| I like read stories / I like to read stories | 0.1779053 | 0.3825016 | 496 | syntax |
| I no do it / I can’t do it | 0.2482066 | 0.4320498 | 692 | syntax |
| I sing song / I sing song for you | 0.1449067 | 0.3520699 | 404 | syntax |
| I want that / I want that one you got | 0.1137016 | 0.3175054 | 317 | syntax |
| lookit / lookit what I got | 0.2410330 | 0.4277870 | 672 | syntax |
| lookit me / lookit me dancing | 0.2596844 | 0.4385400 | 724 | syntax |
| no wash dolly / don’t wash dolly | 0.2116212 | 0.4085310 | 590 | syntax |
| read me story Mommy / read me a story Mommy | 0.1635581 | 0.3699405 | 456 | syntax |
| that my truck / that’s my truck | 0.2822812 | 0.4501902 | 787 | syntax |
| there a kitty / there’s a kitty | 0.3210187 | 0.4669517 | 895 | syntax |
| turn on light / turn on light so I can see | 0.0961263 | 0.2948172 | 268 | syntax |
| want cookies / want cookies and milk | 0.2048063 | 0.4036324 | 571 | syntax |
| want more juice / want juice in there | 0.2051650 | 0.4038946 | 572 | syntax |
| we made this / me and Paul made this | 0.1603300 | 0.3669776 | 447 | syntax |
| where’s my dolly / where’s my dolly name Sam | 0.1093974 | 0.3121932 | 305 | syntax |
| where mommy go / where did mommy go | 0.1822095 | 0.3860863 | 508 | syntax |
| you fix it / can you fix it | 0.1025825 | 0.3034672 | 286 | syntax |
| ed | 0.3589470 | 0.5835291 | 1009 | word_endings |
| ing | 0.6143721 | 0.7237652 | 1727 | word_endings |
| splural | 0.8701530 | 0.8247950 | 2446 | word_endings |
| spossess | 0.9558876 | 0.8877438 | 2687 | word_endings |
| blockses | 0.0211697 | 0.1439757 | 59 | word_endings_nouns |
| childrens | 0.0312164 | 0.1739332 | 87 | word_endings_nouns |
| childs | 0.0093290 | 0.0961526 | 26 | word_endings_nouns |
| feets | 0.1729458 | 0.3782683 | 482 | word_endings_nouns |
| foots | 0.1094367 | 0.3122423 | 305 | word_endings_nouns |
| mans | 0.0940079 | 0.2918921 | 262 | word_endings_nouns |
| mens | 0.0200933 | 0.1403446 | 56 | word_endings_nouns |
| mices | 0.0114819 | 0.1065557 | 32 | word_endings_nouns |
| mouses | 0.0606387 | 0.2387092 | 169 | word_endings_nouns |
| shoeses | 0.0782484 | 0.2686103 | 218 | word_endings_nouns |
| sockses | 0.0692501 | 0.2539245 | 193 | word_endings_nouns |
| teeths | 0.1395766 | 0.3466094 | 389 | word_endings_nouns |
| toeses | 0.0581270 | 0.2340255 | 162 | word_endings_nouns |
| tooths | 0.0534625 | 0.2249943 | 149 | word_endings_nouns |
| ated | 0.0200933 | 0.1403446 | 56 | word_endings_verbs |
| blewed | 0.0118407 | 0.1081882 | 33 | word_endings_verbs |
| blowed | 0.1115895 | 0.3149173 | 311 | word_endings_verbs |
| breaked | 0.0696089 | 0.2545324 | 194 | word_endings_verbs |
| bringed | 0.0487980 | 0.2154841 | 136 | word_endings_verbs |
| broked | 0.0900610 | 0.2863205 | 251 | word_endings_verbs |
| buyed | 0.0617151 | 0.2406806 | 172 | word_endings_verbs |
| camed | 0.0107643 | 0.1032096 | 30 | word_endings_verbs |
| comed | 0.0186581 | 0.1353385 | 52 | word_endings_verbs |
| doed | 0.0448511 | 0.2070141 | 125 | word_endings_verbs |
| dranked | 0.0276283 | 0.1639347 | 77 | word_endings_verbs |
| drinked | 0.0843201 | 0.2779171 | 235 | word_endings_verbs |
| eated | 0.0559742 | 0.2299131 | 156 | word_endings_verbs |
| falled | 0.0961607 | 0.2948645 | 268 | word_endings_verbs |
| flied | 0.0211697 | 0.1439757 | 59 | word_endings_verbs |
| getted | 0.0211697 | 0.1439757 | 59 | word_endings_verbs |
| goed | 0.0487980 | 0.2154841 | 136 | word_endings_verbs |
| gotted | 0.0125583 | 0.1113779 | 35 | word_endings_verbs |
| haved | 0.0143524 | 0.1189598 | 40 | word_endings_verbs |
| heared | 0.0200933 | 0.1403446 | 56 | word_endings_verbs |
| holded | 0.0362397 | 0.1869195 | 101 | word_endings_verbs |
| losed | 0.0211697 | 0.1439757 | 59 | word_endings_verbs |
| losted | 0.0165052 | 0.1274308 | 46 | word_endings_verbs |
| maked | 0.0423394 | 0.2013985 | 118 | word_endings_verbs |
| ranned | 0.0193757 | 0.1378662 | 54 | word_endings_verbs |
| runned | 0.0441335 | 0.2054285 | 123 | word_endings_verbs |
| satted | 0.0060997 | 0.0778763 | 17 | word_endings_verbs |
| seed | 0.0279871 | 0.1649654 | 78 | word_endings_verbs |
| sitted | 0.0258342 | 0.1586690 | 72 | word_endings_verbs |
| taked | 0.0405454 | 0.1972699 | 113 | word_endings_verbs |
| wented | 0.0086114 | 0.0924138 | 24 | word_endings_verbs |
| children | 0.1302943 | 0.3366874 | 363 | word_forms_nouns |
| feet | 0.5681981 | 0.4954161 | 1583 | word_forms_nouns |
| men | 0.1162958 | 0.3206368 | 324 | word_forms_nouns |
| mice | 0.0972721 | 0.2963811 | 271 | word_forms_nouns |
| teeth | 0.6977746 | 0.4593048 | 1944 | word_forms_nouns |
| ate | 0.2882268 | 0.4530185 | 803 | word_forms_verbs |
| blew | 0.1335248 | 0.3402021 | 372 | word_forms_verbs |
| bought | 0.1166547 | 0.3210660 | 325 | word_forms_verbs |
| broke | 0.4605169 | 0.4985281 | 1283 | word_forms_verbs |
| came | 0.1148600 | 0.3189102 | 320 | word_forms_verbs |
| drank | 0.1765973 | 0.3813960 | 492 | word_forms_verbs |
| drove | 0.0717875 | 0.2581821 | 200 | word_forms_verbs |
| fell | 0.3646805 | 0.4814269 | 1016 | word_forms_verbs |
| flew | 0.1105528 | 0.3136338 | 308 | word_forms_verbs |
| got | 0.2781766 | 0.4481813 | 775 | word_forms_verbs |
| had | 0.1256281 | 0.3314893 | 350 | word_forms_verbs |
| heard | 0.1127064 | 0.3162903 | 314 | word_forms_verbs |
| held | 0.0678392 | 0.2515149 | 189 | word_forms_verbs |
| lost | 0.2333094 | 0.4230134 | 650 | word_forms_verbs |
| made | 0.2132089 | 0.4096475 | 594 | word_forms_verbs |
| ran | 0.1819813 | 0.3858984 | 507 | word_forms_verbs |
| sat | 0.1593683 | 0.3660849 | 444 | word_forms_verbs |
| saw | 0.1870065 | 0.3899867 | 521 | word_forms_verbs |
| took | 0.1679828 | 0.3739181 | 468 | word_forms_verbs |
| went | 0.1719311 | 0.3773883 | 479 | word_forms_verbs |
Complex %>%
filter(!is.na(out)) %>%
group_by(definition) %>%
summarise(
mean=mean(out),
category = first(complexity_category),
cat2 = str_replace(category, "_", " " ) %>% str_replace("_", "\n" )
) %>%
ggplot(aes(x=cat2, y= mean)) + geom_boxplot()
It seems like the word-endings items and the combine-words variable have higher means than the morphological and syntactic complexity items (they are ordinal, but when I treated them as binary in an older version the results were the same). The over-regularization items have very low means, and the word forms look a bit like the syntactic and morphological complexity items.
One difficulty I just thought of is that we would expect the word forms and over-regularizations to be non-monotonically related to overall grammatical proficiency, given the inverse U-shaped curve we typically see in the development of morphology.
Therefore, I’m going to first add the combine item and the word endings variables, see how that changes the results of the IRT model, and then plot the over-regularization and word form scores against this, to see if we see the non-monotonicity.
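To make that concrete, here is a minimal sketch of the non-monotonicity check. The theta join is left as a placeholder because person-level abilities only become available once the IRT models below have been fit; theta_df is a hypothetical data frame of data_id and theta.
overreg_scores <- Complex %>% # per-child over-regularization total
filter(type %in% c("word_endings_nouns", "word_endings_verbs"), !is.na(out)) %>%
group_by(data_id) %>%
summarise(overreg = sum(out))
wordform_scores <- Complex %>% # per-child irregular word-form total
filter(type %in% c("word_forms_nouns", "word_forms_verbs"), !is.na(out)) %>%
group_by(data_id) %>%
summarise(wordform = sum(out))
# theta_df <- tibble(data_id = ..., Theta = ...) # hypothetical: built from fscores() on a model below
# overreg_scores %>%
# left_join(wordform_scores, by="data_id") %>%
# left_join(theta_df, by="data_id") %>%
# pivot_longer(c(overreg, wordform)) %>%
# ggplot(aes(x=Theta, y=value)) + geom_point(alpha=.1) +
# stat_smooth(method="loess") + facet_wrap(~name, scales="free_y") + theme_minimal()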
Prepare the data set for IRT modeling. I’m going to rename each item to its category (e.g., morphology or syntax) plus its item number, to make some graphs easier to read.
Complex_short_with_ids <- Complex %>%
dplyr::select(data_id, value, out, complexity_category, num_item_id) %>%
mutate(
label = str_c(complexity_category, num_item_id)
) %>%
pivot_wider(id_cols=data_id, names_from = "label", values_from="out") %>%
drop_na()
Complex_short <- Complex_short_with_ids %>%
dplyr::select(starts_with(c("combine", "morphology", "syntax")), c("word_endings686", "word_endings687", "word_endings688", "word_endings689")) # dataset for IRT can't have IDs
Quick test of the possibility raised above: are kids who are not combining words nonetheless producing word endings (ing, ed, plural s, or possessive ’s)?
Complex_short %>%
filter(combine760==0) %>%
dplyr::select(word_endings686, word_endings687, word_endings688, word_endings689) %>%
mutate(
total = word_endings686 + word_endings687 + word_endings688 + word_endings689,
morph = ifelse(total == 0, yes = 0, no =1)
) %>%
group_by(morph) %>%
count()
So, there are 600 kids with a 0 on the combines-words item. Of those, 107 kids are producing morphological forms. I think it makes sense to be conservative about who’s grammatical and exclude those 107 kids. I’m going to make two datasets for the IRT models: one including all participants and one including only those who met our stringent definition of grammatical.
Complex_short_grammatical <- Complex_short %>%
filter(combine760 > 0) %>%
dplyr::select(starts_with(c("morphology", "syntax")))
Complex_short_all <- Complex_short %>%
dplyr::select(starts_with(c("morphology", "syntax")))
Prior to fitting the IRT model, it’s worth looking at the correlation structure of the data to get a sense of the possible dimensionality.
Create tetrachoric correlation matrix (for binary data). I created a vector with labels, for labeling of the correlation plot.
Complex_tetra <- tetrachoric(Complex_short_grammatical)
rho <- Complex_tetra$rho
lab = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37") # create labels for corPlot because the defintions are long and take up a ton of space
Check out the correlations
corPlot(rho, labels=lab)
They look really high. Let’s examine the dimensionality, using princals, which is basically just a version of PCA that is appropriate for categorical data.
pc <- princals(rho)
plot(pc)
fa.parallel(rho, fa="fa", cor="tetra", n.obs = 2185)
## Parallel analysis suggests that the number of factors = 6 and the number of components = NA
vss(rho, cor="tetra", n.obs = 2185)
##
## Very Simple Structure
## Call: vss(x = rho, n.obs = 2185, cor = "tetra")
## VSS complexity 1 achieves a maximimum of 1 with 1 factors
## VSS complexity 2 achieves a maximimum of 1 with 2 factors
##
## The Velicer MAP achieves a minimum of 0.02 with 4 factors
## BIC achieves a minimum of 10758.74 with 8 factors
## Sample Size adjusted BIC achieves a minimum of 12023.25 with 8 factors
##
## Statistics by number of factors
## vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex eChisq
## 1 1.00 0.00 0.024 629 29234 0 3.38 1 0.14 24397 26396 1.0 3228
## 2 0.50 1.00 0.017 593 23749 0 2.18 1 0.13 19189 21073 1.8 1525
## 3 0.36 0.80 0.016 558 21230 0 1.77 1 0.13 16939 18712 2.4 1075
## 4 0.38 0.72 0.016 524 19000 0 1.46 1 0.13 14971 16635 2.8 770
## 5 0.40 0.74 0.016 491 17308 0 1.27 1 0.13 13533 15093 3.1 600
## 6 0.40 0.66 0.017 459 15841 0 1.11 1 0.12 12311 13770 3.2 466
## 7 0.40 0.66 0.019 428 14898 0 1.00 1 0.12 11607 12966 3.3 393
## 8 0.40 0.67 0.020 398 13819 0 0.92 1 0.12 10759 12023 3.4 339
## SRMR eCRMS eBIC
## 1 0.033 0.034 -1608
## 2 0.023 0.024 -3035
## 3 0.019 0.021 -3216
## 4 0.016 0.018 -3259
## 5 0.014 0.017 -3176
## 6 0.013 0.015 -3063
## 7 0.012 0.015 -2898
## 8 0.011 0.014 -2721
Complex_tetra <- tetrachoric(Complex_short_all)
rho <- Complex_tetra$rho
lab = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37") # create labels for corPlot because the defintions are long and take up a ton of space
Check out the correlations
corPlot(rho, labels=lab)
pc <- princals(rho)
plot(pc)
fa.parallel(rho, fa="fa", cor="tetra", n.obs = 2785)
## Parallel analysis suggests that the number of factors = 5 and the number of components = NA
vss(rho, cor="tetra", n.obs = 2785)
##
## Very Simple Structure
## Call: vss(x = rho, n.obs = 2785, cor = "tetra")
## VSS complexity 1 achieves a maximimum of 1 with 1 factors
## VSS complexity 2 achieves a maximimum of 1 with 2 factors
##
## The Velicer MAP achieves a minimum of 0.02 with 4 factors
## BIC achieves a minimum of 17923.79 with 8 factors
## Sample Size adjusted BIC achieves a minimum of 19188.37 with 8 factors
##
## Statistics by number of factors
## vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex eChisq
## 1 1.00 0.00 0.026 629 42035 0 2.18 1 0.15 37046 39044 1.0 2778
## 2 0.47 1.00 0.018 593 33909 0 1.36 1 0.14 29205 31089 1.8 1241
## 3 0.37 0.81 0.017 558 30360 0 1.08 1 0.14 25934 27707 2.5 827
## 4 0.36 0.74 0.017 524 27416 0 0.93 1 0.14 23259 24924 2.9 629
## 5 0.38 0.77 0.017 491 25227 0 0.81 1 0.13 21332 22892 3.1 488
## 6 0.37 0.71 0.018 459 23369 0 0.72 1 0.13 19728 21186 3.3 393
## 7 0.37 0.72 0.019 428 22262 0 0.63 1 0.14 18867 20227 3.3 332
## 8 0.39 0.71 0.021 398 21081 0 0.57 1 0.14 17924 19188 3.4 283
## SRMR eCRMS eBIC
## 1 0.0274 0.028 -2211
## 2 0.0183 0.019 -3462
## 3 0.0149 0.016 -3599
## 4 0.0130 0.015 -3527
## 5 0.0115 0.013 -3406
## 6 0.0103 0.012 -3248
## 7 0.0095 0.012 -3063
## 8 0.0087 0.011 -2874
It seems like the one-factor solution is most plausible for both the full dataset and the grammatical-kids dataset. One obvious difference, though, is that in the princals plot for the grammatical dataset a few items appeared to differ quite markedly from the others. I’m first going to do some IRT with the grammatical-only dataset, and then I’m going to see whether those vectors were a quirk of the scaling.
m1 <- mirt(Complex_short_grammatical, 1, itemtype="2PL")
##
Iteration: 1, Log-Lik: -29726.902, Max-Change: 3.65875
Iteration: 2, Log-Lik: -26829.133, Max-Change: 1.38662
[... EM progress output trimmed: iterations 3-211 ...]
Iteration: 212, Log-Lik: -24891.308, Max-Change: 0.00040
Iteration: 213, Log-Lik: -24891.308, Max-Change: 0.00010
m1
##
## Call:
## mirt(data = Complex_short_grammatical, model = 1, itemtype = "2PL")
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 213 EM iterations.
## mirt version: 1.34
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -24891.31
## Estimated parameters: 74
## AIC = 49930.62; AICc = 49935.88
## BIC = 50351.63; SABIC = 50116.52
## G2 (1e+10) = 24028.81, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
summary(m1)
## F1 h2
## morphology761 0.888 0.789
## morphology762 0.838 0.703
## morphology763 0.845 0.714
## morphology764 0.878 0.771
## morphology765 0.909 0.826
## morphology766 0.831 0.690
## morphology767 0.852 0.726
## morphology768 0.866 0.750
## morphology769 0.868 0.754
## morphology770 0.910 0.829
## morphology771 0.930 0.866
## morphology772 0.897 0.805
## syntax773 0.936 0.876
## syntax774 0.908 0.824
## syntax775 0.925 0.856
## syntax776 0.933 0.870
## syntax777 0.933 0.870
## syntax778 0.916 0.839
## syntax779 0.854 0.730
## syntax780 0.930 0.864
## syntax781 0.901 0.812
## syntax782 0.906 0.821
## syntax783 0.937 0.878
## syntax784 0.925 0.856
## syntax785 0.955 0.912
## syntax786 0.952 0.907
## syntax787 0.945 0.892
## syntax788 0.917 0.840
## syntax789 0.910 0.827
## syntax790 0.956 0.914
## syntax791 0.964 0.930
## syntax792 0.922 0.851
## syntax793 0.950 0.903
## syntax794 0.901 0.812
## syntax795 0.877 0.769
## syntax796 0.952 0.907
## syntax797 0.932 0.869
##
## SS loadings: 30.654
## Proportion Var: 0.828
##
## Factor correlations:
##
## F1
## F1 1
Let’s check the overall fit of the model. The M2 chi-square statistic here isn’t super meaningful given the sample size, so the approximate fit indices that M2() also reports (RMSEA, SRMSR, CFI/TLI) are more informative.
M2(m1)
itemfit(m1)
I’m going to use a p-value of .10 as a cutoff, to be conservative about false negatives.
misfit <- itemfit(m1) %>% # Get labels of mis-fitting items.
filter(p.S_X2 <= .10) %>%
dplyr::select(item) %>%
as.vector()
items_good <- dplyr::select(Complex_short_grammatical, -all_of(misfit$item)) # Well fitting items
items_bad <- dplyr::select(Complex_short_grammatical, all_of(misfit$item)) # Poorly fitting items
mod_fit <- mirt(items_good, 1, "2PL", verbose=FALSE) # Calculate factor scores using only the well fitting items.
Theta <- fscores(mod_fit)
IG762<- itemGAM(items_bad$morphology762, Theta)
IG764<- itemGAM(items_bad$morphology764, Theta)
IG771<- itemGAM(items_bad$morphology771, Theta)
IG783<- itemGAM(items_bad$syntax783, Theta)
IG792<- itemGAM(items_bad$syntax792, Theta)
plot(IG762); plot(IG764); plot(IG771)
plot(IG783); plot(IG792)
Look at the discrimination and difficulty parameters
coefs_2pl <- coef(m1, as.data.frame = TRUE) %>%
t() %>%
as_tibble() %>%
dplyr::select(-c(149:150)) %>% # drop the last two columns (the latent mean and variance group parameters)
pivot_longer(everything()) %>%
tidyr::separate(col=name, into=c("item", "parameter"), sep="([.])") %>%
pivot_wider(id_cols=item, names_from=parameter, values_from=value) %>%
dplyr::select(item, a1, d) %>%
mutate(
category = gsub('[[:digit:]]+', '', item)
)
ggplot(coefs_2pl,
aes(x = a1, y = d)) +
geom_point(alpha = .3) +
ggrepel::geom_text_repel(data = coefs_2pl,
aes(label = item), size = 3) +
xlab("Discrimination") +
ylab("Difficulty") + theme_minimal()
ggplot(coefs_2pl, aes(x=a1, fill=category)) + geom_histogram() + theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(coefs_2pl, aes(x=d, fill=category)) + geom_histogram() + theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Next let’s look at the relationship between the true scores and the raw scores.
true_raw <- Complex_short_grammatical %>%
mutate(
Raw = rowSums(.[,1:37]),
Theta = fscores(m1, method="MAP")
) %>%
ggplot(aes(x=Theta, y=Raw)) + geom_point() + stat_smooth(method="loess") + theme_minimal()
raw_score <- Complex_short_grammatical %>%
mutate(
Raw = rowSums(.[,1:37])
) %>%
ggplot(aes(x=Raw)) + geom_histogram() + theme_minimal()
true_score <- Complex_short_grammatical %>%
mutate(
Theta = fscores(m1, method="MAP")
) %>%
ggplot(aes(x=Theta)) + geom_histogram() + theme_minimal()
library(patchwork)
true_raw/(true_score + raw_score)
## `geom_smooth()` using formula 'y ~ x'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We still have a bunch of 0s.
It’s important to remember that these latent variables are only interpretable if the following conditions hold:

1. The assumption of unidimensionality is appropriate.
2. The assumed distributional form of the latent variable is correct.
3. Measurement invariance holds: the items behave the same way across different groups of participants (see the sketch after this list for how this could eventually be checked).
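Measurement invariance isn’t checked below. As a placeholder, here is a hedged sketch of what a DIF analysis could look like with lordif (loaded above but otherwise unused); it assumes Admin contains a usable sex column, which would need to be verified.
grammatical_with_ids <- Complex_short_with_ids %>% # re-derive the grammatical rows, keeping IDs
filter(combine760 > 0) %>%
left_join(dplyr::select(Admin, data_id, sex), by="data_id") # assumes a `sex` column in Admin
items_for_dif <- grammatical_with_ids %>%
dplyr::select(starts_with(c("morphology", "syntax"))) %>%
as.data.frame()
# dif <- lordif(items_for_dif, grammatical_with_ids$sex, criterion="Chisqr") # slow; not run here
# summary(dif)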
Exploratory 2-dimensional IRT.
m2 <- mirt(Complex_short_grammatical, 2, itemtype="2PL", verbose=FALSE)
M2(m2)
anova(m1, m2)
##
## Model 1: mirt(data = Complex_short_grammatical, model = 1, itemtype = "2PL")
## Model 2: mirt(data = Complex_short_grammatical, model = 2, itemtype = "2PL",
## verbose = FALSE)
summary(m2, "oblimin", suppress=.20)
##
## Rotation: oblimin
##
## Rotated factor loadings:
##
## F1 F2 h2
## morphology761 -0.828 -0.268 0.827
## morphology762 -0.795 NA 0.718
## morphology763 -0.748 -0.418 0.834
## morphology764 -0.838 NA 0.779
## morphology765 -0.889 NA 0.817
## morphology766 -0.797 NA 0.696
## morphology767 -0.801 -0.222 0.747
## morphology768 -0.844 NA 0.751
## morphology769 -0.790 -0.353 0.838
## morphology770 -0.876 NA 0.846
## morphology771 -0.894 NA 0.892
## morphology772 -0.859 NA 0.825
## syntax773 -0.924 NA 0.871
## syntax774 -0.866 -0.200 0.845
## syntax775 -0.910 NA 0.843
## syntax776 -0.924 NA 0.858
## syntax777 -0.922 NA 0.858
## syntax778 -0.914 NA 0.829
## syntax779 -0.857 NA 0.719
## syntax780 -0.898 NA 0.873
## syntax781 -0.916 NA 0.819
## syntax782 -0.888 NA 0.811
## syntax783 -0.945 NA 0.875
## syntax784 -0.928 NA 0.849
## syntax785 -0.950 NA 0.902
## syntax786 -0.968 NA 0.913
## syntax787 -0.963 NA 0.904
## syntax788 -0.939 0.222 0.865
## syntax789 -0.925 NA 0.834
## syntax790 -0.967 NA 0.915
## syntax791 -0.965 NA 0.926
## syntax792 -0.936 NA 0.855
## syntax793 -0.962 NA 0.906
## syntax794 -0.916 NA 0.817
## syntax795 -0.894 NA 0.779
## syntax796 -0.968 NA 0.913
## syntax797 -0.949 NA 0.878
##
## Rotated SS loadings: 29.829 0.988
##
## Factor correlations:
##
## F1 F2
## F1 1.000 0.159
## F2 0.159 1.000
Complex_short_grammatical %>%
mutate(
Raw = rowSums(.[,1:37]),
Theta1 = fscores(m2, method="MAP")[,1],
Theta2 = fscores(m2, method="MAP")[,2]
) %>%
dplyr::select(Raw, Theta1, Theta2) %>%
ggpairs()
## Warning: The following factor score estimates failed to converge successfully:
## 1495
## Warning: The following factor score estimates failed to converge successfully:
## 1495
Could this be due to morphological vs syntactic complexity items?
model.1 <- mirt.model('
F1 = 1 - 12
F2 = 13 - 37
COV=F1*F2')
m3 <- mirt(Complex_short_grammatical, model.1, "2PL", verbose=FALSE)
M2(m3)
summary(m3)
## F1 F2 h2
## morphology761 0.906 0.000 0.821
## morphology762 0.846 0.000 0.716
## morphology763 0.859 0.000 0.738
## morphology764 0.881 0.000 0.776
## morphology765 0.900 0.000 0.811
## morphology766 0.832 0.000 0.692
## morphology767 0.855 0.000 0.731
## morphology768 0.864 0.000 0.747
## morphology769 0.882 0.000 0.778
## morphology770 0.922 0.000 0.851
## morphology771 0.951 0.000 0.904
## morphology772 0.907 0.000 0.823
## syntax773 0.000 0.925 0.856
## syntax774 0.000 0.888 0.788
## syntax775 0.000 0.908 0.825
## syntax776 0.000 0.919 0.845
## syntax777 0.000 0.919 0.845
## syntax778 0.000 0.901 0.812
## syntax779 0.000 0.835 0.698
## syntax780 0.000 0.915 0.838
## syntax781 0.000 0.891 0.794
## syntax782 0.000 0.890 0.792
## syntax783 0.000 0.926 0.858
## syntax784 0.000 0.912 0.833
## syntax785 0.000 0.944 0.892
## syntax786 0.000 0.943 0.889
## syntax787 0.000 0.938 0.880
## syntax788 0.000 0.908 0.824
## syntax789 0.000 0.897 0.805
## syntax790 0.000 0.951 0.904
## syntax791 0.000 0.959 0.919
## syntax792 0.000 0.913 0.834
## syntax793 0.000 0.943 0.889
## syntax794 0.000 0.889 0.790
## syntax795 0.000 0.861 0.741
## syntax796 0.000 0.943 0.890
## syntax797 0.000 0.922 0.850
##
## SS loadings: 9.388 20.889
## Proportion Var: 0.254 0.565
##
## Factor correlations:
##
## F1 F2
## F1 1.000 0.951
## F2 0.951 1.000
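The correlation between the morphology and syntax factors is .95, so the two factors are nearly indistinguishable, which again points toward a single dimension.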
M2(m3)
anova(m3, m2)
##
## Model 1: mirt(data = Complex_short_grammatical, model = model.1, itemtype = "2PL",
## verbose = FALSE)
## Model 2: mirt(data = Complex_short_grammatical, model = 2, itemtype = "2PL",
## verbose = FALSE)
anova(m3, m1)
##
## Model 1: mirt(data = Complex_short_grammatical, model = 1, itemtype = "2PL")
## Model 2: mirt(data = Complex_short_grammatical, model = model.1, itemtype = "2PL",
## verbose = FALSE)
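Next, let’s relax the assumed distributional form of the latent variable (condition 2 above) by refitting the unidimensional model with an empirical-histogram (“EH”) estimate of the latent density, and then with the “EHW” variant, comparing the resulting thetas to those from the Gaussian model.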
m1_nonpar <- mirt(Complex_short_grammatical, 1, itemtype="2PL", dentype = "EH", verbose=FALSE)
M2(m1_nonpar)
Complex_short_grammatical %>%
mutate(
Theta_EH = fscores(m1_nonpar, method="MAP"),
Theta_gauss = fscores(m1, method="MAP")
) %>%
ggplot(aes(x=Theta_EH, y=Theta_gauss)) + geom_point() + stat_smooth(method="loess") + theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
m1_nonpar <- mirt(Complex_short_grammatical, 1, itemtype="2PL", dentype = "EHW", verbose=FALSE)
## EM cycles terminated after 2000 iterations.
M2(m1_nonpar)
Complex_short_grammatical %>%
mutate(
Theta_EH = fscores(m1_nonpar, method="MAP"),
Theta_gauss = fscores(m1, method="MAP")
) %>%
ggplot(aes(x=Theta_EH, y=Theta_gauss)) + geom_point() + stat_smooth(method="loess") + theme_minimal()
## Warning: The following factor score estimates failed to converge successfully:
## 552
## `geom_smooth()` using formula 'y ~ x'
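Note that the EHW fit stopped at the 2000-iteration cap without converging, so its estimates should be treated with caution. Now let’s fit the unidimensional 2PL to the full dataset, including the kids who aren’t yet combining words.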
m4 <- mirt(Complex_short_all, 1, itemtype="2PL")
##
Iteration: 1, Log-Lik: -32673.571, Max-Change: 3.39156
Iteration: 2, Log-Lik: -28573.860, Max-Change: 1.45184
[... EM progress output trimmed: iterations 3-189 ...]
Iteration: 190, Log-Lik: -25780.361, Max-Change: 0.00051
Iteration: 191, Log-Lik: -25780.361, Max-Change: 0.00010
summary(m4)
## F1 h2
## morphology761 0.930 0.866
## morphology762 0.899 0.808
## morphology763 0.905 0.819
## morphology764 0.923 0.852
## morphology765 0.937 0.879
## morphology766 0.881 0.775
## morphology767 0.905 0.818
## morphology768 0.917 0.841
## morphology769 0.915 0.838
## morphology770 0.938 0.879
## morphology771 0.952 0.906
## morphology772 0.927 0.859
## syntax773 0.958 0.918
## syntax774 0.938 0.879
## syntax775 0.948 0.898
## syntax776 0.952 0.907
## syntax777 0.953 0.908
## syntax778 0.942 0.887
## syntax779 0.898 0.806
## syntax780 0.953 0.908
## syntax781 0.932 0.870
## syntax782 0.934 0.872
## syntax783 0.956 0.915
## syntax784 0.949 0.900
## syntax785 0.969 0.939
## syntax786 0.967 0.935
## syntax787 0.961 0.923
## syntax788 0.940 0.884
## syntax789 0.937 0.878
## syntax790 0.969 0.939
## syntax791 0.973 0.948
## syntax792 0.947 0.897
## syntax793 0.966 0.934
## syntax794 0.928 0.862
## syntax795 0.913 0.833
## syntax796 0.967 0.935
## syntax797 0.953 0.908
##
## SS loadings: 32.62
## Proportion Var: 0.882
##
## Factor correlations:
##
## F1
## F1 1
M2(m4)
itemfit(m4)
misfit <- itemfit(m4) %>% # Get labels of mis-fitting items.
filter(p.S_X2 <= .10) %>%
dplyr::select(item) %>%
as.vector()
items_good <- dplyr::select(Complex_short_all, -all_of(misfit$item)) # Well fitting items (from the full dataset this time, since misfit comes from m4)
items_bad <- dplyr::select(Complex_short_all, all_of(misfit$item)) # Poorly fitting items
mod_fit <- mirt(items_good, 1, "2PL", verbose=FALSE) # Calculate factor scores using only the well fitting items.
Theta <- fscores(mod_fit)
IG762<- itemGAM(items_bad$morphology762, Theta)
IG764<- itemGAM(items_bad$morphology764, Theta)
IG769<- itemGAM(items_bad$morphology769, Theta)
IG771<- itemGAM(items_bad$morphology771, Theta)
IG783<- itemGAM(items_bad$syntax783, Theta)
IG792<- itemGAM(items_bad$syntax792, Theta)
plot(IG762); plot(IG764); plot(IG769); plot(IG771)
plot(IG783); plot(IG792)
coefs_2pl <- coef(m4, as.data.frame = TRUE) %>%
t() %>%
as_tibble() %>%
dplyr::select(-c(149:150)) %>% # drop the last two columns (the latent mean and variance group parameters)
pivot_longer(everything()) %>%
tidyr::separate(col=name, into=c("item", "parameter"), sep="([.])") %>%
pivot_wider(id_cols=item, names_from=parameter, values_from=value) %>%
dplyr::select(item, a1, d) %>%
mutate(
category = gsub('[[:digit:]]+', '', item)
)
ggplot(coefs_2pl,
aes(x = a1, y = d)) +
geom_point(alpha = .3) +
ggrepel::geom_text_repel(data = coefs_2pl,
aes(label = item), size = 3) +
xlab("Discrimination") +
ylab("Difficulty") + theme_minimal()
true_raw <- Complex_short_all %>%
mutate(
Raw = rowSums(.[,1:37]),
Theta = fscores(m4, method="MAP")
) %>%
ggplot(aes(x=Theta, y=Raw)) + geom_point() + stat_smooth(method="loess") + theme_minimal()
raw_score <- Complex_short_all %>%
mutate(
Raw = rowSums(.[,1:37])
) %>%
ggplot(aes(x=Raw)) + geom_histogram() + theme_minimal()
true_score <- Complex_short_all %>%
mutate(
Theta = fscores(m4, method="MAP")
) %>%
ggplot(aes(x=Theta)) + geom_histogram() + theme_minimal()
library(patchwork)
true_raw/(true_score + raw_score)
## `geom_smooth()` using formula 'y ~ x'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Assumption Checking
m5 <- mirt(Complex_short_all, 2, itemtype="2PL", verbose=FALSE)
M2(m5)
anova(m4, m5)
##
## Model 1: mirt(data = Complex_short_all, model = 1, itemtype = "2PL")
## Model 2: mirt(data = Complex_short_all, model = 2, itemtype = "2PL", verbose = FALSE)
summary(m5, "oblimin", suppress=.20)
##
## Rotation: oblimin
##
## Rotated factor loadings:
##
## F1 F2 h2
## morphology761 -0.897 -0.228 0.891
## morphology762 -0.873 NA 0.820
## morphology763 -0.856 -0.335 0.892
## morphology764 -0.902 NA 0.855
## morphology765 -0.925 NA 0.869
## morphology766 -0.862 NA 0.774
## morphology767 -0.878 NA 0.827
## morphology768 -0.902 NA 0.840
## morphology769 -0.875 -0.293 0.894
## morphology770 -0.922 NA 0.889
## morphology771 -0.937 NA 0.922
## morphology772 -0.911 NA 0.869
## syntax773 -0.950 NA 0.911
## syntax774 -0.920 NA 0.888
## syntax775 -0.939 NA 0.886
## syntax776 -0.948 NA 0.896
## syntax777 -0.946 NA 0.897
## syntax778 -0.936 NA 0.874
## syntax779 -0.891 NA 0.788
## syntax780 -0.938 NA 0.911
## syntax781 -0.932 NA 0.864
## syntax782 -0.924 NA 0.860
## syntax783 -0.955 NA 0.906
## syntax784 -0.947 NA 0.891
## syntax785 -0.966 NA 0.930
## syntax786 -0.969 NA 0.934
## syntax787 -0.964 NA 0.936
## syntax788 -0.943 0.219 0.902
## syntax789 -0.936 NA 0.873
## syntax790 -0.971 NA 0.937
## syntax791 -0.973 NA 0.942
## syntax792 -0.948 NA 0.892
## syntax793 -0.968 NA 0.931
## syntax794 -0.926 NA 0.856
## syntax795 -0.911 NA 0.830
## syntax796 -0.968 NA 0.932
## syntax797 -0.946 0.217 0.907
##
## Rotated SS loadings: 31.94 0.729
##
## Factor correlations:
##
## F1 F2
## F1 1.000 0.083
## F2 0.083 1.000
Complex_short_all %>%
mutate(
Raw = rowSums(.[,1:37]),
Theta1 = fscores(m5, method="MAP")[,1],
Theta2 = fscores(m5, method="MAP")[,2]
) %>%
dplyr::select(Raw, Theta1, Theta2) %>%
ggpairs()
## Warning: The following factor score estimates failed to converge successfully:
## 1222,1977
## Warning: The following factor score estimates failed to converge successfully:
## 1222,1977
model.1 <- mirt.model('
F1 = 1 - 12
F2 = 13 - 37
COV=F1*F2')
m6 <- mirt(Complex_short_all, model.1, "2PL", verbose=FALSE)
M2(m6)
summary(m6)
## F1 F2 h2
## morphology761 0.933 0.000 0.870
## morphology762 0.891 0.000 0.794
## morphology763 0.901 0.000 0.813
## morphology764 0.913 0.000 0.834
## morphology765 0.919 0.000 0.844
## morphology766 0.863 0.000 0.744
## morphology767 0.892 0.000 0.796
## morphology768 0.905 0.000 0.819
## morphology769 0.912 0.000 0.833
## morphology770 0.935 0.000 0.875
## morphology771 0.958 0.000 0.918
## morphology772 0.922 0.000 0.851
## syntax773 0.000 0.939 0.882
## syntax774 0.000 0.908 0.824
## syntax775 0.000 0.925 0.856
## syntax776 0.000 0.931 0.867
## syntax777 0.000 0.930 0.866
## syntax778 0.000 0.916 0.840
## syntax779 0.000 0.863 0.745
## syntax780 0.000 0.931 0.866
## syntax781 0.000 0.911 0.829
## syntax782 0.000 0.907 0.823
## syntax783 0.000 0.937 0.878
## syntax784 0.000 0.926 0.857
## syntax785 0.000 0.951 0.905
## syntax786 0.000 0.953 0.908
## syntax787 0.000 0.947 0.897
## syntax788 0.000 0.922 0.850
## syntax789 0.000 0.912 0.832
## syntax790 0.000 0.957 0.915
## syntax791 0.000 0.962 0.925
## syntax792 0.000 0.927 0.860
## syntax793 0.000 0.952 0.906
## syntax794 0.000 0.904 0.817
## syntax795 0.000 0.883 0.781
## syntax796 0.000 0.950 0.903
## syntax797 0.000 0.932 0.869
##
## SS loadings: 9.99 21.5
## Proportion Var: 0.27 0.581
##
## Factor correlations:
##
## F1 F2
## F1 1.000 0.959
## F2 0.959 1.000
anova(m6, m4)
##
## Model 1: mirt(data = Complex_short_all, model = 1, itemtype = "2PL")
## Model 2: mirt(data = Complex_short_all, model = model.1, itemtype = "2PL",
## verbose = FALSE)