Loading data
# Switch to the project folder, which lives under the user's home directory.
setwd("~")
setwd("rfolder/MFGK2")
# Read the raw survey export.
mfgkdata <- read.csv(file = "data/mfgkdata.csv")
# Keep the rows whose `engnat` flag equals 1. Rows where the flag is missing
# are dropped, which is what subset() does with NA conditions.
engnats <- mfgkdata[!is.na(mfgkdata$engnat) & mfgkdata$engnat == 1, , drop = FALSE]
Visualizing the distribution of time spent on the test by person. Individuals who spent less than 1 second on any question were removed.
# Per-question timing columns: keep the columns whose name contains "Q",
# then, of those, the ones whose name contains "E".
e_test <- engnats %>%
  dplyr::select(contains("Q")) %>%
  dplyr::select(contains("E"))
# Flag under 1000 ms: any value greater than -1 and below 1000 becomes NA.
e_test <- mutate(
  e_test,
  across(1:32, function(ms) replace(ms, ms > -1 & ms < 1000, NA))
)
# Create graph.
# Write the flagged timings back over columns 3, 7, ..., 127 of a copy of the
# data, so that na.omit() below drops every respondent with a flagged timing
# (together with any row that has a missing value elsewhere).
e2 <- engnats
e2[, seq_len(32) * 4 - 1] <- e_test[, seq_len(32)]
e2o <- na.omit(e2)
# Total test times above 3000 are blanked so they do not stretch the x-axis.
e2o$testelapse[e2o$testelapse > 3000] <- NA
e2o$`Time spent (seconds)` <- e2o$testelapse
GG_denhist(e2o, "Time spent (seconds)", bins = 50)
Warning: Removed 183 rows containing non-finite outside the scale range (`stat_bin()`).Warning: Removed 183 rows containing non-finite outside the scale range (`stat_density()`).
ggsave(filename="timespent.jpg", device ="jpeg", path="plots", width=9, height=5, dpi=320)
Maximum, mean, and minimum of time spent on the test.
max(e2o$testelapse, na.rm=T)
[1] 2989
mean(e2o$testelapse, na.rm=T)
[1] 644.3871
min(e2o$testelapse, na.rm=T)
[1] 155
Giving columns names and converting the dataset’s answer format to one that can be used.
# Calculating sum scores.
# Append 352 empty (NA) columns at positions 138 to 489 in a single assignment.
# The columns receive the automatic names V138 ... V489.
e2[, 137 + seq_len(352)] <- NA
# Give the 320 item columns V138 ... V457 readable "Q<question>: <option>"
# labels (five options per question). Columns V138-V297 are the blocks later
# modelled as answers (138:297) and V298-V457 the blocks later modelled as
# distractors (298:457).
# NOTE(review): the labels are runtime column names that are read back later
# as item names, so their spelling is deliberately left untouched here.
e2 <- e2 %>%
rename("Q1: Emily Dickinson" = "V138",
"Q1: Robert Frost" = "V139",
"Q1: Sylvia Path" = "V140",
"Q1: Maya Angelou" = "V141",
"Q1: Langston Hughes" = "V142",
"Q2: Cats" = "V143",
"Q2: The Lion King" = "V144",
"Q2: Hamilton" = "V145",
"Q2: Wicked" = "V146",
"Q2: Kinky Boots" = "V147",
"Q3: Kwanzaa" = "V148",
"Q3: Christmas" = "V149",
"Q3: Ramadan" = "V150",
"Q3: Yom Kippur" = "V151",
"Q3: Hanukkah" = "V152",
"Q4: CoverGirl" = "V153",
"Q4: Sephora" = "V154",
"Q4: Maybelline" = "V155",
"Q4: Dior" = "V156",
"Q4: Shiseido" = "V157",
"Q5: Oxycodone" = "V158",
"Q5: Ibuprofen" = "V159",
"Q5: Codeine" = "V160",
"Q5: Morphine" = "V161",
"Q5: Asprin" = "V162",
"Q6: AIDS" = "V163",
"Q6: Herpes" = "V164",
"Q6: Chlamydia" = "V165",
"Q6: Human Papillomavirus" = "V166",
"Q6: Trichomoniasis" = "V167",
"Q7: Camel" = "V168",
"Q7: Marlboro" = "V169",
"Q7: Newport" = "V170",
"Q7: Pall Max Box" = "V171",
"Q7: Pyramid" = "V172",
"Q8: weed" = "V173",
"Q8: 420" = "V174",
"Q8: ganja" = "V175",
"Q8: chronic" = "V176",
"Q8: reefer" = "V177",
"Q9: Senegal" = "V178",
"Q9: Ivory Coast" = "V179",
"Q9: Quebec" = "V180",
"Q9: Morocco" = "V181",
"Q9: Vietnam" = "V182",
"Q10: United Kingdom" = "V183",
"Q10: Japan3" = "V184",
"Q10: Sweden" = "V185",
"Q10: Thailand" = "V186",
"Q10: Saudi Arabia" = "V187",
"Q11: Saudi Arabia2" = "V188",
"Q11: Venezuela" = "V189",
"Q11: Nigeria" = "V190",
"Q11: Norway" = "V191",
"Q11: Qatar" = "V192",
"Q12: Russia" = "V193",
"Q12: France" = "V194",
"Q12: Israel" = "V195",
"Q12: China" = "V196",
"Q12: Pakistan" = "V197",
"Q13: mp4" = "V198",
"Q13: mkv" = "V199",
"Q13: avi" = "V200",
"Q13: wmv" = "V201",
"Q13: mov" = "V202",
"Q14: Internet Explorer" = "V203",
"Q14: Firefox" = "V204",
"Q14: Safari" = "V205",
"Q14: Opera" = "V206",
"Q14: Chrome" = "V207",
"Q15: Ubuntu" = "V208",
"Q15: Debian" = "V209",
"Q15: Fedora" = "V210",
"Q15: RHEL" = "V211",
"Q15: Slackware" = "V212",
"Q16: 100 Continue" = "V213",
"Q16: 500 Internal Server Error" = "V214",
"Q16: 301 Moved Permanently" = "V215",
"Q16: 404 Not Found" = "V216",
"Q16: 502 Bad Gateway" = "V217",
"Q17: Shirt" = "V218",
"Q17: Tunic" = "V219",
"Q17: Sarong" = "V220",
"Q17: Shawl" = "V221",
"Q17: Camisole" = "V222",
"Q18: Saw" = "V223",
"Q18: Chisel" = "V224",
"Q18: Bevel" = "V225",
"Q18: Caliper" = "V226",
"Q18: Awl" = "V227",
"Q19: Merlot" = "V228",
"Q19: Cabernet sauvignon" = "V229",
"Q19: Malbec" = "V230",
"Q19: Sangiovese" = "V231",
"Q19: Pinot Noir" = "V232",
"Q20: Rummy" = "V233",
"Q20: Hearts" = "V234",
"Q20: Poker" = "V235",
"Q20: Bridge" = "V236",
"Q20: Cribbidge" = "V237",
"Q21: Resistor" = "V238",
"Q21: Inductor" = "V239",
"Q21: Capacitor" = "V240",
"Q21: Transistor" = "V241",
"Q21: Diode" = "V242",
"Q22: Bitcoin" = "V243",
"Q22: Litecoin" = "V244",
"Q22: Etherium" = "V245",
"Q22: Monero" = "V246",
"Q22: Ripple" = "V247",
"Q23: Mexico" = "V248",
"Q23: Egypt" = "V249",
"Q23: India" = "V250",
"Q23: Sudan" = "V251",
"Q23: Indonesia" = "V252",
"Q24: Al Capone" = "V253",
"Q24: Ted Kaczynski" = "V254",
"Q24: Pablo Escobar" = "V255",
"Q24: Timothy McVeigh" = "V256",
"Q24: Jim Jones" = "V257",
"Q25: Infinite Jest" = "V258",
"Q25: Les Miserables" = "V259",
"Q25: Atlas Shrugged" = "V260",
"Q25: War and Peace" = "V261",
"Q25: Cryptonomicon" = "V262",
"Q26: Mile" = "V263",
"Q26: Meter" = "V264",
"Q26: Furlong" = "V265",
"Q26: Parsec" = "V266",
"Q26: Angstrom" = "V267",
"Q27: CrossFit" = "V268",
"Q27: Zumba" = "V269",
"Q27: Barre" = "V270",
"Q27: Pilates" = "V271",
"Q27: Tabata" = "V272",
"Q28: LOL" = "V273",
"Q28: ROFL" = "V274",
"Q28: BRB" = "V275",
"Q28: GG" = "V276",
"Q28: DM" = "V277",
"Q29: ornate" = "V278",
"Q29: adorned" = "V279",
"Q29: cushy" = "V280",
"Q29: resplendent" = "V281",
"Q29: spiffy" = "V282",
"Q30: HDMI" = "V283",
"Q30: USB" = "V284",
"Q30: Ethernet" = "V285",
"Q30: SATA" = "V286",
"Q30: FireWire" = "V287",
"Q31: Leukemia" = "V288",
"Q31: Lymphoma" = "V289",
"Q31: Melanoma" = "V290",
"Q31: Mesothelioma" = "V291",
"Q31: Sarcoma" = "V292",
"Q32: Calico" = "V293",
"Q32: Paisley" = "V294",
"Q32: Pinstripe" = "V295",
"Q32: Plaid" = "V296",
"Q32: Tartan" = "V297",
# Second set of 160 labels (V298-V457).
"Q1: Elizabeth Cady Stanton" = "V298",
"Q1: Abigail Adams" = "V299",
"Q1: Marcel Cordoba" = "V300",
"Q1: Sun Tzu" = "V301",
"Q1: Trent Moseson" = "V302",
"Q2: Casablanca" = "V303",
"Q2: The Tin Man" = "V304",
"Q2: Blue Swede Shoes" = "V305",
"Q2: Common Projects" = "V306",
"Q2: Amandine" = "V307",
"Q3: Mirch Masala" = "V308",
"Q3: Reconciliation" = "V309",
"Q3: Amadar" = "V310",
"Q3: Durest" = "V311",
"Q3: Viveza" = "V312",
"Q4: ThriftyGal" = "V313",
"Q4: Allenda" = "V314",
"Q4: Reis" = "V315",
"Q4: NewBeautyTruth" = "V316",
"Q4: Aejeong" = "V317",
"Q5: Modafinil" = "V318",
"Q5: Creatine" = "V319",
"Q5: Alemtuzumab" = "V320",
"Q5: Semtex" = "V321",
"Q5: Carboplatin" = "V322",
"Q6: Botulism" = "V323",
"Q6: Shingles" = "V324",
"Q6: Pneumonia" = "V325",
"Q6: Tuberculosis" = "V326",
"Q6: Pertusis" = "V327",
"Q7: Seagrams" = "V328",
"Q7: Black Velvet" = "V329",
"Q7: Windsor" = "V330",
"Q7: Black Turkey" = "V331",
"Q7: Solo" = "V332",
"Q8: smack" = "V333",
"Q8: tilt" = "V334",
"Q8: DnB" = "V335",
"Q8: Jose Garcia" = "V336",
"Q8: Heavenly Green" = "V337",
"Q9: India 2" = "V338",
"Q9: Florida" = "V339",
"Q9: Brazil" = "V340",
"Q9: South Africa" = "V341",
"Q9: Egypt 2" = "V342",
"Q10: France 2" = "V343",
"Q10: Germany" = "V344",
"Q10: Russia 2" = "V345",
"Q10: China 2" = "V346",
"Q10: Brazil 2" = "V347",
"Q11: Zimbabwe" = "V348",
"Q11: Sweden2" = "V349",
"Q11: Singapore" = "V350",
"Q11: Panama" = "V351",
"Q11: Japan" = "V352",
"Q12: Germany 2" = "V353",
"Q12: Saudi Arabia 3" = "V354",
"Q12: Nigeria2" = "V355",
"Q12: Mexico 2" = "V356",
"Q12: Spain" = "V357",
"Q13: csv" = "V358",
"Q13: xls" = "V359",
"Q13: flac" = "V360",
"Q13: msi" = "V361",
"Q13: mp3" = "V362",
"Q14: Slate" = "V363",
"Q14: Expedition" = "V364",
"Q14: Pipes" = "V365",
"Q14: Adele" = "V366",
"Q14: Telegram" = "V367",
"Q15: IIS" = "V368",
"Q15: Kodiak" = "V369",
"Q15: Technitium" = "V370",
"Q15: Oracle" = "V371",
"Q15: Go" = "V372",
"Q16: 500 Deleted" = "V373",
"Q16: 600 Encrypted" = "V374",
"Q16: 303 Payment Processing" = "V375",
"Q16: 209 Download Complete" = "V376",
"Q16: 101 Use Proxy" = "V377",
"Q17: Jayanti" = "V378",
"Q17: Wristlings" = "V379",
"Q17: Cornik" = "V380",
"Q17: Cheapnik" = "V381",
"Q17: Frutiger" = "V382",
"Q18: Skree" = "V383",
"Q18: Wry" = "V384",
"Q18: Whisket" = "V385",
"Q18: Skane" = "V386",
"Q18: Brutch" = "V387",
"Q19: Chardonnay" = "V388",
"Q19: Semillon" = "V389",
"Q19: Moscato" = "V390",
"Q19: Gewuumlarztraminer" = "V391",
"Q19: Riesling" = "V392",
"Q20: Yatzhe" = "V393",
"Q20: Croquet" = "V394",
"Q20: Bocce" = "V395",
"Q20: Black 2s" = "V396",
"Q20: Manhattan" = "V397",
"Q21: Signer" = "V398",
"Q21: Subductor" = "V399",
"Q21: Annulus" = "V400",
"Q21: Boson" = "V401",
"Q21: Zenoid" = "V402",
"Q22: AlphaBay" = "V403",
"Q22: DCA" = "V404",
"Q22: PayPal" = "V405",
"Q22: Liberty Ledger" = "V406",
"Q22: Dwork" = "V407",
"Q23: Greece" = "V408",
"Q23: Turkey" = "V409",
"Q23: Congo" = "V410",
"Q23: Mongolia" = "V411",
"Q23: Japan2" = "V412",
"Q24: Harvey Parnell" = "V413",
"Q24: Sid McMath" = "V414",
"Q24: John Goodman" = "V415",
"Q24: Buster Keaton" = "V416",
"Q24: Pavel Tikhonov" = "V417",
"Q25: Pride and Prejudice" = "V418",
"Q25: Harry Potter and the Prisoner of Azkaban" = "V419",
"Q25: Fahrenheit 451" = "V420",
"Q25: To Kill a Mockingbird" = "V421",
"Q25: Science, and its Antecedents" = "V422",
"Q26: Newton" = "V423",
"Q26: Pascal" = "V424",
"Q26: Pitch" = "V425",
"Q26: Hertz" = "V426",
"Q26: Annum" = "V427",
"Q27: Shiatsu" = "V428",
"Q27: Reflexology" = "V429",
"Q27: Gooba" = "V430",
"Q27: UltraMaxFit" = "V431",
"Q27: NTP" = "V432",
"Q28: QTY" = "V433",
"Q28: FUM" = "V434",
"Q28: AET" = "V435",
"Q28: TT" = "V436",
"Q28: MRLO" = "V437",
"Q29: effective" = "V438",
"Q29: virile" = "V439",
"Q29: esulent" = "V440",
"Q29: adscititious" = "V441",
"Q29: thalassic" = "V442",
"Q30: WiFi" = "V443",
"Q30: D-High" = "V444",
"Q30: 2Interlink" = "V445",
"Q30: RTC" = "V446",
"Q30: HDD" = "V447",
"Q31: Lymnoma" = "V448",
"Q31: Colerectia" = "V449",
"Q31: Vitisus" = "V450",
"Q31: Tradoma" = "V451",
"Q31: Cellenia" = "V452",
"Q32: Periwinkle" = "V453",
"Q32: Snapdragon" = "V454",
"Q32: Stilted" = "V455",
"Q32: Arvo" = "V456",
"Q32: Tahoma" = "V457"
)
# Convert each question's raw response string into ten 0/1 item scores plus a
# per-question total.
for (q in seq_len(32)) {
  # Raw response string for question q (columns 2, 6, ..., 126).
  resp <- e2[, q * 4 - 2]
  for (k in 1:5) {
    # Codes A0-A4: the item column (138-297) is 1 when the code appears in
    # the response string and 0 otherwise.
    e2[, 132 + k + q * 5] <-
      as.numeric(grepl(paste0("A", k - 1), resp, fixed = TRUE))
    # Codes A5-A9: reverse scored, so the item column (298-457) is 0 when
    # the code appears in the response string and 1 otherwise.
    e2[, 292 + k + q * 5] <-
      as.numeric(!grepl(paste0("A", k + 4), resp, fixed = TRUE))
  }
  # Question total (columns 458-489): sum of the ten item scores.
  item_cols <- c(132 + 1:5 + q * 5, 292 + 1:5 + q * 5)
  e2[, 457 + q] <- Reduce(`+`, e2[, item_cols])
}
Calculating the sex bias, pass rate, and g-loadings of all items.
# Calculating types of scores.
# Total score = row sum of the 320 scored item columns (138-457).
engy <- e2
engy$gksum <- rowSums(engy[, 138:457])
engy$gksumstand <- normalise(engy$gksum)
engyo <- na.omit(engy)
e2test3 <- engyo[, 458:489]

# Per-item result vectors, one slot for each of the 320 items.
proanglo <- rep(0, 320) # placeholder: never filled in below, stays all zero
equalsex <- rep(0, 320)
gloading <- rep(0, 320)
passrate <- rep(0, 320)
agecor <- rep(0, 320)

# Sex is taken from `gender`; the codes 0 and 3 are set to missing so that
# only two groups remain for the 2x2 tables below.
engyo$sex <- engyo$gender
engyo$sex[engyo$sex == 3] <- NA
engyo$sex[engyo$sex == 0] <- NA

for (i in 1:320) {
  item <- engyo[[137 + i]]
  # Odds ratio from the sex-by-item 2x2 table:
  # (first sex scoring 0 * second sex scoring 1) /
  # (first sex scoring 1 * second sex scoring 0).
  # NOTE(review): assumes every item has both 0 and 1 responses in both
  # groups; otherwise the table is not 2x2 and the indexing fails.
  t1 <- table(engyo$sex, item)
  equalsex[i] <- (t1[1, 1] * t1[2, 2]) / (t1[1, 2] * t1[2, 1])
  passrate[i] <- mean(item, na.rm = TRUE)
  # cor.test() returns a whole "htest" list. Assigning that list into a
  # numeric slot turns `agecor` into a list and keeps only the list's first
  # element (the t statistic), so extract the correlation estimate instead.
  agecor[i] <- unname(cor.test(engyo$age, item)$estimate)
}

# Unidimensional IRT models, fitted separately to the first 160 item columns
# (138-297) and the second 160 (298-457), with 2PL, 3PL and 4PL item types.
ans2 <- mirt(engyo[, 138:297], model = 1, itemtype = "2PL")
dist2 <- mirt(engyo[, 298:457], model = 1, itemtype = "2PL")
ans3 <- mirt(engyo[, 138:297], model = 1, itemtype = "3PL")
dist3 <- mirt(engyo[, 298:457], model = 1, itemtype = "3PL")
ans4 <- mirt(engyo[, 138:297], model = 1, itemtype = "4PL")
dist4 <- mirt(engyo[, 298:457], model = 1, itemtype = "4PL")

# First element of each model summary (used below as the loadings), one
# column per item set.
gl2 <- data.frame(summary(ans2)[1], summary(dist2)[1])
gl3 <- data.frame(summary(ans3)[1], summary(dist3)[1])
gl4 <- data.frame(summary(ans4)[1], summary(dist4)[1])
# Stack the second column under the first so that column 1 holds all 320
# loadings in item order.
gl2[161:320, 1] <- gl2[1:160, 2]
gl3[161:320, 1] <- gl3[1:160, 2]
gl4[161:320, 1] <- gl4[1:160, 2]

# Item-level table: loadings, sex odds ratio, age correlation and pass rate.
dafs <- data.frame(g2 = gl2[, 1], equalsex)
dafs$anglobias <- proanglo
dafs$g3 <- gl3[, 1]
dafs$g4 <- gl4[, 1]
dafs$ageb <- agecor
dafs$pr <- passrate
dafs$itemname <- colnames(engyo[, 138:457])
colnames(dafs)
correlation_matrix(dafs %>% select(g2, g3, g4, pr, ageb, equalsex))
Correlation matrix of 2PL g-loadings (g2), 3PL g-loadings (g3), 4PL g-loadings (g4), pass rates (pr), age-associations (ageb), and sex bias in favour of women (equalsex)
correlation_matrix(dafs %>% select(g2, g3, g4, pr, ageb, equalsex))
g2 g3 g4 pr ageb equalsex
g2 "NA" "0.744 ***" "0.224 ***" "0.504 ***" "0.23 ***" "-0.13 *"
g3 "0.744 ***" "NA" "0.407 ***" "0.127 *" "0.179 **" "-0.291 ***"
g4 "0.224 ***" "0.407 ***" "NA" "-0.229 ***" "0.009 " "0.187 ***"
pr "0.504 ***" "0.127 *" "-0.229 ***" "NA" "0.058 " "0.04 "
ageb "0.23 ***" "0.179 **" "0.009 " "0.058 " "NA" "-0.125 *"
equalsex "-0.13 *" "-0.291 ***" "0.187 ***" "0.04 " "-0.125 *" "NA"
Linear regression models which test differences in general knowledge between the sexes using the method of correlated vectors.
ans_dafs <- dafs[1:160, ]
dist_dafs <- dafs[161:320, ]
lr1 <- lm(data=ans_dafs, equalsex ~ g2 + pr)
summary(lr1)
Call:
lm(formula = equalsex ~ g2 + pr, data = ans_dafs)
Residuals:
Min 1Q Median 3Q Max
-1.3112 -0.5863 -0.2340 0.0956 8.0045
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2023 0.3350 3.589 0.000444 ***
g2 -1.5011 0.6613 -2.270 0.024575 *
pr 0.8140 0.3926 2.073 0.039760 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.18 on 157 degrees of freedom
Multiple R-squared: 0.04314, Adjusted R-squared: 0.03095
F-statistic: 3.539 on 2 and 157 DF, p-value: 0.03138
lr2 <- lm(data=ans_dafs, equalsex ~ g3 + pr)
summary(lr2)
Call:
lm(formula = equalsex ~ g3 + pr, data = ans_dafs)
Residuals:
Min 1Q Median 3Q Max
-1.8199 -0.4227 -0.1817 0.0117 7.8682
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.9359 0.3674 5.269 0.000000447 ***
g3 -2.0257 0.4477 -4.525 0.000011848 ***
pr 0.3357 0.3553 0.945 0.346
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.128 on 157 degrees of freedom
Multiple R-squared: 0.1258, Adjusted R-squared: 0.1146
F-statistic: 11.29 on 2 and 157 DF, p-value: 0.00002617
lr3 <- lm(data=ans_dafs, equalsex ~ g4 + pr)
summary(lr3)
Call:
lm(formula = equalsex ~ g4 + pr, data = ans_dafs)
Residuals:
Min 1Q Median 3Q Max
-1.2380 -0.6157 -0.2345 0.2253 7.7566
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.6370 0.5065 -1.258 0.21037
g4 1.5304 0.4756 3.218 0.00157 **
pr 0.8801 0.3812 2.309 0.02226 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.161 on 157 degrees of freedom
Multiple R-squared: 0.07287, Adjusted R-squared: 0.06106
F-statistic: 6.17 on 2 and 157 DF, p-value: 0.002633
lr4 <- lm(data=dist_dafs, equalsex ~ g2 + pr)
summary(lr4)
Call:
lm(formula = equalsex ~ g2 + pr, data = dist_dafs)
Residuals:
Min 1Q Median 3Q Max
-0.61987 -0.24667 -0.08964 0.15550 1.63323
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3424 0.3231 1.060 0.2909
g2 -0.3194 0.2858 -1.118 0.2654
pr 0.8291 0.4329 1.915 0.0573 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3856 on 157 degrees of freedom
Multiple R-squared: 0.02283, Adjusted R-squared: 0.01038
F-statistic: 1.834 on 2 and 157 DF, p-value: 0.1632
lr5 <- lm(data=dist_dafs, equalsex ~ g3 + pr)
summary(lr5)
Call:
lm(formula = equalsex ~ g3 + pr, data = dist_dafs)
Residuals:
Min 1Q Median 3Q Max
-0.62423 -0.24435 -0.09548 0.15651 1.62492
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.4316 0.3187 1.354 0.178
g3 -0.1100 0.2429 -0.453 0.651
pr 0.6034 0.3749 1.609 0.110
Residual standard error: 0.3869 on 157 degrees of freedom
Multiple R-squared: 0.01634, Adjusted R-squared: 0.003807
F-statistic: 1.304 on 2 and 157 DF, p-value: 0.2744
lr6 <- lm(data=dist_dafs, equalsex ~ g4 + pr)
summary(lr6)
Call:
lm(formula = equalsex ~ g4 + pr, data = dist_dafs)
Residuals:
Min 1Q Median 3Q Max
-0.60084 -0.24501 -0.09191 0.16749 1.61428
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3704 0.3275 1.131 0.260
g4 0.1381 0.2414 0.572 0.568
pr 0.4877 0.3626 1.345 0.181
Residual standard error: 0.3867 on 157 degrees of freedom
Multiple R-squared: 0.0171, Adjusted R-squared: 0.004581
F-statistic: 1.366 on 2 and 157 DF, p-value: 0.2582
Preliminary code for calculating the reliability of each scoring method.
mirtanswerss <- mirt(e2o[, 138:297], model=1, itemtype='2PL')
Iteration: 1, Log-Lik: -940747.630, Max-Change: 3.52091
Iteration: 2, Log-Lik: -931961.614, Max-Change: 2.23146
Iteration: 3, Log-Lik: -930050.999, Max-Change: 0.71893
Iteration: 4, Log-Lik: -929647.239, Max-Change: 0.38081
Iteration: 5, Log-Lik: -929557.245, Max-Change: 0.13579
Iteration: 6, Log-Lik: -929533.105, Max-Change: 0.03667
Iteration: 7, Log-Lik: -929529.020, Max-Change: 0.00596
Iteration: 8, Log-Lik: -929527.898, Max-Change: 0.00365
Iteration: 9, Log-Lik: -929527.338, Max-Change: 0.00275
Iteration: 10, Log-Lik: -929526.830, Max-Change: 0.00234
Iteration: 11, Log-Lik: -929526.565, Max-Change: 0.00102
Iteration: 12, Log-Lik: -929526.427, Max-Change: 0.00165
Iteration: 13, Log-Lik: -929526.246, Max-Change: 0.00160
Iteration: 14, Log-Lik: -929526.105, Max-Change: 0.00115
Iteration: 15, Log-Lik: -929526.033, Max-Change: 0.00091
Iteration: 16, Log-Lik: -929525.950, Max-Change: 0.00104
Iteration: 17, Log-Lik: -929525.906, Max-Change: 0.00095
Iteration: 18, Log-Lik: -929525.861, Max-Change: 0.00056
Iteration: 19, Log-Lik: -929525.851, Max-Change: 0.00031
Iteration: 20, Log-Lik: -929525.836, Max-Change: 0.00030
Iteration: 21, Log-Lik: -929525.824, Max-Change: 0.00027
Iteration: 22, Log-Lik: -929525.768, Max-Change: 0.00107
Iteration: 23, Log-Lik: -929525.739, Max-Change: 0.00016
Iteration: 24, Log-Lik: -929525.734, Max-Change: 0.00017
Iteration: 25, Log-Lik: -929525.715, Max-Change: 0.00095
Iteration: 26, Log-Lik: -929525.699, Max-Change: 0.00015
Iteration: 27, Log-Lik: -929525.696, Max-Change: 0.00076
Iteration: 28, Log-Lik: -929525.681, Max-Change: 0.00081
Iteration: 29, Log-Lik: -929525.674, Max-Change: 0.00022
Iteration: 30, Log-Lik: -929525.669, Max-Change: 0.00013
Iteration: 31, Log-Lik: -929525.668, Max-Change: 0.00065
Iteration: 32, Log-Lik: -929525.661, Max-Change: 0.00064
Iteration: 33, Log-Lik: -929525.656, Max-Change: 0.00013
Iteration: 34, Log-Lik: -929525.655, Max-Change: 0.00013
Iteration: 35, Log-Lik: -929525.654, Max-Change: 0.00062
Iteration: 36, Log-Lik: -929525.649, Max-Change: 0.00061
Iteration: 37, Log-Lik: -929525.647, Max-Change: 0.00016
Iteration: 38, Log-Lik: -929525.644, Max-Change: 0.00012
Iteration: 39, Log-Lik: -929525.644, Max-Change: 0.00059
Iteration: 40, Log-Lik: -929525.640, Max-Change: 0.00058
Iteration: 41, Log-Lik: -929525.639, Max-Change: 0.00015
Iteration: 42, Log-Lik: -929525.637, Max-Change: 0.00011
Iteration: 43, Log-Lik: -929525.637, Max-Change: 0.00056
Iteration: 44, Log-Lik: -929525.635, Max-Change: 0.00055
Iteration: 45, Log-Lik: -929525.634, Max-Change: 0.00011
Iteration: 46, Log-Lik: -929525.633, Max-Change: 0.00011
Iteration: 47, Log-Lik: -929525.632, Max-Change: 0.00053
Iteration: 48, Log-Lik: -929525.631, Max-Change: 0.00010
Iteration: 49, Log-Lik: -929525.631, Max-Change: 0.00052
Iteration: 50, Log-Lik: -929525.631, Max-Change: 0.00012
Iteration: 51, Log-Lik: -929525.629, Max-Change: 0.00010
Iteration: 52, Log-Lik: -929525.629, Max-Change: 0.00050
Iteration: 53, Log-Lik: -929525.628, Max-Change: 0.00049
Iteration: 54, Log-Lik: -929525.628, Max-Change: 0.00010
Iteration: 55, Log-Lik: -929525.628, Max-Change: 0.00010
(unconverted) Reliability of each method:
#4PL split half
cor.test(e2o$even, e2o$odd)
Pearson's product-moment correlation
data: e2o$even and e2o$odd
t = 176.2, df = 13694, p-value < 0.00000000000000022
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.8278237 0.8380779
sample estimates:
cor
0.8330223
#2PL split half
cor.test(e2o$even2, e2o$odd2)
Pearson's product-moment correlation
data: e2o$even2 and e2o$odd2
t = 203.01, df = 13694, p-value < 0.00000000000000022
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.8621342 0.8704897
sample estimates:
cor
0.8663725
#3PL split half
cor.test(e2o$even3, e2o$odd3)
Pearson's product-moment correlation
data: e2o$even3 and e2o$odd3
t = 200.78, df = 13694, p-value < 0.00000000000000022
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.8596628 0.8681575
sample estimates:
cor
0.8639716
#spline split half
cor.test(e2o$evens, e2o$odds)
Pearson's product-moment correlation
data: e2o$evens and e2o$odds
t = 175.22, df = 13694, p-value < 0.00000000000000022
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.8263605 0.8366939
sample estimates:
cor
0.8315992
#principal component split half
cor.test(e2o$paeven, e2o$paodd)
Pearson's product-moment correlation
data: e2o$paeven and e2o$paodd
t = 151.3, df = 13694, p-value < 0.00000000000000022
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7846616 0.7972011
sample estimates:
cor
0.7910144
#factor split half
cor.test(e2o$faeven, e2o$faodd)
Pearson's product-moment correlation
data: e2o$faeven and e2o$faodd
t = 145.59, df = 13694, p-value < 0.00000000000000022
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7727648 0.7859140
sample estimates:
cor
0.7794252
Preliminary code for sex and mobile/desktop user differences in knowledge.
# Differences.
# Sex is taken from `gender`; the codes 0 and 3 are set to missing.
e2o$sex <- e2o$gender
e2o$sex[e2o$sex == 3] <- NA
e2o$sex[e2o$sex == 0] <- NA

# Device flag: everyone starts as desktop (1, the null hypothesis) and is
# switched to 0 when the screen size matches one of the listed pairs.
e2o$desktop <- 1
# Flat list of (screenw, screenh) pairs: elements 1-2 form the first pair,
# elements 3-4 the second, and so on.
# NOTE(review): presumably common phone resolutions in portrait orientation;
# a phone reporting its size in landscape would not match - confirm.
phonenumber <- c(320, 480, 320, 568, 375, 667, 414, 736, 414, 896, 375, 812, 414, 896, 390, 844, 428, 926, 320, 553, 360, 640, 360, 800, 390, 844, 414, 896, 412, 915, 393, 873, 360, 780)
# Step through the list two elements at a time. `1:length(x)/2` would parse
# as `(1:length(x))/2`, giving fractional indices and overlapping pairs
# (1,2), (2,3), ... instead of the intended (1,2), (3,4), ...
for (i in seq(1, length(phonenumber) - 1, by = 2)) {
  e2o$desktop[e2o$screenw == phonenumber[i] & e2o$screenh == phonenumber[i + 1]] <- 0
}

# Agreement between the scoring methods: correlation matrix and a one-factor
# model over the eight score variants.
saba <- subset(e2o, select = c(gksum, gkdsum4, gkdsum2, gkdsum3, gkdsums, gkdsumg, gkfa, gkpa))
cor(saba)
fas <- fa(saba, nfactors = 1)
print(fas)
The differences:
cohen.d(data=e2o, gksum ~ sex)
Call: cohen.d(x = gksum ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gksum -0.47 -0.43 -0.4
Multivariate (Mahalanobis) distance between groups
[1] 0.43
r equivalent of difference between two means
gksum
-0.21
cohen.d(data=e2o, gkdsum4 ~ sex)
Call: cohen.d(x = gkdsum4 ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsum4 -0.53 -0.49 -0.46
Multivariate (Mahalanobis) distance between groups
[1] 0.49
r equivalent of difference between two means
gkdsum4
-0.24
cohen.d(data=e2o, gkdsum2 ~ sex)
Call: cohen.d(x = gkdsum2 ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsum2 -0.49 -0.45 -0.42
Multivariate (Mahalanobis) distance between groups
[1] 0.45
r equivalent of difference between two means
gkdsum2
-0.22
cohen.d(data=e2o, gkdsum3 ~ sex)
Call: cohen.d(x = gkdsum3 ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsum3 -0.44 -0.41 -0.37
Multivariate (Mahalanobis) distance between groups
[1] 0.41
r equivalent of difference between two means
gkdsum3
-0.2
cohen.d(data=e2o, gkdsums ~ sex)
Call: cohen.d(x = gkdsums ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsums -0.48 -0.45 -0.41
Multivariate (Mahalanobis) distance between groups
[1] 0.45
r equivalent of difference between two means
gkdsums
-0.22
cohen.d(data=e2o, gkdsumg ~ sex)
Call: cohen.d(x = gkdsumg ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsumg -0.43 -0.4 -0.36
Multivariate (Mahalanobis) distance between groups
[1] 0.4
r equivalent of difference between two means
gkdsumg
-0.19
cohen.d(data=e2o, gkfa ~ sex)
Call: cohen.d(x = gkfa ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkfa -0.42 -0.38 -0.35
Multivariate (Mahalanobis) distance between groups
[1] 0.38
r equivalent of difference between two means
gkfa
-0.19
cohen.d(data=e2o, gkpa ~ sex)
Call: cohen.d(x = gkpa ~ sex, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkpa -0.42 -0.38 -0.35
Multivariate (Mahalanobis) distance between groups
[1] 0.38
r equivalent of difference between two means
gkpa
-0.19
cohen.d(data=e2o, gksum ~ desktop)
Call: cohen.d(x = gksum ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gksum 0.17 0.22 0.26
Multivariate (Mahalanobis) distance between groups
[1] 0.22
r equivalent of difference between two means
gksum
0.08
cohen.d(data=e2o, gkdsum4 ~ desktop)
Call: cohen.d(x = gkdsum4 ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsum4 0.2 0.25 0.29
Multivariate (Mahalanobis) distance between groups
[1] 0.25
r equivalent of difference between two means
gkdsum4
0.09
cohen.d(data=e2o, gkdsum2 ~ desktop)
Call: cohen.d(x = gkdsum2 ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsum2 0.2 0.24 0.29
Multivariate (Mahalanobis) distance between groups
[1] 0.24
r equivalent of difference between two means
gkdsum2
0.09
cohen.d(data=e2o, gkdsum3 ~ desktop)
Call: cohen.d(x = gkdsum3 ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsum3 0.17 0.22 0.26
Multivariate (Mahalanobis) distance between groups
[1] 0.22
r equivalent of difference between two means
gkdsum3
0.08
cohen.d(data=e2o, gkdsums ~ desktop)
Call: cohen.d(x = gkdsums ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsums 0.2 0.24 0.29
Multivariate (Mahalanobis) distance between groups
[1] 0.24
r equivalent of difference between two means
gkdsums
0.09
cohen.d(data=e2o, gkdsumg ~ desktop)
Call: cohen.d(x = gkdsumg ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkdsumg 0.17 0.22 0.26
Multivariate (Mahalanobis) distance between groups
[1] 0.22
r equivalent of difference between two means
gkdsumg
0.08
cohen.d(data=e2o, gkfa ~ desktop)
Call: cohen.d(x = gkfa ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkfa 0.17 0.21 0.26
Multivariate (Mahalanobis) distance between groups
[1] 0.21
r equivalent of difference between two means
gkfa
0.08
cohen.d(data=e2o, gkpa ~ desktop)
Call: cohen.d(x = gkpa ~ desktop, data = e2o)
Cohen d statistic of difference between two means
lower effect upper
gkpa 0.16 0.21 0.25
Multivariate (Mahalanobis) distance between groups
[1] 0.21
r equivalent of difference between two means
gkpa
0.08
Sex differences in variance hypothesis plot – sum scores.
GG_denhist(e2o, var='gksum', group='sex')
Warning: Grouping variable contained missing values. These were removed. If you want an NA group, convert to explicit value.
Statistical test – sum scores
describe2((e2o %>% filter(sex==2))$gksum)
describe2((e2o %>% filter(sex==1))$gksum)
2.055458280132737/1.95687101044259
[1] 1.05038
var.test(
(e2o %>% filter(sex == 2))$gksum,
(e2o %>% filter(sex == 1))$gksum
)
F test to compare two variances
data: (e2o %>% filter(sex == 2))$gksum and (e2o %>% filter(sex == 1))$gksum
F = 0.90637, num df = 6539, denom df = 6758, p-value = 0.000062
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.8638296 0.9510305
sample estimates:
ratio of variances
0.9063732
Sex differences in variance hypothesis plot – 2PL variant
GG_denhist(e2o, var='gkdsum2', group='sex')
Warning: Grouping variable contained missing values. These were removed. If you want an NA group, convert to explicit value.
Statistical test – 2PL variant
describe2((e2o %>% filter(sex==2))$gkdsum2)
describe2((e2o %>% filter(sex==1))$gkdsum2)
1.388418874823938/1.21994734532738
[1] 1.138097
var.test(
(e2o %>% filter(sex == 2))$gkdsum2,
(e2o %>% filter(sex == 1))$gkdsum2
)
F test to compare two variances
data: (e2o %>% filter(sex == 2))$gkdsum2 and (e2o %>% filter(sex == 1))$gkdsum2
F = 0.77204, num df = 6539, denom df = 6758, p-value < 0.00000000000000022
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.7358040 0.8100812
sample estimates:
ratio of variances
0.7720424
Averaged international differences
sd(e2o$gkdsumg)
[1] 1
e2o$gkdsumg = e2o$gkdsumg/sd(e2o$gkdsumg, na.rm=T)
sd(e2o$gkpa)
[1] 1
sd(e2o$gkfa)
[1] 1
sd(e2o$gkdsum2)
[1] 1
sd(e2o$gkdsum3)
[1] 1
e2o$gkdsum2 = e2o$gkdsum2/sd(e2o$gkdsum2)
e2o$gkdsum3 = e2o$gkdsum3/sd(e2o$gkdsum3)
sd(e2o$gkdsum2)
[1] 1
sd(e2o$gkdsums)
[1] 1
e2o$gkdsums = e2o$gkdsums/sd(e2o$gkdsums)
sd(e2o$gkdsums)
[1] 1
# `e2o$gkdsum` is only a prefix of several columns (gkdsum2, gkdsum3, gkdsum4,
# ...), so `$` finds no single match and sd() returns NA. The next statement
# rescales gkdsum4, so that is presumably the column meant here.
sd(e2o$gkdsum4)
[1] NA
e2o$gkdsum4 = e2o$gkdsum4/sd(e2o$gkdsum4)
e2o$gksumstand <- normalise(e2o$gksum)
mean(abs(aggregate(e2o$gksumstand, list(e2o$country), mean)$x))
[1] 0.5345239
mean(abs(aggregate(e2o$gkdsum4, list(e2o$country), mean)$F1))
[1] 0.5048382
mean(abs(aggregate(e2o$gkdsum2, list(e2o$country), mean)$F1))
[1] 0.4964571
mean(abs(aggregate(e2o$gkdsum3, list(e2o$country), mean)$F1))
[1] 0.5114363
mean(abs(aggregate(e2o$gkdsums, list(e2o$country), mean)$F1))
[1] 0.5297136
mean(abs(aggregate(e2o$gkdsumg, list(e2o$country), mean)$F1))
[1] 0.5155056
mean(abs(aggregate(e2o$gkpa, list(e2o$country), mean)$x))
[1] 0.5363404
mean(abs(aggregate(e2o$gkfa, list(e2o$country), mean)$x))
[1] 0.5315517
Time taken and general knowledge score – LOESS method.
e2o$time = as.numeric(e2o$testelapse)
e2o$time[e2o$time > 3000] <- NA
GG_denhist(e2o, "time")
Warning: Removed 183 rows containing non-finite outside the scale range (`stat_bin()`).Warning: Removed 183 rows containing non-finite outside the scale range (`stat_density()`).
fit <- lm(data=e2o, as.formula(glue::glue("gksumstand ~ rcs(time, 7)")))
summary(fit)
Call:
lm(formula = as.formula(glue::glue("gksumstand ~ rcs(time, 7)")),
data = e2o)
Residuals:
Min 1Q Median 3Q Max
-4.5660 -0.6802 0.0351 0.7142 3.2108
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.6700482 0.2008108 -8.317 < 0.0000000000000002 ***
rcs(time, 7)time 0.0041418 0.0005559 7.451 0.0000000000000982 ***
rcs(time, 7)time' -0.1132780 0.0306649 -3.694 0.000222 ***
rcs(time, 7)time'' 0.3417900 0.1441801 2.371 0.017774 *
rcs(time, 7)time''' -0.3043025 0.2242068 -1.357 0.174728
rcs(time, 7)time'''' 0.0912097 0.1674021 0.545 0.585863
rcs(time, 7)time''''' -0.0093541 0.0661387 -0.141 0.887531
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9927 on 13506 degrees of freedom
(183 observations deleted due to missingness)
Multiple R-squared: 0.01165, Adjusted R-squared: 0.01121
F-statistic: 26.54 on 6 and 13506 DF, p-value: < 0.00000000000000022
kirkegaard::GG_scatter(e2o, 'time', 'gksumstand') + geom_smooth(color = "green") + theme(axis.title = element_text(size = 15))+
xlab("Time Taken") +
ylab("General Knowledge")
ggsave(filename="pen.jpg", device ="jpeg", path="plots", width=9, height=5, dpi=320)
Age and general knowledge score – spline method.
# Ages above 100 are treated as invalid entries and replaced by the mean age.
# The mean is taken over the remaining ages only, so the invalid values cannot
# inflate the number that replaces them.
implausible_age <- !is.na(e2o$age) & e2o$age > 100
e2o$age[implausible_age] <- mean(e2o$age[!implausible_age], na.rm = TRUE)
# Restricted cubic spline of age with 5 knots predicting the standardised
# sum score.
fit <- lm(data = e2o, as.formula(glue::glue("gksumstand ~ rcs(age, 5)")))
summary(fit)
Call:
lm(formula = as.formula(glue::glue("gksumstand ~ rcs(age, 5)")),
data = e2o)
Residuals:
Min 1Q Median 3Q Max
-4.3307 -0.5558 0.0306 0.5927 4.0064
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.695408 0.145315 -25.430 < 0.0000000000000002 ***
rcs(age, 5)age 0.179795 0.008502 21.146 < 0.0000000000000002 ***
rcs(age, 5)age' -1.303652 0.236192 -5.519 0.0000000346 ***
rcs(age, 5)age'' 1.624818 0.500332 3.247 0.00117 **
rcs(age, 5)age''' 0.034634 0.318324 0.109 0.91336
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8716 on 13691 degrees of freedom
Multiple R-squared: 0.2405, Adjusted R-squared: 0.2403
F-statistic: 1084 on 4 and 13691 DF, p-value: < 0.00000000000000022
# Prediction grid: ages 12 to 100 in steps of 0.01, with the fitted value of
# `fit` at each grid point.
uzi <- seq(from = 12, to = 100, by = 0.01)
uzi2 <- data.frame(age = uzi)
uzi2$fit <- predict(fit, uzi2)
# Raw observations as points with the fitted curve on top. The point colour is
# a fixed setting, so it is given outside aes(); inside aes() the string would
# be mapped through the colour scale (default hue plus a legend) instead of
# drawing black points.
p <- ggplot(uzi2) +
  geom_point(mapping = aes(age, gksumstand), color = "black", data = e2o) +
  geom_line(mapping = aes(age, fit), color = "blue", size = 1) +
  xlab("Age") +
  ylab("General Knowledge") +
  theme(axis.title = element_text(size = 18))
p
# Save this plot explicitly rather than relying on the last plot displayed.
ggsave(filename = "agetake.jpg", plot = p, device = "jpeg", path = "plots", width = 9, height = 5, dpi = 320)
Age and general knowledge score – LOESS method.
kirkegaard::GG_scatter(e2o, 'age', 'gksumstand') + geom_smooth(color = "green") + theme(axis.title = element_text(size = 15))+
xlab("Age") +
ylab("General Knowledge")
ggsave(filename="pena.jpg", device ="jpeg", path="plots", width=9, height=5, dpi=320)
Calculating the norms for the test using all three methods.
##############################NORMING
# Rename the 32 per-question total columns V458 ... V489 to Q1 ... Q32.
# The lookup is a named vector of the form c(new_name = "old_name").
e2o <- e2o %>%
  rename(all_of(stats::setNames(paste0("V", 458:489), paste0("Q", 1:32))))
e2o2 <- subset(e2o, e2o$engnat==1)
e2o2$sex <- e2o2$gender
e2o2$sex[e2o2$sex==3] <- NA
e2o2$sex[e2o2$sex==0] <- NA
max(e2o2$gksum)
e2o2$noabrev = e2o2$gksum - e2o2$Q28
for(i in 180:310) {
print(i)
print(qnorm(sum(e2o2$noabrev<i)/length(e2o2$noabrev))*15+100)
}
# Standard deviation of the score (`noabrev`) within each single year of age
# from 13 to 20. The table is allocated with one row per age up front, and no
# top-level objects called `sd` or `age` are created, so stats::sd is not
# masked by a numeric vector.
norm_ages <- as.numeric(13:20)
agesd <- data.frame(age = norm_ages, sd = NA_real_)
for (k in seq_along(norm_ages)) {
  agen <- subset(e2o2, e2o2$age == norm_ages[k])
  agesd$sd[k] <- stats::sd(agen$noabrev)
}
# Does the spread of scores change with age across these groups?
cor.test(agesd$age, agesd$sd)
fit <- lm(data=e2o2, as.formula(glue::glue("noabrev ~ rcs(age, 5)")))
summary(fit)
predict(fit, newdata=data.frame(age=c(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)))
fit2 <- lm(data=agesd, as.formula(glue::glue("sd ~ rcs(age, 3)")))
summary(fit2)
predict(fit2, newdata=data.frame(age=c(13, 14, 15, 16, 17, 18, 19, 20)))
agen <- subset(e2o2, e2o2$age<50 & e2o2$age>30)
sd(agen$noabrev)
mean(agen$noabrev)
agen <- subset(e2o2, e2o2$age<71 & e2o2$age>50)
sd(agen$noabrev)
mean(agen$noabrev)
mean(e2o2$sex, na.rm=T)
################################
First attempt at a latent model (not considered)
############
# Convenience copies: IQF is the sum score, Female is the sex code shifted
# down by one (sex 1 -> 0, sex 2 -> 1).
e2o2$IQF = e2o2$gksum
e2o2$Female = e2o2$sex-1
# Disabled exploratory code, kept for reference.
#agestrat(datas=e2o2, 129)
#mirtanswers22 <- mirt(e2o2[, 138:297], model=1, itemtype='2PL')
#mirtdistractors22 <- mirt(e2o2[, 298:457], model=1, itemtype='2PL')
#resans <- residuals(mirtanswers22)
#resdist <- residuals(mirtdistractors22)
#min(resans, na.rm=T)
# Model syntax: four first-order factors (CK, MK, TK, IK), each measured by a
# subset of the 32 question totals Q1-Q32, and a second-order factor GK
# measured by those four factors.
latax <- "
#latents:
CK =~ Q1 + Q2 + Q3 + Q4 + Q7 + Q8 + Q17 + Q18 + Q19 + Q20 + Q24 + Q25 + Q27 + Q28 + Q29 + Q32
MK =~ Q5 + Q6 + Q31
TK =~ Q13 + Q14 + Q15 + Q16 + Q21 + Q22 + Q26 + Q30
IK =~ Q9 + Q10 + Q11 + Q12 + Q23
GK =~ CK + MK + TK + IK
"
# NOTE(review): the model is fitted to `e2o`, not to the `e2o2` subset
# prepared above - confirm that this is intended.
latafitx <- sem(latax, data=e2o, estimator="DWLS")
Warning: lavaan->lav_options_est_dwls():
estimator “DWLS” is not recommended for continuous data. Did you forget to set the ordered=
argument?
# Fit indices and standardized estimates for the first latent model.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 49 iterations
Estimator DWLS
Optimization method NLMINB
Number of model parameters 68
Number of observations 13696
Model Test User Model:
Test statistic 33486.857
Degrees of freedom 460
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 273469.812
Degrees of freedom 496
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.879
Tucker-Lewis Index (TLI) 0.870
Root Mean Square Error of Approximation:
RMSEA 0.072
90 Percent confidence interval - lower 0.072
90 Percent confidence interval - upper 0.073
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 0.000
Standardized Root Mean Square Residual:
SRMR 0.072
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Unstructured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
CK =~
Q1 1.000 0.797 0.475
Q2 0.852 0.011 78.717 0.000 0.679 0.420
Q3 0.467 0.006 72.710 0.000 0.372 0.396
Q4 0.436 0.007 59.691 0.000 0.348 0.275
Q7 0.697 0.009 77.513 0.000 0.555 0.413
Q8 0.949 0.011 85.479 0.000 0.756 0.495
Q17 0.733 0.009 85.215 0.000 0.584 0.520
Q18 0.971 0.011 90.858 0.000 0.774 0.582
Q19 1.059 0.012 87.732 0.000 0.843 0.501
Q20 1.045 0.011 91.470 0.000 0.832 0.585
Q24 1.273 0.013 95.377 0.000 1.014 0.638
Q25 0.907 0.011 83.124 0.000 0.722 0.445
Q27 0.634 0.008 76.975 0.000 0.505 0.405
Q28 0.072 0.007 10.218 0.000 0.057 0.040
Q29 1.019 0.011 90.897 0.000 0.812 0.587
Q32 1.012 0.011 90.264 0.000 0.806 0.572
MK =~
Q5 1.000 0.537 0.568
Q6 0.979 0.014 71.996 0.000 0.525 0.533
Q31 1.713 0.021 80.044 0.000 0.920 0.708
TK =~
Q13 1.000 0.958 0.590
Q14 0.420 0.005 78.995 0.000 0.402 0.520
Q15 0.977 0.012 83.801 0.000 0.936 0.561
Q16 0.684 0.009 77.915 0.000 0.655 0.496
Q21 0.816 0.010 84.631 0.000 0.782 0.562
Q22 0.846 0.011 78.112 0.000 0.811 0.489
Q26 0.896 0.010 87.124 0.000 0.859 0.639
Q30 1.045 0.012 88.894 0.000 1.001 0.675
IK =~
Q9 1.000 1.190 0.615
Q10 1.101 0.012 93.983 0.000 1.311 0.727
Q11 0.879 0.009 93.240 0.000 1.046 0.721
Q12 0.749 0.008 88.279 0.000 0.892 0.585
Q23 0.408 0.006 71.009 0.000 0.486 0.401
GK =~
CK 1.000 0.832 0.832
MK 0.637 0.009 69.291 0.000 0.787 0.787
TK 0.801 0.011 71.194 0.000 0.554 0.554
IK 1.242 0.017 72.747 0.000 0.692 0.692
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q1 2.172 0.030 71.608 0.000 2.172 0.774
.Q2 2.157 0.034 62.995 0.000 2.157 0.824
.Q3 0.742 0.016 45.712 0.000 0.742 0.843
.Q4 1.479 0.021 69.274 0.000 1.479 0.924
.Q7 1.496 0.025 59.894 0.000 1.496 0.829
.Q8 1.764 0.028 63.613 0.000 1.764 0.755
.Q17 0.917 0.022 41.101 0.000 0.917 0.729
.Q18 1.171 0.024 49.277 0.000 1.171 0.662
.Q19 2.123 0.030 70.264 0.000 2.123 0.749
.Q20 1.332 0.027 49.056 0.000 1.332 0.658
.Q24 1.496 0.028 52.835 0.000 1.496 0.593
.Q25 2.108 0.031 67.581 0.000 2.108 0.802
.Q27 1.299 0.021 61.520 0.000 1.299 0.836
.Q28 2.089 0.037 57.198 0.000 2.089 0.998
.Q29 1.252 0.027 46.560 0.000 1.252 0.655
.Q32 1.332 0.025 52.836 0.000 1.332 0.672
.Q5 0.605 0.019 31.409 0.000 0.605 0.677
.Q6 0.695 0.015 46.133 0.000 0.695 0.716
.Q31 0.843 0.030 28.541 0.000 0.843 0.499
.Q13 1.724 0.033 52.817 0.000 1.724 0.652
.Q14 0.437 0.011 40.395 0.000 0.437 0.729
.Q15 1.911 0.042 45.547 0.000 1.911 0.685
.Q16 1.315 0.023 58.166 0.000 1.315 0.754
.Q21 1.328 0.027 49.719 0.000 1.328 0.685
.Q22 2.091 0.031 67.531 0.000 2.091 0.761
.Q26 1.068 0.025 42.893 0.000 1.068 0.592
.Q30 1.199 0.029 41.978 0.000 1.199 0.545
.Q9 2.334 0.049 47.669 0.000 2.334 0.622
.Q10 1.531 0.041 37.280 0.000 1.531 0.471
.Q11 1.012 0.030 33.903 0.000 1.012 0.481
.Q12 1.529 0.026 59.685 0.000 1.529 0.658
.Q23 1.229 0.020 60.682 0.000 1.229 0.839
.CK 0.195 0.006 33.995 0.000 0.308 0.308
.MK 0.110 0.006 19.176 0.000 0.381 0.381
.TK 0.636 0.011 56.316 0.000 0.693 0.693
.IK 0.739 0.016 46.254 0.000 0.522 0.522
GK 0.439 0.008 53.393 0.000 1.000 1.000
Parallel analysis that estimates the number of necessary factors:
# Parallel analysis on the 32 question-score columns. They are selected by
# name (Q1..Q32, the names given to V458..V489 earlier) rather than by the
# positions 458:489, so the call keeps working if columns are added or moved.
fa.parallel(e2o2[, paste0("Q", 1:32)])
Parallel analysis suggests that the number of factors = 7 and the number of components = 6
Sum scores for the six factors that were ultimately chosen, plus the residual matrix of those six subfactor scores after fitting a single-factor model:
# Unit-weighted sum scores for the six subfactors and their total.
# NOTE(review): Q21 and Q26 appear in both COKa and TKa, so they are counted
# twice in GKa — confirm this is intended.
e2o2 <- dplyr::mutate(
  e2o2,
  COKa = Q13 + Q14 + Q15 + Q16 + Q22 + Q30 + Q21 + Q26,
  IKa = Q9 + Q10 + Q11 + Q12 + Q23,
  CKa = Q3 + Q5 + Q6 + Q7 + Q8 + Q24 + Q31 + Q20,
  AKa = Q4 + Q17 + Q19 + Q27 + Q32,
  LKa = Q1 + Q2 + Q25,
  TKa = Q18 + Q21 + Q26 + Q29,
  GKa = COKa + IKa + CKa + AKa + LKa + TKa
)
# One-factor solution over the six subfactor scores; show what that single
# factor leaves unexplained.
subfactor_cols <- c("COKa", "IKa", "CKa", "AKa", "LKa", "TKa")
f <- fa(e2o2[, subfactor_cols], nfactors = 1)
f$residual
COKa IKa CKa AKa LKa TKa
COKa 0.67921936 0.103961890 -0.06587854 -0.17583817 -0.156389558 0.20946454
IKa 0.10396189 0.644056941 -0.05299861 -0.06747902 -0.002675833 0.02185844
CKa -0.06587854 -0.052998609 0.53415147 0.13641800 0.059067827 -0.04991165
AKa -0.17583817 -0.067479023 0.13641800 0.67717512 0.220020553 -0.08472936
LKa -0.15638956 -0.002675833 0.05906783 0.22002055 0.737924698 -0.09663451
TKa 0.20946454 0.021858440 -0.04991165 -0.08472936 -0.096634514 0.39513650
Minor statistical testing.
# z / sqrt(n - 2) for n = 13696 and z = 1.96 (presumably the approximate
# correlation size needed for two-tailed p < .05 — confirm).
(1 / sqrt(13696 - 2)) * 1.96
[1] 0.01674908
# z / sqrt(n - 2) for n = 13696 and z = 2.575829 (presumably the approximate
# correlation size needed for two-tailed p < .01 — confirm).
(1 / sqrt(13696 - 2)) * 2.575829
[1] 0.02201161
# z / sqrt(n - 2) for n = 13696 and z = 3.290527 (the value of qnorm(0.9995);
# presumably the approximate correlation size needed for two-tailed p < .001).
(1 / sqrt(13696 - 2)) * 3.290527
[1] 0.02811903
# Two-tailed p-value for a correlation of r observed in a sample of size n.
r <- 0.0281
n <- 13696
# Compute t-value
t_value <- r * sqrt((n - 2) / (1 - r^2))
# Compute two-tailed p-value. The upper tail is requested directly:
# `1 - pt(...)` loses precision through cancellation and rounds to exactly 0
# once |t| is large, whereas lower.tail = FALSE stays accurate.
p_value <- 2 * pt(abs(t_value), df = n - 2, lower.tail = FALSE)
print(p_value)
[1] 0.001005834
# Standard-normal quantile at cumulative probability 0.9995.
qnorm(p = 0.9995)
[1] 3.290527
# Cohen's d for the difference in the COKa sum score between the sex groups.
cohen.d(COKa ~ sex, data = e2o2)
Call: cohen.d(x = COKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
COKa -1.14 -1.11 -1.07
Multivariate (Mahalanobis) distance between groups
[1] 1.1
r equivalent of difference between two means
COKa
-0.48
# Cohen's d for the difference in the IKa sum score between the sex groups.
cohen.d(IKa ~ sex, data = e2o2)
Call: cohen.d(x = IKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
IKa -0.77 -0.73 -0.7
Multivariate (Mahalanobis) distance between groups
[1] 0.73
r equivalent of difference between two means
IKa
-0.34
# Cohen's d for the difference in the CKa sum score between the sex groups.
cohen.d(CKa ~ sex, data = e2o2)
Call: cohen.d(x = CKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
CKa -0.17 -0.14 -0.11
Multivariate (Mahalanobis) distance between groups
[1] 0.14
r equivalent of difference between two means
CKa
-0.07
# Cohen's d for the difference in the AKa sum score between the sex groups.
cohen.d(AKa ~ sex, data = e2o2)
Call: cohen.d(x = AKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
AKa 0.61 0.65 0.68
Multivariate (Mahalanobis) distance between groups
[1] 0.65
r equivalent of difference between two means
AKa
0.31
# Cohen's d for the difference in the LKa sum score between the sex groups.
cohen.d(LKa ~ sex, data = e2o2)
Call: cohen.d(x = LKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
LKa 0.31 0.34 0.38
Multivariate (Mahalanobis) distance between groups
[1] 0.34
r equivalent of difference between two means
LKa
0.17
# Cohen's d for the difference in the TKa sum score between the sex groups.
cohen.d(TKa ~ sex, data = e2o2)
Call: cohen.d(x = TKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
TKa -0.68 -0.64 -0.61
Multivariate (Mahalanobis) distance between groups
[1] 0.64
r equivalent of difference between two means
TKa
-0.31
# Cohen's d for the difference in the total GKa sum score between the sex
# groups.
cohen.d(GKa ~ sex, data = e2o2)
Call: cohen.d(x = GKa ~ sex, data = e2o2)
Cohen d statistic of difference between two means
lower effect upper
GKa -0.51 -0.48 -0.44
Multivariate (Mahalanobis) distance between groups
[1] 0.48
r equivalent of difference between two means
GKa
-0.23
# p-value reported by cohen.d for the COKa sex difference.
cohen.d(COKa ~ sex, data = e2o2)$p
COKa
0
# p-value reported by cohen.d for the IKa sex difference.
cohen.d(IKa ~ sex, data = e2o2)$p
IKa
0
# p-value reported by cohen.d for the CKa sex difference.
cohen.d(CKa ~ sex, data = e2o2)$p
CKa
0.0000000000000008881784
# p-value reported by cohen.d for the AKa sex difference.
cohen.d(AKa ~ sex, data = e2o2)$p
AKa
0
# p-value reported by cohen.d for the LKa sex difference.
cohen.d(LKa ~ sex, data = e2o2)$p
LKa
0
# p-value reported by cohen.d for the TKa sex difference.
cohen.d(TKa ~ sex, data = e2o2)$p
TKa
0
# p-value reported by cohen.d for the GKa sex difference.
cohen.d(GKa ~ sex, data = e2o2)$p
GKa
0
Latent model with added covariances between technical/computational and aesthetic/literary knowledge.
# Higher-order model with six first-order factors under a general factor GK.
# Q21 and Q26 load on both COK and TK, and two residual covariances are freed:
# TK with COK, and AK with LK.
latax2 <- "
#latents:
COK =~ Q13 + Q14 + Q15 + Q16 + Q22 + Q30 + Q21 + Q26
IK =~ Q9 + Q10 + Q11 + Q12 + Q23
CK =~ Q3 + Q5 + Q6 + Q7 + Q8 + Q24 + Q31 + Q20
AK =~ Q4 + Q17 + Q19 + Q27 + Q32
LK =~ Q1 + Q2 + Q25
TK =~ Q18 + Q21 + Q26 + Q29
GK =~ COK + IK + CK + AK + LK + TK
TK ~~ COK
AK ~~ LK
"
# std.lv = TRUE is spelled out because `T` is an ordinary, reassignable
# variable.
# NOTE(review): lavaan warns that DWLS is not recommended for continuous data
# and asks whether `ordered=` was forgotten — confirm the estimator choice.
latafitx2 <- cfa(latax2, data = e2o2, estimator = "DWLS", std.lv = TRUE)
Warning: lavaan->lav_options_est_dwls():
estimator “DWLS” is not recommended for continuous data. Did you forget to set the ordered=
argument?
# Fit indices and standardized estimates for the model with added covariances.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx2, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 82 iterations
Estimator DWLS
Optimization method NLMINB
Number of model parameters 72
Number of observations 13696
Model Test User Model:
Test statistic 22508.951
Degrees of freedom 424
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 268500.876
Degrees of freedom 465
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.918
Tucker-Lewis Index (TLI) 0.910
Root Mean Square Error of Approximation:
RMSEA 0.062
90 Percent confidence interval - lower 0.061
90 Percent confidence interval - upper 0.062
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 0.000
Standardized Root Mean Square Residual:
SRMR 0.060
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Unstructured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
COK =~
Q13 0.917 0.008 115.149 0.000 1.026 0.631
Q14 0.383 0.004 102.052 0.000 0.428 0.553
Q15 0.904 0.008 110.752 0.000 1.011 0.606
Q16 0.625 0.006 98.550 0.000 0.700 0.530
Q22 0.769 0.008 98.945 0.000 0.861 0.519
Q30 0.958 0.008 122.693 0.000 1.072 0.723
Q21 0.516 0.010 50.759 0.000 0.577 0.414
Q26 0.392 0.011 37.256 0.000 0.438 0.326
IK =~
Q9 0.880 0.009 97.016 0.000 1.189 0.614
Q10 0.969 0.009 102.061 0.000 1.310 0.727
Q11 0.774 0.008 100.477 0.000 1.046 0.721
Q12 0.660 0.007 95.077 0.000 0.892 0.585
Q23 0.360 0.005 75.134 0.000 0.487 0.403
CK =~
Q3 0.211 0.004 54.232 0.000 0.392 0.418
Q5 0.255 0.004 56.750 0.000 0.475 0.503
Q6 0.248 0.004 57.022 0.000 0.462 0.469
Q7 0.327 0.006 56.908 0.000 0.609 0.453
Q8 0.441 0.007 59.491 0.000 0.822 0.538
Q24 0.592 0.010 62.189 0.000 1.103 0.694
Q31 0.424 0.007 60.095 0.000 0.790 0.608
Q20 0.474 0.008 60.854 0.000 0.884 0.621
AK =~
Q4 0.313 0.005 60.238 0.000 0.434 0.343
Q17 0.508 0.007 77.980 0.000 0.704 0.628
Q19 0.708 0.009 79.271 0.000 0.982 0.584
Q27 0.432 0.006 72.304 0.000 0.599 0.481
Q32 0.695 0.009 80.141 0.000 0.964 0.685
LK =~
Q1 0.808 0.012 68.156 0.000 1.037 0.619
Q2 0.707 0.010 67.430 0.000 0.907 0.560
Q25 0.722 0.011 67.951 0.000 0.926 0.571
TK =~
Q18 0.315 0.024 13.219 0.000 0.844 0.634
Q21 0.089 0.007 11.850 0.000 0.237 0.170
Q26 0.163 0.013 13.051 0.000 0.437 0.325
Q29 0.319 0.024 13.232 0.000 0.855 0.618
GK =~
COK 0.502 0.005 100.683 0.000 0.448 0.448
IK 0.910 0.010 90.731 0.000 0.673 0.673
CK 1.571 0.029 54.297 0.000 0.844 0.844
AK 0.962 0.014 70.354 0.000 0.693 0.693
LK 0.804 0.014 59.362 0.000 0.626 0.626
TK 2.482 0.194 12.802 0.000 0.928 0.928
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.COK ~~
.TK 0.189 0.027 7.013 0.000 0.189 0.189
.AK ~~
.LK 0.612 0.016 38.675 0.000 0.612 0.612
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q13 1.590 0.034 47.161 0.000 1.590 0.602
.Q14 0.415 0.011 38.037 0.000 0.415 0.694
.Q15 1.765 0.043 41.150 0.000 1.765 0.633
.Q16 1.256 0.023 54.441 0.000 1.256 0.720
.Q22 2.007 0.032 63.376 0.000 2.007 0.730
.Q30 1.052 0.030 35.151 0.000 1.052 0.478
.Q21 1.419 0.026 53.547 0.000 1.419 0.732
.Q26 1.240 0.024 51.852 0.000 1.240 0.686
.Q9 2.336 0.049 47.726 0.000 2.336 0.623
.Q10 1.534 0.041 37.359 0.000 1.534 0.472
.Q11 1.012 0.030 33.904 0.000 1.012 0.481
.Q12 1.528 0.026 59.663 0.000 1.528 0.658
.Q23 1.227 0.020 60.590 0.000 1.227 0.838
.Q3 0.726 0.016 44.509 0.000 0.726 0.825
.Q5 0.667 0.018 36.108 0.000 0.667 0.747
.Q6 0.758 0.014 53.320 0.000 0.758 0.780
.Q7 1.433 0.025 56.716 0.000 1.433 0.794
.Q8 1.659 0.028 58.502 0.000 1.659 0.710
.Q24 1.307 0.030 44.101 0.000 1.307 0.518
.Q31 1.065 0.025 42.452 0.000 1.065 0.630
.Q20 1.244 0.028 44.768 0.000 1.244 0.614
.Q4 1.411 0.022 65.058 0.000 1.411 0.882
.Q17 0.762 0.023 32.772 0.000 0.762 0.606
.Q19 1.869 0.033 57.476 0.000 1.869 0.659
.Q27 1.195 0.022 54.866 0.000 1.195 0.769
.Q32 1.051 0.028 37.934 0.000 1.051 0.531
.Q1 1.731 0.036 48.453 0.000 1.731 0.617
.Q2 1.796 0.037 48.057 0.000 1.796 0.686
.Q25 1.772 0.035 51.101 0.000 1.772 0.674
.Q18 1.057 0.027 39.035 0.000 1.057 0.598
.Q29 1.180 0.030 39.513 0.000 1.180 0.618
.COK 1.000 0.799 0.799
.IK 1.000 0.547 0.547
.CK 1.000 0.288 0.288
.AK 1.000 0.520 0.520
.LK 1.000 0.608 0.608
.TK 1.000 0.140 0.140
GK 1.000 1.000 1.000
Latent model with no shared covariances.
# Higher-order model with six first-order factors under a general factor GK,
# with no cross-loadings and no residual covariances between the first-order
# factors (Q21 and Q26 load on TK only).
latax22 <- "
#latents:
COK =~ Q13 + Q14 + Q15 + Q16 + Q22 + Q30
IK =~ Q9 + Q10 + Q11 + Q12 + Q23
CK =~ Q3 + Q5 + Q6 + Q7 + Q8 + Q24 + Q31 + Q20
AK =~ Q4 + Q17 + Q19 + Q27 + Q32
LK =~ Q1 + Q2 + Q25
TK =~ Q18 + Q21 + Q26 + Q29
GK =~ COK + IK + CK + AK + LK + TK
"
# std.lv = TRUE is spelled out because `T` is an ordinary, reassignable
# variable.
# NOTE(review): lavaan warns that DWLS is not recommended for continuous data
# and asks whether `ordered=` was forgotten — confirm the estimator choice.
latafitx22 <- cfa(latax22, data = e2o2, estimator = "DWLS", std.lv = TRUE)
Warning: lavaan->lav_options_est_dwls():
estimator “DWLS” is not recommended for continuous data. Did you forget to set the ordered=
argument?
# Fit indices and standardized estimates for the model without covariances.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx22, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 74 iterations
Estimator DWLS
Optimization method NLMINB
Number of model parameters 68
Number of observations 13696
Model Test User Model:
Test statistic 28736.395
Degrees of freedom 428
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 268500.876
Degrees of freedom 465
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.894
Tucker-Lewis Index (TLI) 0.885
Root Mean Square Error of Approximation:
RMSEA 0.069
90 Percent confidence interval - lower 0.069
90 Percent confidence interval - upper 0.070
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 0.000
Standardized Root Mean Square Residual:
SRMR 0.068
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Unstructured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
COK =~
Q13 0.880 0.008 110.606 0.000 1.024 0.630
Q14 0.368 0.004 98.994 0.000 0.428 0.554
Q15 0.870 0.008 105.919 0.000 1.012 0.606
Q16 0.609 0.006 96.071 0.000 0.709 0.537
Q22 0.728 0.008 94.347 0.000 0.848 0.511
Q30 0.936 0.008 117.793 0.000 1.089 0.734
IK =~
Q9 0.883 0.009 98.800 0.000 1.186 0.612
Q10 0.978 0.009 104.293 0.000 1.313 0.729
Q11 0.778 0.008 102.563 0.000 1.045 0.720
Q12 0.666 0.007 97.029 0.000 0.894 0.587
Q23 0.361 0.005 76.044 0.000 0.485 0.401
CK =~
Q3 0.236 0.004 64.769 0.000 0.392 0.418
Q5 0.285 0.004 69.321 0.000 0.474 0.501
Q6 0.277 0.004 69.634 0.000 0.461 0.467
Q7 0.366 0.005 69.434 0.000 0.609 0.453
Q8 0.498 0.007 74.478 0.000 0.827 0.541
Q24 0.663 0.008 79.973 0.000 1.103 0.694
Q31 0.475 0.006 75.729 0.000 0.789 0.607
Q20 0.532 0.007 77.185 0.000 0.884 0.621
AK =~
Q4 0.281 0.005 55.266 0.000 0.408 0.322
Q17 0.483 0.007 72.671 0.000 0.702 0.625
Q19 0.692 0.009 74.277 0.000 1.005 0.597
Q27 0.406 0.006 67.626 0.000 0.590 0.474
Q32 0.663 0.009 74.435 0.000 0.964 0.685
LK =~
Q1 0.742 0.012 60.334 0.000 1.022 0.610
Q2 0.626 0.010 59.664 0.000 0.862 0.533
Q25 0.710 0.012 60.570 0.000 0.978 0.603
TK =~
Q18 0.291 0.017 17.175 0.000 0.829 0.623
Q21 0.222 0.013 17.208 0.000 0.633 0.454
Q26 0.257 0.015 17.218 0.000 0.733 0.546
Q29 0.295 0.017 17.185 0.000 0.841 0.609
GK =~
COK 0.595 0.005 114.695 0.000 0.512 0.512
IK 0.896 0.010 93.890 0.000 0.667 0.667
CK 1.328 0.019 70.219 0.000 0.799 0.799
AK 1.054 0.015 68.472 0.000 0.726 0.726
LK 0.946 0.017 56.188 0.000 0.687 0.687
TK 2.671 0.161 16.562 0.000 0.936 0.936
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q13 1.592 0.034 46.978 0.000 1.592 0.603
.Q14 0.415 0.011 37.944 0.000 0.415 0.694
.Q15 1.763 0.043 40.888 0.000 1.763 0.632
.Q16 1.243 0.023 53.528 0.000 1.243 0.712
.Q22 2.029 0.032 63.974 0.000 2.029 0.738
.Q30 1.014 0.030 33.343 0.000 1.014 0.461
.Q9 2.344 0.049 48.012 0.000 2.344 0.625
.Q10 1.524 0.041 37.190 0.000 1.524 0.469
.Q11 1.014 0.030 34.054 0.000 1.014 0.481
.Q12 1.523 0.026 59.560 0.000 1.523 0.656
.Q23 1.229 0.020 60.749 0.000 1.229 0.839
.Q3 0.726 0.016 44.515 0.000 0.726 0.825
.Q5 0.668 0.018 36.181 0.000 0.668 0.749
.Q6 0.759 0.014 53.417 0.000 0.759 0.782
.Q7 1.434 0.025 56.739 0.000 1.434 0.795
.Q8 1.651 0.028 58.135 0.000 1.651 0.707
.Q24 1.308 0.030 44.098 0.000 1.308 0.518
.Q31 1.068 0.025 42.563 0.000 1.068 0.632
.Q20 1.243 0.028 44.732 0.000 1.243 0.614
.Q4 1.434 0.022 66.267 0.000 1.434 0.896
.Q17 0.766 0.023 32.877 0.000 0.766 0.609
.Q19 1.824 0.033 55.369 0.000 1.824 0.644
.Q27 1.205 0.022 55.340 0.000 1.205 0.776
.Q32 1.053 0.028 37.791 0.000 1.053 0.531
.Q1 1.762 0.036 49.404 0.000 1.762 0.628
.Q2 1.874 0.037 50.728 0.000 1.874 0.716
.Q25 1.673 0.036 46.839 0.000 1.673 0.636
.Q18 1.083 0.025 43.227 0.000 1.083 0.612
.Q21 1.539 0.026 59.286 0.000 1.539 0.794
.Q26 1.268 0.024 52.546 0.000 1.268 0.702
.Q29 1.203 0.028 43.005 0.000 1.203 0.630
.COK 1.000 0.738 0.738
.IK 1.000 0.555 0.555
.CK 1.000 0.362 0.362
.AK 1.000 0.474 0.474
.LK 1.000 0.528 0.528
.TK 1.000 0.123 0.123
GK 1.000 1.000 1.000
Calculating the latent differences between sexes in each factor.
# Single-factor model for COK (eight question scores) with the latent factor
# regressed on sex.
latax3 <- "
#latents:
COK =~ Q13 + Q14 + Q15 + Q16 + Q22 + Q30 + Q21 + Q26
COK ~ sex
"
latafitx3 <- sem(latax3, data = e2o2)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx3, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 26 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 17
Used Total
Number of observations 13299 13696
Model Test User Model:
Test statistic 1610.196
Degrees of freedom 27
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 26610.592
Degrees of freedom 36
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.940
Tucker-Lewis Index (TLI) 0.921
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -172415.151
Loglikelihood unrestricted model (H1) -171610.053
Akaike (AIC) 344864.302
Bayesian (BIC) 344991.725
Sample-size adjusted Bayesian (SABIC) 344937.701
Root Mean Square Error of Approximation:
RMSEA 0.066
90 Percent confidence interval - lower 0.064
90 Percent confidence interval - upper 0.069
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 0.000
Standardized Root Mean Square Residual:
SRMR 0.037
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
COK =~
Q13 1.000 1.024 0.629
Q14 0.422 0.008 52.412 0.000 0.433 0.558
Q15 1.034 0.018 57.773 0.000 1.059 0.633
Q16 0.657 0.014 48.601 0.000 0.673 0.509
Q22 0.901 0.017 52.220 0.000 0.923 0.556
Q30 0.954 0.016 59.386 0.000 0.977 0.658
Q21 0.717 0.014 50.076 0.000 0.734 0.528
Q26 0.662 0.014 48.185 0.000 0.678 0.504
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
COK ~
sex -1.087 0.021 -51.020 0.000 -1.061 -0.531
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q13 1.601 0.023 69.435 0.000 1.601 0.604
.Q14 0.413 0.006 73.192 0.000 0.413 0.688
.Q15 1.673 0.024 69.159 0.000 1.673 0.599
.Q16 1.293 0.017 75.104 0.000 1.293 0.741
.Q22 1.906 0.026 73.303 0.000 1.906 0.691
.Q30 1.251 0.019 67.433 0.000 1.251 0.567
.Q21 1.394 0.019 74.430 0.000 1.394 0.721
.Q26 1.350 0.018 75.281 0.000 1.350 0.746
.COK 0.753 0.021 35.760 0.000 0.718 0.718
# Single-factor model for IK (five question scores) with the latent factor
# regressed on sex.
latax4 <- "
#latents:
IK =~ Q9 + Q10 + Q11 + Q12 + Q23
IK ~ sex
"
latafitx4 <- sem(latax4, data = e2o2)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx4, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 28 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 11
Used Total
Number of observations 13299 13696
Model Test User Model:
Test statistic 723.463
Degrees of freedom 9
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 16623.534
Degrees of freedom 15
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.957
Tucker-Lewis Index (TLI) 0.928
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -116156.722
Loglikelihood unrestricted model (H1) -115794.990
Akaike (AIC) 232335.443
Bayesian (BIC) 232417.893
Sample-size adjusted Bayesian (SABIC) 232382.936
Root Mean Square Error of Approximation:
RMSEA 0.077
90 Percent confidence interval - lower 0.073
90 Percent confidence interval - upper 0.082
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 0.177
Standardized Root Mean Square Residual:
SRMR 0.035
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
IK =~
Q9 1.000 1.110 0.573
Q10 1.187 0.021 56.216 0.000 1.318 0.730
Q11 0.932 0.017 55.648 0.000 1.035 0.713
Q12 0.903 0.017 53.361 0.000 1.003 0.657
Q23 0.397 0.012 34.537 0.000 0.441 0.365
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
IK ~
sex -0.908 0.024 -38.235 0.000 -0.818 -0.409
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q9 2.521 0.036 70.729 0.000 2.521 0.672
.Q10 1.525 0.027 55.835 0.000 1.525 0.468
.Q11 1.038 0.018 58.226 0.000 1.038 0.492
.Q12 1.322 0.021 64.484 0.000 1.322 0.568
.Q23 1.265 0.016 78.198 0.000 1.265 0.867
.IK 1.027 0.032 31.715 0.000 0.833 0.833
# Single-factor model for CK (eight question scores) with the latent factor
# regressed on sex.
latax5 <- "
#latents:
CK =~ Q3 + Q5 + Q6 + Q7 + Q8 + Q24 + Q31 + Q20
CK ~ sex
"
latafitx5 <- sem(latax5, data = e2o2)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx5, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 30 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 17
Used Total
Number of observations 13299 13696
Model Test User Model:
Test statistic 2149.091
Degrees of freedom 27
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 21003.231
Degrees of freedom 36
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.899
Tucker-Lewis Index (TLI) 0.865
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -163573.680
Loglikelihood unrestricted model (H1) -162499.135
Akaike (AIC) 327181.360
Bayesian (BIC) 327308.782
Sample-size adjusted Bayesian (SABIC) 327254.758
Root Mean Square Error of Approximation:
RMSEA 0.077
90 Percent confidence interval - lower 0.074
90 Percent confidence interval - upper 0.080
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 0.032
Standardized Root Mean Square Residual:
SRMR 0.048
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
CK =~
Q3 1.000 0.389 0.414
Q5 1.373 0.036 37.929 0.000 0.534 0.567
Q6 1.290 0.036 36.179 0.000 0.502 0.510
Q7 1.997 0.052 38.229 0.000 0.777 0.578
Q8 2.225 0.059 37.941 0.000 0.865 0.567
Q24 2.537 0.065 39.334 0.000 0.986 0.622
Q31 1.988 0.051 38.668 0.000 0.773 0.594
Q20 1.976 0.053 37.107 0.000 0.768 0.539
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
CK ~
sex -0.051 0.008 -6.660 0.000 -0.132 -0.066
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q3 0.729 0.009 76.819 0.000 0.729 0.828
.Q5 0.603 0.009 70.704 0.000 0.603 0.679
.Q6 0.716 0.010 73.506 0.000 0.716 0.740
.Q7 1.204 0.017 70.066 0.000 1.204 0.666
.Q8 1.579 0.022 70.680 0.000 1.579 0.678
.Q24 1.544 0.023 67.072 0.000 1.544 0.613
.Q31 1.094 0.016 69.017 0.000 1.094 0.647
.Q20 1.442 0.020 72.183 0.000 1.442 0.710
.CK 0.151 0.007 22.120 0.000 0.996 0.996
# Single-factor model for AK (five question scores) with the latent factor
# regressed on sex.
latax6 <- "
#latents:
AK =~ Q4 + Q17 + Q19 + Q27 + Q32
AK ~ sex
"
latafitx6 <- sem(latax6, data = e2o2)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx6, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 31 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 11
Used Total
Number of observations 13299 13696
Model Test User Model:
Test statistic 1645.263
Degrees of freedom 9
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 13097.492
Degrees of freedom 15
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.875
Tucker-Lewis Index (TLI) 0.792
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -107794.106
Loglikelihood unrestricted model (H1) -106971.474
Akaike (AIC) 215610.212
Bayesian (BIC) 215692.662
Sample-size adjusted Bayesian (SABIC) 215657.705
Root Mean Square Error of Approximation:
RMSEA 0.117
90 Percent confidence interval - lower 0.112
90 Percent confidence interval - upper 0.122
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 1.000
Standardized Root Mean Square Residual:
SRMR 0.058
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
AK =~
Q4 1.000 0.649 0.511
Q17 1.253 0.027 46.675 0.000 0.813 0.722
Q19 1.138 0.031 36.326 0.000 0.739 0.439
Q27 0.888 0.024 37.630 0.000 0.576 0.462
Q32 1.409 0.031 45.373 0.000 0.915 0.649
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
AK ~
sex 0.518 0.015 34.254 0.000 0.798 0.399
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q4 1.191 0.017 71.198 0.000 1.191 0.739
.Q17 0.606 0.012 49.149 0.000 0.606 0.478
.Q19 2.288 0.031 74.641 0.000 2.288 0.807
.Q27 1.222 0.017 73.650 0.000 1.222 0.786
.Q32 1.148 0.019 59.472 0.000 1.148 0.579
.AK 0.354 0.013 26.689 0.000 0.841 0.841
# Single-factor model for LK (three question scores) with the latent factor
# regressed on sex.
latax7 <- "
#latents:
LK =~ Q1 + Q2 + Q25
LK ~ sex
"
latafitx7 <- sem(latax7, data = e2o2)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx7, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 29 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 7
Used Total
Number of observations 13299 13696
Model Test User Model:
Test statistic 506.443
Degrees of freedom 2
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 5208.149
Degrees of freedom 6
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.903
Tucker-Lewis Index (TLI) 0.709
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -73912.341
Loglikelihood unrestricted model (H1) -73659.119
Akaike (AIC) 147838.681
Bayesian (BIC) 147891.150
Sample-size adjusted Bayesian (SABIC) 147868.904
Root Mean Square Error of Approximation:
RMSEA 0.138
90 Percent confidence interval - lower 0.128
90 Percent confidence interval - upper 0.148
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 1.000
Standardized Root Mean Square Residual:
SRMR 0.049
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
LK =~
Q1 1.000 1.086 0.649
Q2 0.919 0.027 34.258 0.000 0.999 0.617
Q25 0.709 0.021 33.842 0.000 0.770 0.476
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
LK ~
sex 0.542 0.025 21.926 0.000 0.499 0.249
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.Q1 1.620 0.039 41.985 0.000 1.620 0.579
.Q2 1.624 0.034 47.154 0.000 1.624 0.619
.Q25 2.023 0.030 66.563 0.000 2.023 0.773
.LK 1.107 0.041 26.922 0.000 0.938 0.938
# General factor GK measured by the six subfactor sum scores (the *Ka
# columns), with the latent factor regressed on sex.
latax8 <- "
#latents:
GK =~ COKa + IKa + CKa + AKa + LKa + TKa
GK ~ sex
"
latafitx8 <- sem(latax8, data = e2o2)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
summary(latafitx8, standardized = TRUE, fit.measures = TRUE)
lavaan 0.6-19 ended normally after 57 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 13
Used Total
Number of observations 13299 13696
Model Test User Model:
Test statistic 12998.829
Degrees of freedom 14
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 33227.669
Degrees of freedom 21
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.609
Tucker-Lewis Index (TLI) 0.413
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -232139.911
Loglikelihood unrestricted model (H1) -225640.497
Akaike (AIC) 464305.822
Bayesian (BIC) 464403.263
Sample-size adjusted Bayesian (SABIC) 464361.950
Root Mean Square Error of Approximation:
RMSEA 0.264
90 Percent confidence interval - lower 0.260
90 Percent confidence interval - upper 0.268
P-value H_0: RMSEA <= 0.050 0.000
P-value H_0: RMSEA >= 0.080 1.000
Standardized Root Mean Square Residual:
SRMR 0.157
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
GK =~
COKa 1.000 5.200 0.714
IKa 0.658 0.010 62.827 0.000 3.423 0.607
CKa 0.700 0.012 59.630 0.000 3.642 0.574
AKa 0.360 0.008 43.548 0.000 1.872 0.415
LKa 0.269 0.007 40.040 0.000 1.400 0.381
TKa 0.625 0.008 79.808 0.000 3.250 0.857
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
GK ~
sex -3.662 0.099 -37.010 0.000 -0.704 -0.352
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.COKa 25.950 0.407 63.707 0.000 25.950 0.490
.IKa 20.096 0.279 72.149 0.000 20.096 0.632
.CKa 26.934 0.365 73.697 0.000 26.934 0.670
.AKa 16.822 0.215 78.328 0.000 16.822 0.828
.LKa 11.537 0.146 78.931 0.000 11.537 0.855
.TKa 3.825 0.101 38.008 0.000 3.825 0.266
.GK 23.692 0.548 43.215 0.000 0.876 0.876
######################
# semPaths cannot be used with this version of R, so the path-diagram code below is left commented out
#semPaths(latafitx2, whatLabels="std",
# sizeMan = 6,
# sizeMan2 = 6,
# node.width = 0.5,
# edge.label.cex = .4,
# style = "ram",
# width=20,
# edge.label.position=0.45,
# height=13)
#png(file="C:/Users/micha/OneDrive/Documents/rstuff/mkgtinv/cfa.png", width=3000, height=1500)
#semPaths(latafitx2, whatLabels="std",
# sizeMan = 5.9,
# sizeMan2 = 5.9,
# node.width = 0.5,
# edge.label.cex = .245,
# style = "ram",
# width=40,
# edge.label.position=0.465,
#edge.color='black',
#height=20)
#dev.off()
#################################NATIONAL DIFFERENCES