Import surveys, combine into single data frame, delete identifying information, assign IDs, and separate out by scale for item examination.
# https://hansjoerg.me/2018/04/23/rasch-in-r-tutorial/
knitr::opts_chunk$set(message = F, warning = F)
# load libraries ----------------------------------------------------------
library(stringi)
library(psych)
library(DT)
library(naniar)
library(UpSetR)
library(nFactors)
library(lavaan)
library(corrplot)
library(tidyr)
library(ggplot2)
library(dplyr)
library("eRm")
library("ltm")
library("difR")
library("psych")
# load data ---------------------------------------------------------------
# alt <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Alternative Scales_April 28, 2022_12.34.csv", header=T)
# alt <- alt[-c(1,2),]
# alt$scale <- "alt"
#
# orig <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Original Scales_April 28, 2022_12.35.csv", header=T)
# orig <- orig[-c(1,2),]
# orig$scale <- "orig"
#
# df <- rbind.data.frame(alt, orig)
# df <- subset(df, select = -c(1:19))
# names(df)
# myFun <- function(n) {
# a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
# paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
# }
# df$id <- myFun(nrow(df))
# write.csv(df, file="imported_anonymized.csv", row.names = F)
df <- read.csv(file="imported_anonymized.csv", header=T)
# extract items -----------------------------------------------------------
# new items
EEochem <- subset(df, select=c(scale,grep("EEochem", colnames(df)))) # entry expectations
CCdisc <- subset(df, select=grep("CCdisc", colnames(df))) # classroom climate
IDochem <- cbind.data.frame(subset(df, select=c(scale,grep("IDochem", colnames(df)))), subset(df, select=grep("FASochem", colnames(df)))) # identity
CSochem <- subset(df, select=grep("CSochem", colnames(df))) # career satisfaction
# established scales
MSchem <- subset(df, select=c(scale,grep("MSchem", colnames(df)))) # discipline growth mindset (chemistry)
IPchem <- subset(df, select=grep("IPchem", colnames(df))) # instructor growth mindset (chemistry)
SEchem <- subset(df, select=grep("SEchem", colnames(df))) # disciplinary self-efficacy (chemistry)
MSochem <- subset(df, select=c(scale, grep("MSochem", colnames(df)))) # disciplinary growth mindset (organic chemistry)
IPochem <- subset(df, select=grep("IPochem", colnames(df))) # instructor growth mindset (organic chemistry)
SEochem <- subset(df, select=grep("SEochem", colnames(df))) # disciplinary self-efficacy (organic chemistry)
CNEBochem_class <- cbind.data.frame(subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(1:3))) # entity norms and beliefs
CNEBochem_self <- cbind.data.frame(subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(4:6))) # entity norms and beliefs
CNHSochem_others <- cbind.data.frame(subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(1:3))) # help seeking
CNHSochem_self <- cbind.data.frame(subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(4:6))) # help seeking
CNSWochem <- subset(df, select=grep("CNSWochem", colnames(df))) # help seeking
FCochem <- subset(df, select=grep("FCochem", colnames(df))) # faculty caring
d <- subset(EEochem, scale == "orig", select=-c(scale, EEochem05))
EEochem_desc <- data.frame(describe(d))
datatable(subset(EEochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
corr <- corr.test(d)
corrplot(corr$r)
ggplot(gather(d), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(d)
# gg_miss_upset(EEochem)
d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## EEochem01 EEochem02 EEochem03 EEochem04 EEochem06 EEochem07 EEochem08 EEochem09
## 0.617 0.316 0.992 0.269 0.462 0.636 0.608 0.315
## EEochem10 EEochem11
## 0.415 0.753
##
## Loadings:
## Factor1 Factor2
## EEochem01 -0.617
## EEochem02 0.825
## EEochem04 0.854
## EEochem06 0.570 0.462
## EEochem08 0.550
## EEochem09 0.769
## EEochem10 0.718
## EEochem03
## EEochem07 0.449 0.403
## EEochem11 0.494
##
## Factor1 Factor2
## SS loadings 2.576 2.041
## Proportion Var 0.258 0.204
## Cumulative Var 0.258 0.462
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 48.39 on 26 degrees of freedom.
## The p-value is 0.00487
d <- subset(EEochem, scale == "alt", select=-c(scale, EEochem05))
EEochem_desc <- data.frame(describe(d))
datatable(subset(EEochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
corr <- corr.test(d)
corrplot(corr$r)
ggplot(gather(d), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(d)
# gg_miss_upset(EEochem)
d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## EEochem01 EEochem02 EEochem03 EEochem04 EEochem06 EEochem07 EEochem08 EEochem09
## 0.418 0.419 0.760 0.308 0.336 0.503 0.420 0.219
## EEochem10 EEochem11
## 0.413 0.850
##
## Loadings:
## Factor1 Factor2
## EEochem06 0.766
## EEochem07 0.647
## EEochem08 0.757
## EEochem09 0.866
## EEochem10 0.765
## EEochem01 -0.750
## EEochem02 0.728
## EEochem04 0.773
## EEochem03 -0.453
## EEochem11
##
## Factor1 Factor2
## SS loadings 3.262 2.092
## Proportion Var 0.326 0.209
## Cumulative Var 0.326 0.535
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 36.77 on 26 degrees of freedom.
## The p-value is 0.0784
level_key <- c("1"="0","2"="0","3"="1","4"="1")
d <- na.omit(subset(EEochem, scale == "orig", select=-c(scale, EEochem05)))
d <- d %>%
mutate_at(vars(1:10), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)
##
## Call:
## ltm(formula = d ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -313.8875 667.7751 715.4156
##
## Coefficients:
## value std.err z.vals
## Dffclt.EEochem01 -1.0152 0.5520 -1.8391
## Dffclt.EEochem02 -0.3080 0.2333 -1.3199
## Dffclt.EEochem03 5.4193 8.1129 0.6680
## Dffclt.EEochem04 -0.6143 6.5746 -0.0934
## Dffclt.EEochem06 0.5451 0.3061 1.7810
## Dffclt.EEochem07 1.0261 1.1112 0.9234
## Dffclt.EEochem08 1.6231 1.1488 1.4129
## Dffclt.EEochem09 0.8988 0.5058 1.7771
## Dffclt.EEochem10 1.1689 0.6057 1.9298
## Dffclt.EEochem11 2.2690 1.5683 1.4468
## Dscrmn.EEochem01 1.4680 0.5119 2.8680
## Dscrmn.EEochem02 -2.3334 0.7464 -3.1262
## Dscrmn.EEochem03 -0.8787 1.1212 -0.7837
## Dscrmn.EEochem04 -15.5984 79.3718 -0.1965
## Dscrmn.EEochem06 -2.0636 0.6326 -3.2620
## Dscrmn.EEochem07 -4.0151 2.3854 -1.6832
## Dscrmn.EEochem08 -2.0195 0.8885 -2.2729
## Dscrmn.EEochem09 -0.9835 0.3670 -2.6797
## Dscrmn.EEochem10 -1.6177 0.5388 -3.0026
## Dscrmn.EEochem11 -1.6660 0.7667 -2.1728
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.0034
## quasi-Newton: BFGS
item.fit(mod2, simulate.p.value=T)
##
## Item-Fit Statistics and P-values
##
## Call:
## ltm(formula = d ~ z1)
##
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100
##
## X^2 Pr(>X^2)
## EEochem01 8.8411 0.6139
## EEochem02 8.7005 0.604
## EEochem03 8.0004 0.2772
## EEochem04 3.5940 0.4752
## EEochem06 13.3848 0.3762
## EEochem07 2.9388 0.6832
## EEochem08 3.8521 0.8218
## EEochem09 15.4080 0.3762
## EEochem10 10.0181 0.5446
## EEochem11 28.9379 0.0297
plot.ltm(mod2, type = 'ICC', auto.key = FALSE)
plot.ltm(mod2, type = 'IIC', auto.key = FALSE)
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
plot(mod2, type=c("IIC"), items=c(0))
level_key <- c("1"="0","2"="0","3"="1","4"="1")
d <- na.omit(subset(EEochem, scale == "alt", select=-c(scale, EEochem05)))
d <- d %>%
mutate_at(vars(1:10), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)
##
## Call:
## ltm(formula = d ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -367.6475 775.2949 827.1973
##
## Coefficients:
## value std.err z.vals
## Dffclt.EEochem01 -1.2008 0.5964 -2.0136
## Dffclt.EEochem02 -0.7830 0.3596 -2.1776
## Dffclt.EEochem03 3.7687 3.6536 1.0315
## Dffclt.EEochem04 -0.7738 8.7969 -0.0880
## Dffclt.EEochem06 0.3852 0.2077 1.8548
## Dffclt.EEochem07 0.6509 0.3354 1.9408
## Dffclt.EEochem08 1.0008 0.6717 1.4898
## Dffclt.EEochem09 0.5860 17.4074 0.0337
## Dffclt.EEochem10 0.7890 0.3586 2.2002
## Dffclt.EEochem11 4.1645 3.3639 1.2380
## Dscrmn.EEochem01 2.4769 0.7244 3.4191
## Dscrmn.EEochem02 -1.4759 0.3998 -3.6914
## Dscrmn.EEochem03 -1.1625 0.8989 -1.2933
## Dscrmn.EEochem04 -16.1026 97.5465 -0.1651
## Dscrmn.EEochem06 -3.6888 0.9552 -3.8619
## Dscrmn.EEochem07 -2.8361 0.7873 -3.6024
## Dscrmn.EEochem08 -3.5203 1.3930 -2.5272
## Dscrmn.EEochem09 -21.7695 299.8035 -0.0726
## Dscrmn.EEochem10 -1.9784 0.5277 -3.7489
## Dscrmn.EEochem11 -0.4393 0.3315 -1.3251
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.0025
## quasi-Newton: BFGS
item.fit(mod2, simulate.p.value=T)
##
## Item-Fit Statistics and P-values
##
## Call:
## ltm(formula = d ~ z1)
##
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100
##
## X^2 Pr(>X^2)
## EEochem01 79.1890 0.0198
## EEochem02 17.2820 0.3663
## EEochem03 8.3676 0.5842
## EEochem04 0.4989 0.8119
## EEochem06 9.8604 0.4752
## EEochem07 12.0342 0.4653
## EEochem08 3.4390 0.901
## EEochem09 1.1539 0.6832
## EEochem10 38.7274 0.0198
## EEochem11 12.3592 0.4257
plot.ltm(mod2, type = 'ICC', auto.key = FALSE)
plot.ltm(mod2, type = 'IIC', auto.key = FALSE)
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
plot(mod2, type=c("IIC"), items=c(0))
d <- subset(MSchem, scale == "orig", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
corr <- corr.test(d)
corrplot(corr$r)
ggplot(gather(d), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(d)
d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## MSchem01 MSchem02 MSchem03 MSchem04 MSchem05 MSchem06 MSchem07
## 0.446 0.250 0.130 0.111 0.181 0.148 0.370
##
## Loadings:
## Factor1 Factor2
## MSchem01 0.689
## MSchem02 0.841
## MSchem03 0.885
## MSchem04 0.872
## MSchem05 0.882
## MSchem06 0.840
## MSchem07 0.725
##
## Factor1 Factor2
## SS loadings 3.019 2.346
## Proportion Var 0.431 0.335
## Cumulative Var 0.431 0.766
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 58.7 on 8 degrees of freedom.
## The p-value is 8.36e-10
d <- subset(MSchem, scale == "alt", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
corr <- corr.test(d)
corrplot(corr$r)
ggplot(gather(d), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(d)
d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## MSchem01 MSchem02 MSchem03 MSchem04 MSchem05 MSchem06 MSchem07
## 0.630 0.465 0.316 0.005 0.580 0.234 0.249
##
## Loadings:
## Factor1 Factor2
## MSchem03 0.763
## MSchem04 0.950
## MSchem02 0.516 -0.518
## MSchem05 0.619
## MSchem06 0.779
## MSchem07 0.788
## MSchem01 0.452 -0.407
##
## Factor1 Factor2
## SS loadings 2.280 2.240
## Proportion Var 0.326 0.320
## Cumulative Var 0.326 0.646
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 28.43 on 8 degrees of freedom.
## The p-value is 0.000399
d <- na.omit(subset(MSchem, scale == "orig", select=-c(scale)))
d <- d %>%
mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)
##
## Call:
## ltm(formula = d ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -156.3357 340.6713 373.8436
##
## Coefficients:
## value std.err z.vals
## Dffclt.MSchem01 0.7191 0.2015 3.5680
## Dffclt.MSchem02 0.9095 0.1473 6.1760
## Dffclt.MSchem03 0.6621 28.2841 0.0234
## Dffclt.MSchem04 0.6573 141.3955 0.0046
## Dffclt.MSchem05 1.2824 0.2006 6.3937
## Dffclt.MSchem06 0.7141 8.1624 0.0875
## Dffclt.MSchem07 0.8585 0.1594 5.3858
## Dscrmn.MSchem01 2.0890 0.6755 3.0926
## Dscrmn.MSchem02 4.3513 1.5437 2.8187
## Dscrmn.MSchem03 36.5386 64691.4546 0.0006
## Dscrmn.MSchem04 41.8914 284944.3405 0.0001
## Dscrmn.MSchem05 -4.6447 1.9170 -2.4229
## Dscrmn.MSchem06 -28.4131 6425.7796 -0.0044
## Dscrmn.MSchem07 -3.4425 1.3838 -2.4877
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): <1e-06
## quasi-Newton: BFGS
item.fit(mod2, simulate.p.value=T)
##
## Item-Fit Statistics and P-values
##
## Call:
## ltm(formula = d ~ z1)
##
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100
##
## X^2 Pr(>X^2)
## MSchem01 6.6518 0.9604
## MSchem02 13.0174 0.6832
## MSchem03 7.1227 0.1287
## MSchem04 3.1666 0.1287
## MSchem05 11.1334 0.4356
## MSchem06 10.3535 0.1386
## MSchem07 15.9756 0.7525
plot.ltm(mod2, type = 'ICC', auto.key = FALSE)
plot.ltm(mod2, type = 'IIC', auto.key = FALSE)
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
plot(mod2, type=c("IIC"), items=c(0))
d <- na.omit(subset(MSchem, scale == "alt", select=-c(scale)))
d <- d %>%
mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)
##
## Call:
## ltm(formula = d ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -204.2268 436.4537 473.0653
##
## Coefficients:
## value std.err z.vals
## Dffclt.MSchem01 1.5145 0.3328 4.5503
## Dffclt.MSchem02 1.5351 0.2273 6.7536
## Dffclt.MSchem03 1.2176 0.1177 10.3419
## Dffclt.MSchem04 1.2235 0.1243 9.8394
## Dffclt.MSchem05 2.1301 0.4753 4.4815
## Dffclt.MSchem06 1.3973 0.4373 3.1952
## Dffclt.MSchem07 1.4204 0.1881 7.5514
## Dscrmn.MSchem01 1.4601 0.4921 2.9668
## Dscrmn.MSchem02 2.5670 0.8779 2.9241
## Dscrmn.MSchem03 4.4709 2.3306 1.9184
## Dscrmn.MSchem04 4.1839 2.1923 1.9085
## Dscrmn.MSchem05 -1.8569 0.7585 -2.4481
## Dscrmn.MSchem06 -13.1618 151.8881 -0.0867
## Dscrmn.MSchem07 -3.0482 1.0868 -2.8048
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.002
## quasi-Newton: BFGS
item.fit(mod2, simulate.p.value=T)
##
## Item-Fit Statistics and P-values
##
## Call:
## ltm(formula = d ~ z1)
##
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100
##
## X^2 Pr(>X^2)
## MSchem01 12.0221 0.4554
## MSchem02 13.6625 0.4752
## MSchem03 8.4967 0.4851
## MSchem04 8.2060 0.5347
## MSchem05 4.2549 0.9505
## MSchem06 2.4127 0.3762
## MSchem07 14.2003 0.4653
plot.ltm(mod2, type = 'ICC', auto.key = FALSE)
plot.ltm(mod2, type = 'IIC', auto.key = FALSE)
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
plot(mod2, type=c("IIC"), items=c(0))
d <- subset(IDochem, scale == "orig", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
corr <- corr.test(d)
corrplot(corr$r)
ggplot(gather(d), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(d)
d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 3, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 3, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## IDochem01 IDochem02 IDochem03 IDochem04 IDochem05 IDochem06 IDochem10
## 0.025 0.295 0.567 0.195 0.107 0.554 0.813
## IDochem07 IDochem08 IDochem09 FASochem02 FASochem03 FASochem05
## 0.850 0.690 0.604 0.404 0.450 0.257
##
## Loadings:
## Factor1 Factor2 Factor3
## IDochem02 0.649 0.527
## IDochem04 0.863
## IDochem05 0.916
## IDochem06 0.608
## IDochem08 0.504
## IDochem09 0.578
## FASochem02 0.521 0.440
## FASochem03 -0.608
## FASochem05 0.402 0.680
## IDochem01 0.908
## IDochem03 0.430 0.436
## IDochem10
## IDochem07
##
## Factor1 Factor2 Factor3
## SS loadings 3.195 2.132 1.863
## Proportion Var 0.246 0.164 0.143
## Cumulative Var 0.246 0.410 0.553
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 54.43 on 42 degrees of freedom.
## The p-value is 0.0946
d <- subset(IDochem, scale == "alt", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
corr <- corr.test(d)
corrplot(corr$r)
ggplot(gather(d), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(d)
d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 3, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 3, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## IDochem01 IDochem02 IDochem03 IDochem04 IDochem05 IDochem06 IDochem10
## 0.235 0.170 0.372 0.092 0.108 0.502 0.541
## IDochem07 IDochem08 IDochem09 FASochem02 FASochem03 FASochem05
## 0.565 0.434 0.345 0.214 0.364 0.310
##
## Loadings:
## Factor1 Factor2 Factor3
## IDochem01 0.666 0.550
## IDochem02 0.860
## IDochem03 0.687
## IDochem04 0.881
## IDochem05 0.872
## IDochem06 0.678
## IDochem10 0.591
## FASochem02 0.795
## FASochem03 -0.723
## FASochem05 0.689
## IDochem07 0.650
## IDochem08 0.715
## IDochem09 0.682
##
## Factor1 Factor2 Factor3
## SS loadings 4.120 2.863 1.765
## Proportion Var 0.317 0.220 0.136
## Cumulative Var 0.317 0.537 0.673
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 30.51 on 42 degrees of freedom.
## The p-value is 0.906
d <- na.omit(subset(IDochem, scale == "orig", select=-c(scale)))
d <- d %>%
mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)
##
## Call:
## ltm(formula = d ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -533.2004 1118.401 1180.006
##
## Coefficients:
## value std.err z.vals
## Dffclt.IDochem01 0.0564 0.1225 0.4606
## Dffclt.IDochem02 -0.0090 1.0464 -0.0086
## Dffclt.IDochem03 0.3622 0.1722 2.1028
## Dffclt.IDochem04 -0.1341 0.1353 -0.9907
## Dffclt.IDochem05 -0.0385 0.1010 -0.3810
## Dffclt.IDochem06 -0.3094 0.2125 -1.4556
## Dffclt.IDochem10 -1.7846 0.5929 -3.0101
## Dffclt.IDochem07 5.1393 5.9396 0.8653
## Dffclt.IDochem08 0.1899 0.2833 0.6703
## Dffclt.IDochem09 0.3020 0.2323 1.2998
## Dffclt.FASochem02 -0.0303 0.1424 -0.2131
## Dffclt.FASochem03 -0.8866 0.2516 -3.5242
## Dffclt.FASochem05 -0.7880 0.2135 -3.6905
## Dscrmn.IDochem01 3.2738 1.1376 2.8779
## Dscrmn.IDochem02 26.5153 3067.0155 0.0086
## Dscrmn.IDochem03 2.1595 0.6132 3.5214
## Dscrmn.IDochem04 2.7362 0.8737 3.1318
## Dscrmn.IDochem05 4.6204 2.5736 1.7953
## Dscrmn.IDochem06 1.3690 0.4295 3.1875
## Dscrmn.IDochem10 1.1262 0.4520 2.4917
## Dscrmn.IDochem07 0.2689 0.3131 0.8590
## Dscrmn.IDochem08 0.9156 0.3393 2.6981
## Dscrmn.IDochem09 1.2415 0.3931 3.1581
## Dscrmn.FASochem02 2.3855 0.8079 2.9529
## Dscrmn.FASochem03 -1.7238 0.5328 -3.2355
## Dscrmn.FASochem05 2.2139 0.6809 3.2513
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.0013
## quasi-Newton: BFGS
item.fit(mod2, simulate.p.value=T)
##
## Item-Fit Statistics and P-values
##
## Call:
## ltm(formula = d ~ z1)
##
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100
##
## X^2 Pr(>X^2)
## IDochem01 6.4024 0.5347
## IDochem02 2.1744 0.4455
## IDochem03 9.9752 0.3564
## IDochem04 3.5235 0.9505
## IDochem05 10.6292 0.1089
## IDochem06 9.2774 0.3267
## IDochem10 9.8560 0.3564
## IDochem07 8.8230 0.3168
## IDochem08 6.0416 0.7129
## IDochem09 11.2748 0.2376
## FASochem02 12.0463 0.1287
## FASochem03 11.4617 0.2277
## FASochem05 15.8426 0.0297
plot.ltm(mod2, type = 'ICC', auto.key = FALSE)
plot.ltm(mod2, type = 'IIC', auto.key = FALSE)
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
plot(mod2, type=c("IIC"), items=c(0))
d <- na.omit(subset(IDochem, scale == "alt", select=-c(scale)))
d <- d %>%
mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)
##
## Call:
## ltm(formula = d ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -584.16 1220.32 1287.263
##
## Coefficients:
## value std.err z.vals
## Dffclt.IDochem01 0.2794 0.0989 2.8251
## Dffclt.IDochem02 0.1891 0.0914 2.0700
## Dffclt.IDochem03 0.4078 0.1133 3.5987
## Dffclt.IDochem04 0.0194 17.2786 0.0011
## Dffclt.IDochem05 0.0383 180.0793 0.0002
## Dffclt.IDochem06 -0.0596 0.1563 -0.3812
## Dffclt.IDochem10 -0.9278 0.2561 -3.6234
## Dffclt.IDochem07 3.5306 2.4329 1.4512
## Dffclt.IDochem08 0.9570 0.2895 3.3057
## Dffclt.IDochem09 0.7992 0.1987 4.0228
## Dffclt.FASochem02 0.4085 0.1472 2.7749
## Dffclt.FASochem03 -0.6282 0.2072 -3.0321
## Dffclt.FASochem05 -0.6961 0.1667 -4.1758
## Dscrmn.IDochem01 3.5396 0.7861 4.5027
## Dscrmn.IDochem02 4.1796 1.1048 3.7831
## Dscrmn.IDochem03 2.8360 0.6733 4.2121
## Dscrmn.IDochem04 34.8307 31080.4580 0.0011
## Dscrmn.IDochem05 41.9287 197281.2471 0.0002
## Dscrmn.IDochem06 1.7114 0.4160 4.1144
## Dscrmn.IDochem10 1.5233 0.4373 3.4838
## Dscrmn.IDochem07 0.3870 0.2813 1.3758
## Dscrmn.IDochem08 1.0953 0.3342 3.2778
## Dscrmn.IDochem09 1.5562 0.4163 3.7380
## Dscrmn.FASochem02 1.8720 0.4523 4.1390
## Dscrmn.FASochem03 -1.5989 0.4233 -3.7777
## Dscrmn.FASochem05 2.5472 0.7425 3.4308
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 1.3e-06
## quasi-Newton: BFGS
item.fit(mod2, simulate.p.value=T)
##
## Item-Fit Statistics and P-values
##
## Call:
## ltm(formula = d ~ z1)
##
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100
##
## X^2 Pr(>X^2)
## IDochem01 10.5375 0.2574
## IDochem02 7.3938 0.5545
## IDochem03 7.6702 0.495
## IDochem04 3.7759 0.5941
## IDochem05 0.2925 0.9109
## IDochem06 6.7098 0.802
## IDochem10 7.2910 0.7228
## IDochem07 8.3404 0.4851
## IDochem08 13.4286 0.1287
## IDochem09 10.3072 0.3663
## FASochem02 20.8875 0.0198
## FASochem03 8.2527 0.5644
## FASochem05 5.8190 0.8317
plot.ltm(mod2, type = 'ICC', auto.key = FALSE)
plot.ltm(mod2, type = 'IIC', auto.key = FALSE)
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
n <- n + 1
}
plot(mod2, type=c("IIC"), items=c(0))
Meeting notes: - add entry expectation to survey map - provax rotation for EFAs
ee - kurtosis is worse for the original - refine efa - should run separate 2PLs on separate factors
gm - promax
id - drop first item from efa
once we have factors, check alpha beliefs about self vs. beliefs about chem/orgo 15 min convo on what we’ve learned so far for group meeting