Organic Chemistry Survey: 2PL Original and Alternative

Prepare Data

Import surveys, combine into single data frame, delete identifying information, assign IDs, and separate out by scale for item examination.

# https://hansjoerg.me/2018/04/23/rasch-in-r-tutorial/

knitr::opts_chunk$set(message = F, warning = F)

# load libraries ----------------------------------------------------------
library(stringi)
library(psych)
library(DT)
library(naniar)
library(UpSetR)
library(nFactors)
library(lavaan)
library(corrplot)
library(tidyr)

library(ggplot2)
library(dplyr)
library("eRm")
library("ltm")
library("difR")
library("psych")

# load data ---------------------------------------------------------------
# alt <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Alternative Scales_April 28, 2022_12.34.csv", header=T)
# alt <- alt[-c(1,2),]
# alt$scale <- "alt"
# 
# orig <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Original Scales_April 28, 2022_12.35.csv", header=T)
# orig <- orig[-c(1,2),]
# orig$scale <- "orig"
# 
# df <- rbind.data.frame(alt, orig)
# df <- subset(df, select = -c(1:19))
# names(df)
# myFun <- function(n) {
#   a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
#   paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
# }
# df$id <- myFun(nrow(df))
# write.csv(df, file="imported_anonymized.csv", row.names = F)

df <- read.csv(file="imported_anonymized.csv", header=T)

# extract items -----------------------------------------------------------
# new items
EEochem <- subset(df, select=c(scale,grep("EEochem", colnames(df)))) # entry expectations
CCdisc <- subset(df, select=grep("CCdisc", colnames(df))) # classroom climate
IDochem <- cbind.data.frame(subset(df, select=c(scale,grep("IDochem", colnames(df)))), subset(df, select=grep("FASochem", colnames(df)))) # identity
CSochem <- subset(df, select=grep("CSochem", colnames(df))) # career satisfaction

# established scales
MSchem <- subset(df, select=c(scale,grep("MSchem", colnames(df)))) # discipline growth mindset (chemistry)
IPchem <- subset(df, select=grep("IPchem", colnames(df))) # instructor growth mindset (chemistry)
SEchem <- subset(df, select=grep("SEchem", colnames(df))) # disciplinary self-efficacy (chemistry)
MSochem <- subset(df, select=c(scale, grep("MSochem", colnames(df)))) # disciplinary growth mindset (organic chemistry)
IPochem <- subset(df, select=grep("IPochem", colnames(df))) # instructor growth mindset (organic chemistry)
SEochem <- subset(df, select=grep("SEochem", colnames(df))) # disciplinary self-efficacy (organic chemistry)
CNEBochem_class <- cbind.data.frame(subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(1:3))) # entity norms and beliefs
CNEBochem_self <- cbind.data.frame(subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(4:6))) # entity norms and beliefs
CNHSochem_others <- cbind.data.frame(subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(1:3))) # help seeking
CNHSochem_self <- cbind.data.frame(subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(4:6))) # help seeking
CNSWochem <- subset(df, select=grep("CNSWochem", colnames(df))) # help seeking
FCochem <- subset(df, select=grep("FCochem", colnames(df))) # faculty caring

Entry Expectations

Items

I took this course only because it was required
I was looking forward to this course
I heard this course would be very difficult
I chose to take Organic Chemistry because I am really interested in the topic
I chose to take Organic Chemistry because is is a requirement for my major or I am pre-med intending
I think that what we will study in Organic Chemistry will be important for me to know
I think the field of organic chemistry is an important discipline
I heard this course would cover content that is important…for success in future classes
I heard this course would cover content that is important…for success in my future career
I heard this course would cover content that is important…for studying or practicing medicine
I heard this course would cover content that is important…for the MCAT exam

Notes

Issue with EEochem05 – all participants in alternative scale sample selected yes/YES. Item is dropped from both models.

Stats - Original

Univariate Stats

d <- subset(EEochem, scale == "orig", select=-c(scale, EEochem05))
EEochem_desc <- data.frame(describe(d))
datatable(subset(EEochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))

Item Correlations

corr <- corr.test(d)
corrplot(corr$r)

Histograms

ggplot(gather(d), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness

vis_miss(d)

# gg_miss_upset(EEochem)

EFA (All Items)

d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)

## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
## EEochem01 EEochem02 EEochem03 EEochem04 EEochem06 EEochem07 EEochem08 EEochem09 
##     0.617     0.316     0.992     0.269     0.462     0.636     0.608     0.315 
## EEochem10 EEochem11 
##     0.415     0.753 
## 
## Loadings:
##           Factor1 Factor2
## EEochem01 -0.617         
## EEochem02  0.825         
## EEochem04  0.854         
## EEochem06  0.570   0.462 
## EEochem08          0.550 
## EEochem09          0.769 
## EEochem10          0.718 
## EEochem03                
## EEochem07  0.449   0.403 
## EEochem11          0.494 
## 
##                Factor1 Factor2
## SS loadings      2.576   2.041
## Proportion Var   0.258   0.204
## Cumulative Var   0.258   0.462
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 48.39 on 26 degrees of freedom.
## The p-value is 0.00487

Stats - Alternative

Univariate Stats

d <- subset(EEochem, scale == "alt", select=-c(scale, EEochem05))
EEochem_desc <- data.frame(describe(d))
datatable(subset(EEochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))

Item Correlations

corr <- corr.test(d)
corrplot(corr$r)

Histograms

ggplot(gather(d), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness

vis_miss(d)

# gg_miss_upset(EEochem)

EFA (All Items)

d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)

## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
## EEochem01 EEochem02 EEochem03 EEochem04 EEochem06 EEochem07 EEochem08 EEochem09 
##     0.418     0.419     0.760     0.308     0.336     0.503     0.420     0.219 
## EEochem10 EEochem11 
##     0.413     0.850 
## 
## Loadings:
##           Factor1 Factor2
## EEochem06  0.766         
## EEochem07  0.647         
## EEochem08  0.757         
## EEochem09  0.866         
## EEochem10  0.765         
## EEochem01         -0.750 
## EEochem02          0.728 
## EEochem04          0.773 
## EEochem03         -0.453 
## EEochem11                
## 
##                Factor1 Factor2
## SS loadings      3.262   2.092
## Proportion Var   0.326   0.209
## Cumulative Var   0.326   0.535
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 36.77 on 26 degrees of freedom.
## The p-value is 0.0784

2PL Model - Original

Summary & Fit

level_key <- c("1"="0","2"="0","3"="1","4"="1")

d <- na.omit(subset(EEochem, scale == "orig", select=-c(scale, EEochem05)))
d <- d %>% 
  mutate_at(vars(1:10), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)

## 
## Call:
## ltm(formula = d ~ z1)
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -313.8875 667.7751 715.4156
## 
## Coefficients:
##                     value std.err  z.vals
## Dffclt.EEochem01  -1.0152  0.5520 -1.8391
## Dffclt.EEochem02  -0.3080  0.2333 -1.3199
## Dffclt.EEochem03   5.4193  8.1129  0.6680
## Dffclt.EEochem04  -0.6143  6.5746 -0.0934
## Dffclt.EEochem06   0.5451  0.3061  1.7810
## Dffclt.EEochem07   1.0261  1.1112  0.9234
## Dffclt.EEochem08   1.6231  1.1488  1.4129
## Dffclt.EEochem09   0.8988  0.5058  1.7771
## Dffclt.EEochem10   1.1689  0.6057  1.9298
## Dffclt.EEochem11   2.2690  1.5683  1.4468
## Dscrmn.EEochem01   1.4680  0.5119  2.8680
## Dscrmn.EEochem02  -2.3334  0.7464 -3.1262
## Dscrmn.EEochem03  -0.8787  1.1212 -0.7837
## Dscrmn.EEochem04 -15.5984 79.3718 -0.1965
## Dscrmn.EEochem06  -2.0636  0.6326 -3.2620
## Dscrmn.EEochem07  -4.0151  2.3854 -1.6832
## Dscrmn.EEochem08  -2.0195  0.8885 -2.2729
## Dscrmn.EEochem09  -0.9835  0.3670 -2.6797
## Dscrmn.EEochem10  -1.6177  0.5388 -3.0026
## Dscrmn.EEochem11  -1.6660  0.7667 -2.1728
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 0.0034 
## quasi-Newton: BFGS

item.fit(mod2, simulate.p.value=T)

## 
## Item-Fit Statistics and P-values
## 
## Call:
## ltm(formula = d ~ z1)
## 
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100 
## 
##               X^2 Pr(>X^2)
## EEochem01  8.8411   0.6139
## EEochem02  8.7005    0.604
## EEochem03  8.0004   0.2772
## EEochem04  3.5940   0.4752
## EEochem06 13.3848   0.3762
## EEochem07  2.9388   0.6832
## EEochem08  3.8521   0.8218
## EEochem09 15.4080   0.3762
## EEochem10 10.0181   0.5446
## EEochem11 28.9379   0.0297

All Items ICC

plot.ltm(mod2, type = 'ICC', auto.key = FALSE)

All Items IIC

plot.ltm(mod2, type = 'IIC', auto.key = FALSE)

Individual ICC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Individual IIC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Test Information Function

plot(mod2, type=c("IIC"), items=c(0))

2PL Model - Alternative

Summary & Fit

level_key <- c("1"="0","2"="0","3"="1","4"="1")

d <- na.omit(subset(EEochem, scale == "alt", select=-c(scale, EEochem05)))
d <- d %>% 
  mutate_at(vars(1:10), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)

## 
## Call:
## ltm(formula = d ~ z1)
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -367.6475 775.2949 827.1973
## 
## Coefficients:
##                     value  std.err  z.vals
## Dffclt.EEochem01  -1.2008   0.5964 -2.0136
## Dffclt.EEochem02  -0.7830   0.3596 -2.1776
## Dffclt.EEochem03   3.7687   3.6536  1.0315
## Dffclt.EEochem04  -0.7738   8.7969 -0.0880
## Dffclt.EEochem06   0.3852   0.2077  1.8548
## Dffclt.EEochem07   0.6509   0.3354  1.9408
## Dffclt.EEochem08   1.0008   0.6717  1.4898
## Dffclt.EEochem09   0.5860  17.4074  0.0337
## Dffclt.EEochem10   0.7890   0.3586  2.2002
## Dffclt.EEochem11   4.1645   3.3639  1.2380
## Dscrmn.EEochem01   2.4769   0.7244  3.4191
## Dscrmn.EEochem02  -1.4759   0.3998 -3.6914
## Dscrmn.EEochem03  -1.1625   0.8989 -1.2933
## Dscrmn.EEochem04 -16.1026  97.5465 -0.1651
## Dscrmn.EEochem06  -3.6888   0.9552 -3.8619
## Dscrmn.EEochem07  -2.8361   0.7873 -3.6024
## Dscrmn.EEochem08  -3.5203   1.3930 -2.5272
## Dscrmn.EEochem09 -21.7695 299.8035 -0.0726
## Dscrmn.EEochem10  -1.9784   0.5277 -3.7489
## Dscrmn.EEochem11  -0.4393   0.3315 -1.3251
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 0.0025 
## quasi-Newton: BFGS

item.fit(mod2, simulate.p.value=T)

## 
## Item-Fit Statistics and P-values
## 
## Call:
## ltm(formula = d ~ z1)
## 
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100 
## 
##               X^2 Pr(>X^2)
## EEochem01 79.1890   0.0198
## EEochem02 17.2820   0.3663
## EEochem03  8.3676   0.5842
## EEochem04  0.4989   0.8119
## EEochem06  9.8604   0.4752
## EEochem07 12.0342   0.4653
## EEochem08  3.4390    0.901
## EEochem09  1.1539   0.6832
## EEochem10 38.7274   0.0198
## EEochem11 12.3592   0.4257

All Items ICC

plot.ltm(mod2, type = 'ICC', auto.key = FALSE)

All Items IIC

plot.ltm(mod2, type = 'IIC', auto.key = FALSE)

Individual ICC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Individual IIC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Test Information Function

plot(mod2, type=c("IIC"), items=c(0))

Disciplinary Growth Mindset (C)

Items

Even if I were to spend a lot of time working on difficult chemistry problems, I cannot develop my intelligence in chemistry further.
I won’t get better at chemistry if I try harder.
I could never excel in chemistry because I do not have what it takes to be a chemistry person.
I could never become really good at chemistry even if I were to work hard because I don’t have natural ability.
I can become even better at solving chemistry problems through hard work.
I am capable of really understanding chemistry if I work hard.
I can change my intelligence in chemistry quite a lot by working hard.

Stats - Original

Univariate Stats

d <- subset(MSchem, scale == "orig", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))

Item Correlations

corr <- corr.test(d)
corrplot(corr$r)

Histograms

ggplot(gather(d), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness

vis_miss(d)

EFA (All Items)

d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)

## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
## MSchem01 MSchem02 MSchem03 MSchem04 MSchem05 MSchem06 MSchem07 
##    0.446    0.250    0.130    0.111    0.181    0.148    0.370 
## 
## Loadings:
##          Factor1 Factor2
## MSchem01  0.689         
## MSchem02  0.841         
## MSchem03  0.885         
## MSchem04  0.872         
## MSchem05          0.882 
## MSchem06          0.840 
## MSchem07          0.725 
## 
##                Factor1 Factor2
## SS loadings      3.019   2.346
## Proportion Var   0.431   0.335
## Cumulative Var   0.431   0.766
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 58.7 on 8 degrees of freedom.
## The p-value is 8.36e-10

Stats - Alternative

Univariate Stats

d <- subset(MSchem, scale == "alt", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))

Item Correlations

corr <- corr.test(d)
corrplot(corr$r)

Histograms

ggplot(gather(d), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness

vis_miss(d)

EFA (All Items)

d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)

## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
## MSchem01 MSchem02 MSchem03 MSchem04 MSchem05 MSchem06 MSchem07 
##    0.630    0.465    0.316    0.005    0.580    0.234    0.249 
## 
## Loadings:
##          Factor1 Factor2
## MSchem03  0.763         
## MSchem04  0.950         
## MSchem02  0.516  -0.518 
## MSchem05          0.619 
## MSchem06          0.779 
## MSchem07          0.788 
## MSchem01  0.452  -0.407 
## 
##                Factor1 Factor2
## SS loadings      2.280   2.240
## Proportion Var   0.326   0.320
## Cumulative Var   0.326   0.646
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 28.43 on 8 degrees of freedom.
## The p-value is 0.000399

2PL Model - Original

Summary & Fit

d <- na.omit(subset(MSchem, scale == "orig", select=-c(scale)))
d <- d %>% 
  mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)

## 
## Call:
## ltm(formula = d ~ z1)
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -156.3357 340.6713 373.8436
## 
## Coefficients:
##                    value     std.err  z.vals
## Dffclt.MSchem01   0.7191      0.2015  3.5680
## Dffclt.MSchem02   0.9095      0.1473  6.1760
## Dffclt.MSchem03   0.6621     28.2841  0.0234
## Dffclt.MSchem04   0.6573    141.3955  0.0046
## Dffclt.MSchem05   1.2824      0.2006  6.3937
## Dffclt.MSchem06   0.7141      8.1624  0.0875
## Dffclt.MSchem07   0.8585      0.1594  5.3858
## Dscrmn.MSchem01   2.0890      0.6755  3.0926
## Dscrmn.MSchem02   4.3513      1.5437  2.8187
## Dscrmn.MSchem03  36.5386  64691.4546  0.0006
## Dscrmn.MSchem04  41.8914 284944.3405  0.0001
## Dscrmn.MSchem05  -4.6447      1.9170 -2.4229
## Dscrmn.MSchem06 -28.4131   6425.7796 -0.0044
## Dscrmn.MSchem07  -3.4425      1.3838 -2.4877
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): <1e-06 
## quasi-Newton: BFGS

item.fit(mod2, simulate.p.value=T)

## 
## Item-Fit Statistics and P-values
## 
## Call:
## ltm(formula = d ~ z1)
## 
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100 
## 
##              X^2 Pr(>X^2)
## MSchem01  6.6518   0.9604
## MSchem02 13.0174   0.6832
## MSchem03  7.1227   0.1287
## MSchem04  3.1666   0.1287
## MSchem05 11.1334   0.4356
## MSchem06 10.3535   0.1386
## MSchem07 15.9756   0.7525

All Items ICC

plot.ltm(mod2, type = 'ICC', auto.key = FALSE)

All Items IIC

plot.ltm(mod2, type = 'IIC', auto.key = FALSE)

Individual ICC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Individual IIC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Test Information Function

plot(mod2, type=c("IIC"), items=c(0))

2PL Model - Alternative

Summary & Fit

d <- na.omit(subset(MSchem, scale == "alt", select=-c(scale)))
d <- d %>% 
  mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)

## 
## Call:
## ltm(formula = d ~ z1)
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -204.2268 436.4537 473.0653
## 
## Coefficients:
##                    value  std.err  z.vals
## Dffclt.MSchem01   1.5145   0.3328  4.5503
## Dffclt.MSchem02   1.5351   0.2273  6.7536
## Dffclt.MSchem03   1.2176   0.1177 10.3419
## Dffclt.MSchem04   1.2235   0.1243  9.8394
## Dffclt.MSchem05   2.1301   0.4753  4.4815
## Dffclt.MSchem06   1.3973   0.4373  3.1952
## Dffclt.MSchem07   1.4204   0.1881  7.5514
## Dscrmn.MSchem01   1.4601   0.4921  2.9668
## Dscrmn.MSchem02   2.5670   0.8779  2.9241
## Dscrmn.MSchem03   4.4709   2.3306  1.9184
## Dscrmn.MSchem04   4.1839   2.1923  1.9085
## Dscrmn.MSchem05  -1.8569   0.7585 -2.4481
## Dscrmn.MSchem06 -13.1618 151.8881 -0.0867
## Dscrmn.MSchem07  -3.0482   1.0868 -2.8048
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 0.002 
## quasi-Newton: BFGS

item.fit(mod2, simulate.p.value=T)

## 
## Item-Fit Statistics and P-values
## 
## Call:
## ltm(formula = d ~ z1)
## 
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100 
## 
##              X^2 Pr(>X^2)
## MSchem01 12.0221   0.4554
## MSchem02 13.6625   0.4752
## MSchem03  8.4967   0.4851
## MSchem04  8.2060   0.5347
## MSchem05  4.2549   0.9505
## MSchem06  2.4127   0.3762
## MSchem07 14.2003   0.4653

All Items ICC

plot.ltm(mod2, type = 'ICC', auto.key = FALSE)

All Items IIC

plot.ltm(mod2, type = 'IIC', auto.key = FALSE)

Individual ICC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Individual IIC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Test Information Function

plot(mod2, type=c("IIC"), items=c(0))

Identity

Items

I see myself as a [chemistry kind of person]
My parents see me as a [chemistry kind of person]
My instructors see me as a [chemistry kind of person]
My friends see me as a [chemistry kind of person]
My peers see me as a [chemistry kind of person]
I have had experiences in which I was recognized as a [chemistry kind of person]
Knowing chemistry is important for (1=no jobs; 2=a few jobs; 3=most jobs; 4=all jobs)
Knowing chemistry helps me understand how the world works. (1=never; 2=sometimes; 3=most of the time; 4=all of the time)
Thinking like a chemist will help me do well in (1=none of my classes; 2=a few of my classes; 3=most of my classes; 4=all of my classes)
Chemistry makes the world a better place to live
I look forward to my [chemistry] classes.
I don’t care about learning chemistry.
In general, I find [chemistry] (1=very boring; 2=boring; 3=interesting; 4=very interesting)

Stats - Original

Univariate Stats

d <- subset(IDochem, scale == "orig", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))

Item Correlations

corr <- corr.test(d)
corrplot(corr$r)

Histograms

ggplot(gather(d), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness

vis_miss(d)

EFA (All Items)

d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 3, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)

## 
## Call:
## factanal(x = d, factors = 3, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##  IDochem01  IDochem02  IDochem03  IDochem04  IDochem05  IDochem06  IDochem10 
##      0.025      0.295      0.567      0.195      0.107      0.554      0.813 
##  IDochem07  IDochem08  IDochem09 FASochem02 FASochem03 FASochem05 
##      0.850      0.690      0.604      0.404      0.450      0.257 
## 
## Loadings:
##            Factor1 Factor2 Factor3
## IDochem02   0.649           0.527 
## IDochem04   0.863                 
## IDochem05   0.916                 
## IDochem06   0.608                 
## IDochem08           0.504         
## IDochem09           0.578         
## FASochem02          0.521   0.440 
## FASochem03         -0.608         
## FASochem05  0.402   0.680         
## IDochem01                   0.908 
## IDochem03   0.430           0.436 
## IDochem10                         
## IDochem07                         
## 
##                Factor1 Factor2 Factor3
## SS loadings      3.195   2.132   1.863
## Proportion Var   0.246   0.164   0.143
## Cumulative Var   0.246   0.410   0.553
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 54.43 on 42 degrees of freedom.
## The p-value is 0.0946

Stats - Alternative

Univariate Stats

d <- subset(IDochem, scale == "alt", select=-c(scale))
desc <- data.frame(describe(d))
datatable(subset(desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))

Item Correlations

corr <- corr.test(d)
corrplot(corr$r)

Histograms

ggplot(gather(d), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness

vis_miss(d)

EFA (All Items)

d <- na.omit(d)
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 3, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)

## 
## Call:
## factanal(x = d, factors = 3, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##  IDochem01  IDochem02  IDochem03  IDochem04  IDochem05  IDochem06  IDochem10 
##      0.235      0.170      0.372      0.092      0.108      0.502      0.541 
##  IDochem07  IDochem08  IDochem09 FASochem02 FASochem03 FASochem05 
##      0.565      0.434      0.345      0.214      0.364      0.310 
## 
## Loadings:
##            Factor1 Factor2 Factor3
## IDochem01   0.666   0.550         
## IDochem02   0.860                 
## IDochem03   0.687                 
## IDochem04   0.881                 
## IDochem05   0.872                 
## IDochem06   0.678                 
## IDochem10           0.591         
## FASochem02          0.795         
## FASochem03         -0.723         
## FASochem05          0.689         
## IDochem07                   0.650 
## IDochem08                   0.715 
## IDochem09                   0.682 
## 
##                Factor1 Factor2 Factor3
## SS loadings      4.120   2.863   1.765
## Proportion Var   0.317   0.220   0.136
## Cumulative Var   0.317   0.537   0.673
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 30.51 on 42 degrees of freedom.
## The p-value is 0.906

2PL Model - Original

Summary & Fit

d <- na.omit(subset(IDochem, scale == "orig", select=-c(scale)))
d <- d %>% 
  mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)

## 
## Call:
## ltm(formula = d ~ z1)
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -533.2004 1118.401 1180.006
## 
## Coefficients:
##                     value   std.err  z.vals
## Dffclt.IDochem01   0.0564    0.1225  0.4606
## Dffclt.IDochem02  -0.0090    1.0464 -0.0086
## Dffclt.IDochem03   0.3622    0.1722  2.1028
## Dffclt.IDochem04  -0.1341    0.1353 -0.9907
## Dffclt.IDochem05  -0.0385    0.1010 -0.3810
## Dffclt.IDochem06  -0.3094    0.2125 -1.4556
## Dffclt.IDochem10  -1.7846    0.5929 -3.0101
## Dffclt.IDochem07   5.1393    5.9396  0.8653
## Dffclt.IDochem08   0.1899    0.2833  0.6703
## Dffclt.IDochem09   0.3020    0.2323  1.2998
## Dffclt.FASochem02 -0.0303    0.1424 -0.2131
## Dffclt.FASochem03 -0.8866    0.2516 -3.5242
## Dffclt.FASochem05 -0.7880    0.2135 -3.6905
## Dscrmn.IDochem01   3.2738    1.1376  2.8779
## Dscrmn.IDochem02  26.5153 3067.0155  0.0086
## Dscrmn.IDochem03   2.1595    0.6132  3.5214
## Dscrmn.IDochem04   2.7362    0.8737  3.1318
## Dscrmn.IDochem05   4.6204    2.5736  1.7953
## Dscrmn.IDochem06   1.3690    0.4295  3.1875
## Dscrmn.IDochem10   1.1262    0.4520  2.4917
## Dscrmn.IDochem07   0.2689    0.3131  0.8590
## Dscrmn.IDochem08   0.9156    0.3393  2.6981
## Dscrmn.IDochem09   1.2415    0.3931  3.1581
## Dscrmn.FASochem02  2.3855    0.8079  2.9529
## Dscrmn.FASochem03 -1.7238    0.5328 -3.2355
## Dscrmn.FASochem05  2.2139    0.6809  3.2513
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 0.0013 
## quasi-Newton: BFGS

item.fit(mod2, simulate.p.value=T)

## 
## Item-Fit Statistics and P-values
## 
## Call:
## ltm(formula = d ~ z1)
## 
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100 
## 
##                X^2 Pr(>X^2)
## IDochem01   6.4024   0.5347
## IDochem02   2.1744   0.4455
## IDochem03   9.9752   0.3564
## IDochem04   3.5235   0.9505
## IDochem05  10.6292   0.1089
## IDochem06   9.2774   0.3267
## IDochem10   9.8560   0.3564
## IDochem07   8.8230   0.3168
## IDochem08   6.0416   0.7129
## IDochem09  11.2748   0.2376
## FASochem02 12.0463   0.1287
## FASochem03 11.4617   0.2277
## FASochem05 15.8426   0.0297

All Items ICC

plot.ltm(mod2, type = 'ICC', auto.key = FALSE)

All Items IIC

plot.ltm(mod2, type = 'IIC', auto.key = FALSE)

Individual ICC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Individual IIC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Test Information Function

plot(mod2, type=c("IIC"), items=c(0))

2PL Model - Alternative

Summary & Fit

d <- na.omit(subset(IDochem, scale == "alt", select=-c(scale)))
d <- d %>% 
  mutate_at(vars(1:ncol(d)), recode, `1` = 0, `2` = 0, `3` = 1, `4` = 1)
mod2 <- ltm(d ~ z1)
summary(mod2)

## 
## Call:
## ltm(formula = d ~ z1)
## 
## Model Summary:
##  log.Lik     AIC      BIC
##  -584.16 1220.32 1287.263
## 
## Coefficients:
##                     value     std.err  z.vals
## Dffclt.IDochem01   0.2794      0.0989  2.8251
## Dffclt.IDochem02   0.1891      0.0914  2.0700
## Dffclt.IDochem03   0.4078      0.1133  3.5987
## Dffclt.IDochem04   0.0194     17.2786  0.0011
## Dffclt.IDochem05   0.0383    180.0793  0.0002
## Dffclt.IDochem06  -0.0596      0.1563 -0.3812
## Dffclt.IDochem10  -0.9278      0.2561 -3.6234
## Dffclt.IDochem07   3.5306      2.4329  1.4512
## Dffclt.IDochem08   0.9570      0.2895  3.3057
## Dffclt.IDochem09   0.7992      0.1987  4.0228
## Dffclt.FASochem02  0.4085      0.1472  2.7749
## Dffclt.FASochem03 -0.6282      0.2072 -3.0321
## Dffclt.FASochem05 -0.6961      0.1667 -4.1758
## Dscrmn.IDochem01   3.5396      0.7861  4.5027
## Dscrmn.IDochem02   4.1796      1.1048  3.7831
## Dscrmn.IDochem03   2.8360      0.6733  4.2121
## Dscrmn.IDochem04  34.8307  31080.4580  0.0011
## Dscrmn.IDochem05  41.9287 197281.2471  0.0002
## Dscrmn.IDochem06   1.7114      0.4160  4.1144
## Dscrmn.IDochem10   1.5233      0.4373  3.4838
## Dscrmn.IDochem07   0.3870      0.2813  1.3758
## Dscrmn.IDochem08   1.0953      0.3342  3.2778
## Dscrmn.IDochem09   1.5562      0.4163  3.7380
## Dscrmn.FASochem02  1.8720      0.4523  4.1390
## Dscrmn.FASochem03 -1.5989      0.4233 -3.7777
## Dscrmn.FASochem05  2.5472      0.7425  3.4308
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 1.3e-06 
## quasi-Newton: BFGS

item.fit(mod2, simulate.p.value=T)

## 
## Item-Fit Statistics and P-values
## 
## Call:
## ltm(formula = d ~ z1)
## 
## Alternative: Items do not fit the model
## Ability Categories: 10
## Monte Carlo samples: 100 
## 
##                X^2 Pr(>X^2)
## IDochem01  10.5375   0.2574
## IDochem02   7.3938   0.5545
## IDochem03   7.6702    0.495
## IDochem04   3.7759   0.5941
## IDochem05   0.2925   0.9109
## IDochem06   6.7098    0.802
## IDochem10   7.2910   0.7228
## IDochem07   8.3404   0.4851
## IDochem08  13.4286   0.1287
## IDochem09  10.3072   0.3663
## FASochem02 20.8875   0.0198
## FASochem03  8.2527   0.5644
## FASochem05  5.8190   0.8317

All Items ICC

plot.ltm(mod2, type = 'ICC', auto.key = FALSE)

All Items IIC

plot.ltm(mod2, type = 'IIC', auto.key = FALSE)

Individual ICC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'ICC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Individual IIC Plots

items <- colnames(d)
n <- 1
for (i in 1:ncol(d)) {
  plot.ltm(mod2, type = 'IIC', auto.key = FALSE, items = n, main = items[n], annot = F)
  n <- n + 1
}

Test Information Function

plot(mod2, type=c("IIC"), items=c(0))

Meeting notes: - add entry expectation to survey map - provax rotation for EFAs

ee - kurtosis is worse for the original - refine efa - should run separate 2PLs on separate factors

gm - promax

id - drop first item from efa

once we have factors, check alpha beliefs about self vs. beliefs about chem/orgo 15 min convo on what we’ve learned so far for group meeting

Organic Chemistry Survey: 2PL Original and Alternative

Heather Perkins

5/13/2022

Prepare Data

Entry Expectations

Items

Notes

Stats - Original

Univariate Stats

Item Correlations

Histograms

Missingness

EFA (All Items)

Stats - Alternative

Univariate Stats

Item Correlations

Histograms

Missingness

EFA (All Items)

2PL Model - Original

Summary & Fit

All Items ICC

All Items IIC

Individual ICC Plots

Individual IIC Plots

Test Information Function

2PL Model - Alternative

Summary & Fit

All Items ICC

All Items IIC

Individual ICC Plots

Individual IIC Plots

Test Information Function

Disciplinary Growth Mindset (C)

Items

Stats - Original

Univariate Stats

Item Correlations

Histograms

Missingness

EFA (All Items)

Stats - Alternative

Univariate Stats

Item Correlations

Histograms

Missingness

EFA (All Items)

2PL Model - Original

Summary & Fit

All Items ICC

All Items IIC

Individual ICC Plots

Individual IIC Plots

Test Information Function

2PL Model - Alternative

Summary & Fit

All Items ICC

All Items IIC

Individual ICC Plots

Individual IIC Plots

Test Information Function

Identity

Items

Stats - Original

Univariate Stats

Item Correlations

Histograms

Missingness

EFA (All Items)

Stats - Alternative

Univariate Stats

Item Correlations

Histograms

Missingness

EFA (All Items)

2PL Model - Original

Summary & Fit

All Items ICC

All Items IIC

Individual ICC Plots