Prepare Data

Import the surveys, combine them into a single data frame, delete identifying information, assign random IDs, and separate the items by scale for examination.

knitr::opts_chunk$set(message = F, warning = F)

# load libraries ----------------------------------------------------------
library(stringi)
library(psych)
library(DT)
library(ggplot2)
library(tidyr)
library(naniar)
library(UpSetR)
library(nFactors)
library(lavaan)

# load data ---------------------------------------------------------------
# alt <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Alternative Scales_April 28, 2022_12.34.csv", header=T)
# alt <- alt[-c(1,2),]
# alt$scale <- "alt"
# 
# orig <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Original Scales_April 28, 2022_12.35.csv", header=T)
# orig <- orig[-c(1,2),]
# orig$scale <- "orig"
# 
# df <- rbind.data.frame(alt, orig)
# df <- subset(df, select = -c(1:19))
# names(df)
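# # myFun builds random anonymous IDs: five random letters, a zero-padded four-digit number, then one more letter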
# myFun <- function(n) {
#   a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
#   paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
# }
# df$id <- myFun(nrow(df))
# write.csv(df, file="imported_anonymized.csv", row.names = F)

df <- read.csv(file="imported_anonymized.csv", header=T)
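# df: survey item responses plus the scale indicator ("alt"/"orig") and the anonymized id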

# extract items -----------------------------------------------------------
# new items
EEochem <- subset(df, select=grep("EEochem", colnames(df))) # entry expectations
CCdisc <- subset(df, select=grep("CCdisc", colnames(df))) # classroom climate
IDochem <- cbind.data.frame(subset(df, select=grep("IDochem", colnames(df))), subset(df, select=grep("FASochem", colnames(df)))) # identity (IDochem plus FASochem items)
CSochem <- subset(df, select=grep("CSochem", colnames(df))) # career satisfaction

# established scales
MSchem <- subset(df, select=grep("MSchem", colnames(df))) # disciplinary growth mindset (chemistry)
IPchem <- subset(df, select=grep("IPchem", colnames(df))) # instructor growth mindset (chemistry)
SEchem <- subset(df, select=grep("SEchem", colnames(df))) # disciplinary self-efficacy (chemistry)
MSochem <- subset(df, select=grep("MSochem", colnames(df))) # disciplinary growth mindset (organic chemistry)
IPochem <- subset(df, select=grep("IPochem", colnames(df))) # instructor growth mindset (organic chemistry)
SEochem <- subset(df, select=grep("SEochem", colnames(df))) # disciplinary self-efficacy (organic chemistry)
CNEBochem_class <- subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(1:3)) # class entity beliefs (what classmates thought)
CNEBochem_self <- subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(4:6)) # own entity beliefs
CNHSochem_others <- subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(1:3)) # help-seeking norms (others' comfort)
CNHSochem_self <- subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(4:6)) # own help seeking
CNSWochem <- subset(df, select=grep("CNSWochem", colnames(df))) # safe to be wrong
FCochem <- subset(df, select=grep("FCochem", colnames(df))) # faculty caring

New Scales

Entry Expectations

Items

  1. I took this course only because it was required
  2. I was looking forward to this course
  3. I heard this course would be very difficult
  4. I chose to take Organic Chemistry because I am really interested in the topic
  5. I chose to take Organic Chemistry because it is a requirement for my major or I am pre-med intending
  6. I think that what we will study in Organic Chemistry will be important for me to know
  7. I think the field of organic chemistry is an important discipline
  8. I heard this course would cover content that is important…for success in future classes
  9. I heard this course would cover content that is important…for success in my future career
  10. I heard this course would cover content that is important…for studying or practicing medicine
  11. I heard this course would cover content that is important…for the MCAT exam

Stats

Univariate Stats
EEochem_desc <- data.frame(describe(EEochem))
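# the table below drops n, trimmed, and mad, so columns 8:9 are skew and kurtosis;
# styleInterval() colors values outside +/-2 red as a flag for non-normality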
datatable(subset(EEochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(EEochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
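# vis_miss() maps where values are missing and reports the percent missing per item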
vis_miss(EEochem)

# gg_miss_upset(EEochem)
EFA (All Items)
EEochem_items <- c("Took course b/c required",
                    "Looking forward to course",
                    "Heard difficult",
                    "Interested in topic",
                    "Requirement",
                    "Important to know",
                    "Important discipline",
                    "Important future classes",
                    "Important future career",
                    "Important medicine",
                    "Important MCAT")

d <- na.omit(EEochem)
colnames(d) <- EEochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
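A quick numeric cross-check that can be read alongside the scree plot, using the ev and ap objects already computed in this chunk (a sketch, not part of the original run):

sum(ev$values > 1)                       # Kaiser criterion: eigenvalues above 1
cbind(observed = ev$values,
      parallel = ap$eigen$qevpea)        # observed vs parallel-analysis eigenvalues, by position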

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##  Took course b/c required Looking forward to course           Heard difficult 
##                     0.470                     0.409                     0.867 
##       Interested in topic               Requirement         Important to know 
##                     0.299                     0.737                     0.398 
##      Important discipline  Important future classes   Important future career 
##                     0.543                     0.494                     0.310 
##        Important medicine            Important MCAT 
##                     0.431                     0.803 
## 
## Loadings:
##                           Factor1 Factor2
## Important to know          0.707         
## Important discipline       0.622         
## Important future classes   0.706         
## Important future career    0.818         
## Important medicine         0.754         
## Took course b/c required          -0.710 
## Looking forward to course          0.720 
## Interested in topic                0.781 
## Requirement                       -0.513 
## Heard difficult                          
## Important MCAT             0.431         
## 
##                Factor1 Factor2
## SS loadings      3.025   2.213
## Proportion Var   0.275   0.201
## Cumulative Var   0.275   0.476
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 88.45 on 34 degrees of freedom.
## The p-value is 9.78e-07
EFA (First Seven Items)
d <- subset(na.omit(EEochem), select=-c(EEochem08,EEochem09,EEochem10,EEochem11))
colnames(d) <- EEochem_items[1:7]
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##  Took course b/c required Looking forward to course           Heard difficult 
##                     0.371                     0.423                     0.669 
##       Interested in topic               Requirement         Important to know 
##                     0.337                     0.724                     0.430 
##      Important discipline 
##                     0.376 
## 
## Loadings:
##                           Factor1 Factor2
## Took course b/c required   0.753         
## Looking forward to course -0.592   0.476 
## Heard difficult            0.515         
## Interested in topic       -0.626   0.521 
## Requirement                0.514         
## Important to know                  0.740 
## Important discipline               0.789 
## 
##                Factor1 Factor2
## SS loadings      1.862   1.807
## Proportion Var   0.266   0.258
## Cumulative Var   0.266   0.524
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 17.21 on 8 degrees of freedom.
## The p-value is 0.028

Classroom Climate

Items

  1. Students in this class were competitive with each other
  2. I didn’t feel comfortable studying with others in this class
  3. Other students in this class treated me disrespectfully (1=never; 2=once; 3=several times; 4=frequently)
  4. I had bad experiences in studying with others for this class (1=never; 2=once; 3=several times; 4=frequently)

Stats

Univariate Stats
CCdisc_desc <- data.frame(describe(CCdisc))
datatable(subset(CCdisc_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CCdisc), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CCdisc)

gg_miss_upset(CCdisc)
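# gg_miss_upset() shows which combinations of items tend to be missing together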

EFA
CCdisc_items <- c("Students competitive",
                  "Uncomfortable studying",
                  "Treated disrespectifully",
                  "Bad experiences studying")

d <- na.omit(CCdisc)
colnames(d) <- CCdisc_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##     Students competitive   Uncomfortable studying  Treated disrespectfully 
##                    0.988                    0.982                    0.724 
## Bad experiences studying 
##                    0.157 
## 
## Loadings:
## [1] 0.525 0.918            
## 
##                Factor1
## SS loadings      1.149
## Proportion Var   0.287
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 10.29 on 2 degrees of freedom.
## The p-value is 0.00583

Career Satisfaction

Items

How important are the following factors for your future career satisfaction?

  1. Making money
  2. Having a prestigious career
  3. Helping others
  4. Having job security and opportunity
  5. Working with people
  6. Developing new knowledge and skills
  7. Having lots of personal and family time
  8. Having a low-stress job
  9. Being in an exciting environment
  10. Solving societal problems
  11. Making use of my talents and abilities
  12. Making use of my education
  13. Living up to family expectations

Stats

Univariate Stats
CSochem_desc <- data.frame(describe(CSochem))
datatable(subset(CSochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CSochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CSochem)

# gg_miss_upset(CSochem)
EFA (Two factor)
CSochem_items <- c("Money",
                  "Prestige",
                  "Helping",
                  "Job security",
                  "Working people",
                  "New knowledge",
                  "Personal/family time",
                  "Low-stress",
                  "Excitement",
                  "Social problems",
                  "Using talents",
                  "Using education",
                  "Family expectations")

d <- na.omit(CSochem)
colnames(d) <- CSochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##                Money             Prestige              Helping 
##                0.883                0.839                0.614 
##         Job security       Working people        New knowledge 
##                0.683                0.710                0.359 
## Personal/family time           Low-stress           Excitement 
##                0.357                0.659                0.557 
##      Social problems        Using talents      Using education 
##                0.708                0.471                0.486 
##  Family expectations 
##                0.906 
## 
## Loadings:
##                      Factor1 Factor2
## Helping               0.621         
## Working people        0.538         
## New knowledge         0.796         
## Excitement            0.649         
## Social problems       0.539         
## Using talents         0.727         
## Using education       0.710         
## Personal/family time          0.754 
## Low-stress                    0.579 
## Money                               
## Prestige                            
## Job security          0.474         
## Family expectations                 
## 
##                Factor1 Factor2
## SS loadings      3.563   1.204
## Proportion Var   0.274   0.093
## Cumulative Var   0.274   0.367
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 166.19 on 53 degrees of freedom.
## The p-value is 1.32e-13
EFA (Three factor)
EFA <- factanal(d, factors = 3, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 3, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##                Money             Prestige              Helping 
##                0.884                0.847                0.612 
##         Job security       Working people        New knowledge 
##                0.688                0.544                0.250 
## Personal/family time           Low-stress           Excitement 
##                0.412                0.554                0.533 
##      Social problems        Using talents      Using education 
##                0.714                0.256                0.412 
##  Family expectations 
##                0.918 
## 
## Loadings:
##                      Factor1 Factor2 Factor3
## Excitement            0.569                 
## Using talents         0.838                 
## Using education       0.708                 
## Working people                0.654         
## New knowledge         0.437   0.745         
## Personal/family time                  0.708 
## Low-stress                            0.647 
## Money                                       
## Prestige                                    
## Helping                       0.498         
## Job security                                
## Social problems                             
## Family expectations                         
## 
##                Factor1 Factor2 Factor3
## SS loadings      2.229   1.915   1.232
## Proportion Var   0.171   0.147   0.095
## Cumulative Var   0.171   0.319   0.414
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 115.43 on 42 degrees of freedom.
## The p-value is 8.99e-09
EFA (Four factor)
EFA <- factanal(d, factors = 4, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 4, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##                Money             Prestige              Helping 
##                0.785                0.005                0.616 
##         Job security       Working people        New knowledge 
##                0.709                0.593                0.199 
## Personal/family time           Low-stress           Excitement 
##                0.192                0.676                0.550 
##      Social problems        Using talents      Using education 
##                0.714                0.152                0.458 
##  Family expectations 
##                0.839 
## 
## Loadings:
##                      Factor1 Factor2 Factor3 Factor4
## Helping               0.534                         
## Working people        0.624                         
## New knowledge         0.835                         
## Using talents                 0.873                 
## Using education               0.626                 
## Prestige                              0.954         
## Personal/family time                          0.844 
## Low-stress                                    0.544 
## Money                                               
## Job security                                        
## Excitement            0.417   0.478                 
## Social problems       0.444                         
## Family expectations                                 
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      2.253   1.777   1.284   1.198
## Proportion Var   0.173   0.137   0.099   0.092
## Cumulative Var   0.173   0.310   0.409   0.501
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 69.35 on 32 degrees of freedom.
## The p-value is 0.000143

Identity

Items

  1. I see myself as a [chemistry kind of person]
  2. My parents see me as a [chemistry kind of person]
  3. My instructors see me as a [chemistry kind of person]
  4. My friends see me as a [chemistry kind of person]
  5. My peers see me as a [chemistry kind of person]
  6. I have had experiences in which I was recognized as a [chemistry kind of person]
  7. Knowing chemistry is important for (1=no jobs; 2=a few jobs; 3=most jobs; 4=all jobs)
  8. Knowing chemistry helps me understand how the world works. (1=never; 2=sometimes; 3=most of the time; 4=all of the time)
  9. Thinking like a chemist will help me do well in (1=none of my classes; 2=a few of my classes; 3=most of my classes; 4=all of my classes)
  10. Chemistry makes the world a better place to live
  11. I look forward to my [chemistry] classes.
  12. I don’t care about learning chemistry.
  13. In general, I find [chemistry] (1=very boring; 2=boring; 3=interesting; 4=very interesting)

Stats

Univariate Stats
IDochem_desc <- data.frame(describe(IDochem))
datatable(subset(IDochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(IDochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(IDochem)

gg_miss_upset(IDochem)

CFA
IDochem_items <- c("Parents R",
                   "Inst R",
                   "Friends R",
                   "Peers R",
                   "Exp R",
                   "Important",
                   "Understand",
                   "Help classes",
                   "World better",
                   "Look forward",
                   "Dont care",
                   "Boring",
                   "REC","VAL","FAS")

d <- na.omit(IDochem)
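# lavaan measurement syntax: "=~" reads "is measured by"; each line defines one latent factor and its indicators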
id_model <- ' rec =~ IDochem02 + IDochem03 + IDochem04 + IDochem05 + IDochem06
              val =~ IDochem07 + IDochem08 + IDochem09 + IDochem10
              fas =~ FASochem02 + FASochem03 + FASochem05'

fit <- cfa(id_model, data=d)

summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 49 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        27
##                                                       
##   Number of observations                           176
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                85.689
##   Degrees of freedom                                51
##   P-value (Chi-square)                           0.002
## 
## Model Test Baseline Model:
## 
##   Test statistic                              1252.883
##   Degrees of freedom                                66
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.971
##   Tucker-Lewis Index (TLI)                       0.962
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1856.897
##   Loglikelihood unrestricted model (H1)      -1814.052
##                                                       
##   Akaike (AIC)                                3767.794
##   Bayesian (BIC)                              3853.397
##   Sample-size adjusted Bayesian (BIC)         3767.895
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.062
##   90 Percent confidence interval - lower         0.038
##   90 Percent confidence interval - upper         0.085
##   P-value RMSEA <= 0.05                          0.184
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.068
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   rec =~                                              
##     IDochem02         1.000                           
##     IDochem03         0.748    0.071   10.564    0.000
##     IDochem04         1.082    0.065   16.707    0.000
##     IDochem05         1.033    0.061   16.851    0.000
##     IDochem06         0.835    0.081   10.318    0.000
##   val =~                                              
##     IDochem07         1.000                           
##     IDochem08         1.815    0.370    4.907    0.000
##     IDochem09         2.149    0.402    5.349    0.000
##     IDochem10         1.760    0.368    4.785    0.000
##   fas =~                                              
##     FASochem02        1.000                           
##     FASochem03       -0.905    0.083  -10.842    0.000
##     FASochem05        0.908    0.074   12.193    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   rec ~~                                              
##     val               0.090    0.023    3.929    0.000
##     fas               0.349    0.056    6.218    0.000
##   val ~~                                              
##     fas               0.124    0.028    4.392    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .IDochem02         0.225    0.027    8.184    0.000
##    .IDochem03         0.310    0.035    8.906    0.000
##    .IDochem04         0.091    0.016    5.646    0.000
##    .IDochem05         0.076    0.014    5.359    0.000
##    .IDochem06         0.413    0.046    8.937    0.000
##    .IDochem07         0.224    0.026    8.717    0.000
##    .IDochem08         0.344    0.043    7.909    0.000
##    .IDochem09         0.164    0.032    5.069    0.000
##    .IDochem10         0.380    0.047    8.141    0.000
##    .FASochem02        0.271    0.039    6.922    0.000
##    .FASochem03        0.281    0.038    7.468    0.000
##    .FASochem05        0.142    0.026    5.562    0.000
##     rec               0.524    0.077    6.780    0.000
##     val               0.060    0.021    2.857    0.004
##     fas               0.505    0.082    6.169    0.000
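For a compact view of the headline fit indices reported above, lavaan's fitMeasures() can be called on the same fitted object (a minimal sketch, assuming the fit object from this chunk):

fitMeasures(fit, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))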
CFA Vis
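# "std" plots standardized parameter estimates on the paths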
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=IDochem_items, sizeMan=8, edge.label.cex=0.75)

Established Items

Disciplinary Growth Mindset

(Chemistry)

Items
  1. Even if I were to spend a lot of time working on difficult chemistry problems, I cannot develop my intelligence in chemistry further.
  2. I won’t get better at chemistry if I try harder.
  3. I could never excel in chemistry because I do not have what it takes to be a chemistry person.
  4. I could never become really good at chemistry even if I were to work hard because I don’t have natural ability.
  5. I can become even better at solving chemistry problems through hard work.
  6. I am capable of really understanding chemistry if I work hard.
  7. I can change my intelligence in chemistry quite a lot by working hard.
Stats
Univariate Stats
MSchem_desc <- data.frame(describe(MSchem))
datatable(subset(MSchem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(MSchem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(MSchem)

# gg_miss_upset(MSchem)
CFA
MSchem_items <- c("Cannot develop",
                   "Won't get better",
                   "Don't have what it takes",
                   "No natural ability",
                   "Growth hard work",
                   "Capable work hard",
                   "Change work hard",
                   "FIX","GRO")

d <- na.omit(MSchem)
mind_model <- ' fix =~ MSchem01 + MSchem02 + MSchem03 + MSchem04
              gro =~  MSchem05 + MSchem06 + MSchem07'

fit <- cfa(mind_model, data=d)

summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 32 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
##                                                       
##   Number of observations                           180
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                46.730
##   Degrees of freedom                                13
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               871.010
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.960
##   Tucker-Lewis Index (TLI)                       0.936
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -997.476
##   Loglikelihood unrestricted model (H1)       -974.112
##                                                       
##   Akaike (AIC)                                2024.953
##   Bayesian (BIC)                              2072.847
##   Sample-size adjusted Bayesian (BIC)         2025.342
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.120
##   90 Percent confidence interval - lower         0.084
##   90 Percent confidence interval - upper         0.158
##   P-value RMSEA <= 0.05                          0.001
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.045
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   fix =~                                              
##     MSchem01          1.000                           
##     MSchem02          1.030    0.108    9.514    0.000
##     MSchem03          1.246    0.117   10.670    0.000
##     MSchem04          1.424    0.128   11.162    0.000
##   gro =~                                              
##     MSchem05          1.000                           
##     MSchem06          1.354    0.121   11.164    0.000
##     MSchem07          1.328    0.125   10.634    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   fix ~~                                              
##     gro              -0.183    0.032   -5.735    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .MSchem01          0.351    0.039    8.933    0.000
##    .MSchem02          0.212    0.025    8.478    0.000
##    .MSchem03          0.131    0.019    6.730    0.000
##    .MSchem04          0.071    0.019    3.813    0.000
##    .MSchem05          0.198    0.024    8.232    0.000
##    .MSchem06          0.087    0.021    4.235    0.000
##    .MSchem07          0.166    0.025    6.547    0.000
##     fix               0.298    0.059    5.049    0.000
##     gro               0.215    0.040    5.379    0.000
CFA Vis
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=MSchem_items, sizeMan=8, edge.label.cex=0.75)

(Organic Chemistry)

Items
  1. Even if I were to spend a lot of time working on difficult organic chemistry problems, I cannot develop my intelligence in organic chemistry further.
  2. I won’t get better at organic chemistry if I try harder.
  3. I could never excel in organic chemistry because I do not have what it takes to be an organic chemistry person.
  4. I could never become really good at organic chemistry even if I were to work hard because I don’t have natural ability.
  5. I can become even better at solving organic chemistry problems through hard work.
  6. I am capable of really understanding organic chemistry if I work hard.
  7. I can change my intelligence in organic chemistry quite a lot by working hard.
Stats
Univariate Stats
MSochem_desc <- data.frame(describe(MSochem))
datatable(subset(MSochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(MSochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(MSochem)

# gg_miss_upset(MSochem)
CFA
MSochem_items <- c("Cannot develop",
                   "Won't get better",
                   "Don't have what it takes",
                   "No natural ability",
                   "Growth hard work",
                   "Capable work hard",
                   "Change work hard",
                   "FIX","GRO")

d <- na.omit(MSochem)
mindo_model <- ' fix =~ MSochem01 + MSochem02 + MSochem03 + MSochem04
              gro =~  MSochem05 + MSochem06 + MSochem07'

fit <- cfa(mindo_model, data=d)

summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
##                                                       
##   Number of observations                           181
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                40.284
##   Degrees of freedom                                13
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              1053.371
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.974
##   Tucker-Lewis Index (TLI)                       0.957
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -918.777
##   Loglikelihood unrestricted model (H1)       -898.635
##                                                       
##   Akaike (AIC)                                1867.554
##   Bayesian (BIC)                              1915.531
##   Sample-size adjusted Bayesian (BIC)         1868.025
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.108
##   90 Percent confidence interval - lower         0.071
##   90 Percent confidence interval - upper         0.146
##   P-value RMSEA <= 0.05                          0.007
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.035
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   fix =~                                              
##     MSochem01         1.000                           
##     MSochem02         1.031    0.072   14.240    0.000
##     MSochem03         1.154    0.077   15.060    0.000
##     MSochem04         1.200    0.075   16.015    0.000
##   gro =~                                              
##     MSochem05         1.000                           
##     MSochem06         1.204    0.090   13.372    0.000
##     MSochem07         1.064    0.093   11.492    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   fix ~~                                              
##     gro              -0.270    0.039   -6.886    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .MSochem01         0.196    0.024    8.231    0.000
##    .MSochem02         0.156    0.020    7.787    0.000
##    .MSochem03         0.145    0.020    7.144    0.000
##    .MSochem04         0.100    0.017    5.834    0.000
##    .MSochem05         0.157    0.020    7.771    0.000
##    .MSochem06         0.075    0.017    4.452    0.000
##    .MSochem07         0.177    0.023    7.768    0.000
##     fix               0.418    0.062    6.708    0.000
##     gro               0.262    0.042    6.176    0.000
CFA Vis
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=MSochem_items, sizeMan=8, edge.label.cex=0.75)

Instructor Growth Mindset

(Chemistry)

Items
  1. Instructors in chemistry believe that most students are capable of meeting the course expectations.
  2. Instructors in chemistry believe that most students are capable of getting an ‘A’ if they put in the effort.
  3. Instructors in chemistry see this course as a “weed out” course to fail weaker students.
  4. The instructor expects the course to take a lot of effort.
Stats
Univariate Stats
IPchem_desc <- data.frame(describe(IPchem))
datatable(subset(IPchem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(IPchem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(IPchem)

# gg_miss_upset(IPchem)
EFA
IPchem_items <- c("Most students capable expectations",
                  "Most students capable A",
                  "Weed out course",
                  "Effort")

d <- na.omit(IPchem)
colnames(d) <- IPchem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
## Most students capable expectations            Most students capable A 
##                              0.706                              0.005 
##                    Weed out course                             Effort 
##                              0.898                              0.986 
## 
## Loadings:
## [1]  0.542  0.997              
## 
##                Factor1
## SS loadings      1.405
## Proportion Var   0.351
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 18.28 on 2 degrees of freedom.
## The p-value is 0.000107

(Organic Chemistry)

Items
  1. Instructors in organic chemistry believe that most students are capable of meeting the course expectations.
  2. Instructors in organic chemistry believe that most students are capable of getting an ‘A’ if they put in the effort.
  3. Instructors in organic chemistry see this course as a “weed out” course to fail weaker students.
  4. The instructor expects the course to take a lot of effort.
Stats
Univariate Stats
IPochem_desc <- data.frame(describe(IPochem))
datatable(subset(IPochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(IPochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(IPochem)

# gg_miss_upset(IPochem)
EFA
IPochem_items <- c("Most students capable expectations",
                  "Most students capable A",
                  "Weed out course",
                  "Effort")

d <- na.omit(IPochem)
colnames(d) <- IPochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
## Most students capable expectations            Most students capable A 
##                              0.397                              0.571 
##                    Weed out course                             Effort 
##                              0.817                              1.000 
## 
## Loadings:
## [1]  0.776  0.655 -0.427       
## 
##                Factor1
## SS loadings      1.214
## Proportion Var   0.304
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 5.5 on 2 degrees of freedom.
## The p-value is 0.0639

Disciplinary Self-Efficacy

(Chemistry)

Items
  1. If I study, I will do well on a test in a chemistry course.
  2. I am able to help my classmates with chemistry coursework.
  3. I get a sinking feeling when I think of trying to tackle tough chemistry problems.
  4. I always understand concepts I am studying in chemistry.
  5. Other people understand more than I do about what is going on in my chemistry courses.
Stats
Univariate Stats
SEchem_desc <- data.frame(describe(SEchem))
datatable(subset(SEchem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(SEchem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(SEchem)

gg_miss_upset(SEchem)

CFA
SEchem_items <- c("Study test",
                   "Help classmates",
                   "Sinking feeling",
                   "Always understand",
                   "Others understand",
                   "SE")

d <- na.omit(SEchem)
se_model <- ' se =~ SEchem01 + SEchem02 + SEchem03 + SEchem04 + SEchem05'
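# SEchem03 and SEchem05 are negatively worded, so negative loadings on the single factor are expected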

fit <- cfa(se_model, data=d)

summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 23 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        10
##                                                       
##   Number of observations                           180
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 5.982
##   Degrees of freedom                                 5
##   P-value (Chi-square)                           0.308
## 
## Model Test Baseline Model:
## 
##   Test statistic                               157.031
##   Degrees of freedom                                10
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.993
##   Tucker-Lewis Index (TLI)                       0.987
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1007.531
##   Loglikelihood unrestricted model (H1)      -1004.540
##                                                       
##   Akaike (AIC)                                2035.063
##   Bayesian (BIC)                              2066.992
##   Sample-size adjusted Bayesian (BIC)         2035.322
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.033
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.113
##   P-value RMSEA <= 0.05                          0.544
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.031
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   se =~                                               
##     SEchem01          1.000                           
##     SEchem02          0.957    0.162    5.895    0.000
##     SEchem03         -0.896    0.162   -5.545    0.000
##     SEchem04          0.729    0.134    5.441    0.000
##     SEchem05         -0.844    0.154   -5.465    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .SEchem01          0.425    0.061    7.015    0.000
##    .SEchem02          0.403    0.057    7.096    0.000
##    .SEchem03          0.489    0.063    7.771    0.000
##    .SEchem04          0.351    0.044    7.910    0.000
##    .SEchem05          0.461    0.059    7.879    0.000
##     se                0.289    0.073    3.966    0.000
CFA Vis
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=SEchem_items, sizeMan=8, edge.label.cex=0.75)

(Organic Chemistry)

Items
  1. If I study, I will do well on a test in an organic chemistry course.
  2. I am able to help my classmates with organic chemistry coursework.
  3. I get a sinking feeling when I think of trying to tackle tough organic chemistry problems.
  4. I always understand concepts I am studying in organic chemistry.
  5. Other people understand more than I do about what is going on in my organic chemistry courses.
Stats
Univariate Stats
SEochem_desc <- data.frame(describe(SEochem))
datatable(subset(SEochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(SEochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(SEochem)

gg_miss_upset(SEochem)

CFA
SEochem_items <- c("Study test",
                   "Help classmates",
                   "Sinking feeling",
                   "Always understand",
                   "Others understand",
                   "SE")

d <- na.omit(SEochem)
seo_model <- ' se =~ SEochem01 + SEochem02 + SEochem03 + SEochem04 + SEochem05'

fit <- cfa(seo_model, data=d)

summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 21 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        10
##                                                       
##   Number of observations                           177
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 2.865
##   Degrees of freedom                                 5
##   P-value (Chi-square)                           0.721
## 
## Model Test Baseline Model:
## 
##   Test statistic                               210.396
##   Degrees of freedom                                10
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.021
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -943.374
##   Loglikelihood unrestricted model (H1)       -941.941
##                                                       
##   Akaike (AIC)                                1906.747
##   Bayesian (BIC)                              1938.509
##   Sample-size adjusted Bayesian (BIC)         1906.841
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.077
##   P-value RMSEA <= 0.05                          0.863
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.019
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   se =~                                               
##     SEochem01         1.000                           
##     SEochem02         0.986    0.124    7.941    0.000
##     SEochem03        -0.833    0.121   -6.889    0.000
##     SEochem04         0.679    0.107    6.350    0.000
##     SEochem05        -0.697    0.113   -6.146    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .SEochem01         0.304    0.048    6.335    0.000
##    .SEochem02         0.284    0.046    6.220    0.000
##    .SEochem03         0.427    0.054    7.933    0.000
##    .SEochem04         0.375    0.045    8.298    0.000
##    .SEochem05         0.438    0.052    8.408    0.000
##     se                0.361    0.072    5.003    0.000
CFA Vis
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=SEochem_items, sizeMan=8, edge.label.cex=0.75)

Classroom Norms

Class Entity Beliefs

Items

Imagine students who scored low on the first exam in this class. What did most students in this class think about the low scoring students?

  1. …low scoring students were not smart enough to truly understand [organic chemistry]
  2. …not everyone has the natural ability to get an ‘A’ in [organic chemistry]
  3. …with enough hard work, low scoring students could have been getting an ‘A’ in [organic chemistry]
Stats
Univariate Stats
CNEBochem_class_desc <- data.frame(describe(CNEBochem_class))
datatable(subset(CNEBochem_class_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CNEBochem_class), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CNEBochem_class)

# gg_miss_upset(CNEBochem_class)

Own Entity Beliefs

Items

What did you think about the low scoring students? I thought…

  1. …low scoring students were not smart enough to truly understand [organic chemistry]
  2. …not everyone has the natural ability to get an ‘A’ in [organic chemistry]
  3. …with enough hard work, low scoring students could have been getting an ‘A’ in [organic chemistry]
Stats
Univariate Stats
CNEBochem_self_desc <- data.frame(describe(CNEBochem_self))
datatable(subset(CNEBochem_self_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CNEBochem_self), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CNEBochem_self)

# gg_miss_upset(CNEBochem_self)

Help Seeking Norms

Items

When students struggled in this course, how comfortable did they feel seeking help from…

  1. …their classmates
  2. …the instructor
  3. …the TA(s)
Stats
Univariate Stats
CNHSochem_others_desc <- data.frame(describe(CNHSochem_others))
datatable(subset(CNHSochem_others_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CNHSochem_others), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CNHSochem_others)

# gg_miss_upset(CNHSochem_others)

Own Help Seeking

Items

How comfortable did you feel in seeking help from…

  1. …your classmates
  2. …the instructor
  3. …the TA(s)
Stats
Univariate Stats
CNHSochem_self_desc <- data.frame(describe(CNHSochem_self))
datatable(subset(CNHSochem_self_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CNHSochem_self), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CNHSochem_self)

# gg_miss_upset(CNHSochem_self)

Safe to be Wrong

Items

During class…

  1. Students felt safe to be wrong.
  2. Students felt comfortable admitting to their classmates when they were struggling with course concepts.
  3. Students were happy to help each other learn.
  4. I felt comfortable admitting to my classmates when I was struggling.
  5. I was often too embarrassed to admit when I struggled in the course.
Stats
Univariate Stats
CNSWochem_desc <- data.frame(describe(CNSWochem))
datatable(subset(CNSWochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(CNSWochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(CNSWochem)

# gg_miss_upset(CNSWochem)

Faculty Caring

Items

In general, I felt that my instructor would…

  1. Take the time to talk to me if I needed help.
  2. Be sensitive to my difficulties if I shared them.
  3. Be sympathetic if I was upset.
  4. Try to understand my problem if I talked about it.

Stats

Univariate Stats
FCochem_desc <- data.frame(describe(FCochem))
datatable(subset(FCochem_desc, select=-c(n, trimmed, mad))) %>%
  formatRound(1:10) %>%
  formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
Histograms
ggplot(gather(FCochem), aes(value)) + 
  geom_histogram(bins = 4) + 
  facet_wrap(~key)

Missingness
vis_miss(FCochem)

# gg_miss_upset(FCochem)
EFA
FCochem_items <- c("Talk",
                  "Sensitive",
                  "Sympathetic",
                  "Understand")

d <- na.omit(FCochem)
colnames(d) <- FCochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows

EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
## 
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
## 
## Uniquenesses:
##        Talk   Sensitive Sympathetic  Understand 
##       0.346       0.264       0.161       0.163 
## 
## Loadings:
## [1] 0.809 0.858 0.916 0.915
## 
##                Factor1
## SS loadings      3.066
## Proportion Var   0.767
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 10.14 on 2 degrees of freedom.
## The p-value is 0.00627