Import the surveys, combine them into a single data frame, delete identifying information, assign random IDs, and separate the items out by scale for item-level examination.
knitr::opts_chunk$set(message = F, warning = F)
# load libraries ----------------------------------------------------------
library(stringi)
library(psych)
library(DT)
library(ggplot2)
library(tidyr)
library(naniar)
library(UpSetR)
library(nFactors)
library(lavaan)
# load data ---------------------------------------------------------------
# alt <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Alternative Scales_April 28, 2022_12.34.csv", header=T)
# alt <- alt[-c(1,2),]
# alt$scale <- "alt"
#
# orig <- read.csv(file="UBelong Post-Survey Pitt OChem Spring 2022 Original Scales_April 28, 2022_12.35.csv", header=T)
# orig <- orig[-c(1,2),]
# orig$scale <- "orig"
#
# df <- rbind.data.frame(alt, orig)
# df <- subset(df, select = -c(1:19))
# names(df)
# generate random anonymized IDs: five letters + four digits + one letter
# myFun <- function(n) {
#   a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
#   paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
# }
# df$id <- myFun(nrow(df))
# write.csv(df, file="imported_anonymized.csv", row.names = F)
df <- read.csv(file="imported_anonymized.csv", header=T)
# extract items -----------------------------------------------------------
# new items
EEochem <- subset(df, select=grep("EEochem", colnames(df))) # entry expectations
CCdisc <- subset(df, select=grep("CCdisc", colnames(df))) # classroom climate
IDochem <- cbind.data.frame(subset(df, select=grep("IDochem", colnames(df))), subset(df, select=grep("FASochem", colnames(df)))) # identity
CSochem <- subset(df, select=grep("CSochem", colnames(df))) # career satisfaction
# established scales
MSchem <- subset(df, select=grep("MSchem", colnames(df))) # disciplinary growth mindset (chemistry)
IPchem <- subset(df, select=grep("IPchem", colnames(df))) # instructor growth mindset (chemistry)
SEchem <- subset(df, select=grep("SEchem", colnames(df))) # disciplinary self-efficacy (chemistry)
MSochem <- subset(df, select=grep("MSochem", colnames(df))) # disciplinary growth mindset (organic chemistry)
IPochem <- subset(df, select=grep("IPochem", colnames(df))) # instructor growth mindset (organic chemistry)
SEochem <- subset(df, select=grep("SEochem", colnames(df))) # disciplinary self-efficacy (organic chemistry)
CNEBochem_class <- subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(1:3)) # entity norms and beliefs (class)
CNEBochem_self <- subset(subset(df, select=grep("CNEBochem", colnames(df))), select=c(4:6)) # entity norms and beliefs (self)
CNHSochem_others <- subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(1:3)) # help seeking (others)
CNHSochem_self <- subset(subset(df, select=grep("CNHSochem", colnames(df))), select=c(4:6)) # help seeking (self)
CNSWochem <- subset(df, select=grep("CNSWochem", colnames(df))) # help seeking
FCochem <- subset(df, select=grep("FCochem", colnames(df))) # faculty caring
EEochem_desc <- data.frame(describe(EEochem))
datatable(subset(EEochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red'))) # columns 8:9 are skew and kurtosis; values outside +/-2 are flagged in red
ggplot(gather(EEochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(EEochem)
# gg_miss_upset(EEochem)
EEochem_items <- c("Took course b/c required",
"Looking forward to course",
"Heard difficult",
"Interested in topic",
"Requirement",
"Important to know",
"Important discipline",
"Important future classes",
"Important future career",
"Important medicine",
"Important MCAT")
d <- na.omit(EEochem)
colnames(d) <- EEochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
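# Optional numeric companion to the scree plot (added sketch): nScree also tabulates
# how many factors each retention rule suggests, and we can count how many observed
# eigenvalues exceed their parallel-analysis reference values.
nS$Components # optimal coordinates, acceleration factor, parallel analysis, Kaiser
sum(ev$values > ap$eigen$qevpea) # eigenvalues above the parallel-analysis criterion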
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3) # note: cutoff is not a fitting argument; the display cutoff is set in print() below
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Took course b/c required Looking forward to course Heard difficult
## 0.470 0.409 0.867
## Interested in topic Requirement Important to know
## 0.299 0.737 0.398
## Important discipline Important future classes Important future career
## 0.543 0.494 0.310
## Important medicine Important MCAT
## 0.431 0.803
##
## Loadings:
## Factor1 Factor2
## Important to know 0.707
## Important discipline 0.622
## Important future classes 0.706
## Important future career 0.818
## Important medicine 0.754
## Took course b/c required -0.710
## Looking forward to course 0.720
## Interested in topic 0.781
## Requirement -0.513
## Heard difficult
## Important MCAT 0.431
##
## Factor1 Factor2
## SS loadings 3.025 2.213
## Proportion Var 0.275 0.201
## Cumulative Var 0.275 0.476
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 88.45 on 34 degrees of freedom.
## The p-value is 9.78e-07
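Optional follow-up (not part of the original output): a quick internal-consistency check for the two factors suggested above. The item-to-factor assignment below is a reading of the rotated loadings and should be confirmed against the survey key.
imp_items <- c("EEochem06", "EEochem07", "EEochem08", "EEochem09", "EEochem10") # "importance" items (Factor 1, assumed)
exp_items <- c("EEochem01", "EEochem02", "EEochem04", "EEochem05") # entry-expectation items (Factor 2, assumed)
psych::alpha(na.omit(EEochem[, imp_items]))
psych::alpha(na.omit(EEochem[, exp_items]), check.keys = TRUE) # per the loadings above, items 01 and 05 are negatively keyed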
d <- subset(na.omit(EEochem), select=-c(EEochem08,EEochem09,EEochem10,EEochem11))
colnames(d) <- EEochem_items[1:7]
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Took course b/c required Looking forward to course Heard difficult
## 0.371 0.423 0.669
## Interested in topic Requirement Important to know
## 0.337 0.724 0.430
## Important discipline
## 0.376
##
## Loadings:
## Factor1 Factor2
## Took course b/c required 0.753
## Looking forward to course -0.592 0.476
## Heard difficult 0.515
## Interested in topic -0.626 0.521
## Requirement 0.514
## Important to know 0.740
## Important discipline 0.789
##
## Factor1 Factor2
## SS loadings 1.862 1.807
## Proportion Var 0.266 0.258
## Cumulative Var 0.266 0.524
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 17.21 on 8 degrees of freedom.
## The p-value is 0.028
CCdisc_desc <- data.frame(describe(CCdisc))
datatable(subset(CCdisc_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CCdisc), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CCdisc)
gg_miss_upset(CCdisc)
CCdisc_items <- c("Students competitive",
"Uncomfortable studying",
"Treated disrespectifully",
"Bad experiences studying")
d <- na.omit(CCdisc)
colnames(d) <- CCdisc_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Students competitive Uncomfortable studying Treated disrespectfully
## 0.988 0.982 0.724
## Bad experiences studying
## 0.157
##
## Loadings:
## [1] 0.525 0.918
##
## Factor1
## SS loadings 1.149
## Proportion Var 0.287
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 10.29 on 2 degrees of freedom.
## The p-value is 0.00583
How important are the following factors for your future career satisfaction?
CSochem_desc <- data.frame(describe(CSochem))
datatable(subset(CSochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CSochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CSochem)
# gg_miss_upset(CSochem)
CSochem_items <- c("Money",
"Prestige",
"Helping",
"Job security",
"Working people",
"New knowledge",
"Personal/family time",
"Low-stress",
"Excitement",
"Social problems",
"Using talents",
"Using education",
"Family expectations")
d <- na.omit(CSochem)
colnames(d) <- CSochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 2, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 2, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Money Prestige Helping
## 0.883 0.839 0.614
## Job security Working people New knowledge
## 0.683 0.710 0.359
## Personal/family time Low-stress Excitement
## 0.357 0.659 0.557
## Social problems Using talents Using education
## 0.708 0.471 0.486
## Family expectations
## 0.906
##
## Loadings:
## Factor1 Factor2
## Helping 0.621
## Working people 0.538
## New knowledge 0.796
## Excitement 0.649
## Social problems 0.539
## Using talents 0.727
## Using education 0.710
## Personal/family time 0.754
## Low-stress 0.579
## Money
## Prestige
## Job security 0.474
## Family expectations
##
## Factor1 Factor2
## SS loadings 3.563 1.204
## Proportion Var 0.274 0.093
## Cumulative Var 0.274 0.367
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 166.19 on 53 degrees of freedom.
## The p-value is 1.32e-13
EFA <- factanal(d, factors = 3, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 3, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Money Prestige Helping
## 0.884 0.847 0.612
## Job security Working people New knowledge
## 0.688 0.544 0.250
## Personal/family time Low-stress Excitement
## 0.412 0.554 0.533
## Social problems Using talents Using education
## 0.714 0.256 0.412
## Family expectations
## 0.918
##
## Loadings:
## Factor1 Factor2 Factor3
## Excitement 0.569
## Using talents 0.838
## Using education 0.708
## Working people 0.654
## New knowledge 0.437 0.745
## Personal/family time 0.708
## Low-stress 0.647
## Money
## Prestige
## Helping 0.498
## Job security
## Social problems
## Family expectations
##
## Factor1 Factor2 Factor3
## SS loadings 2.229 1.915 1.232
## Proportion Var 0.171 0.147 0.095
## Cumulative Var 0.171 0.319 0.414
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 115.43 on 42 degrees of freedom.
## The p-value is 8.99e-09
EFA <- factanal(d, factors = 4, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 4, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Money Prestige Helping
## 0.785 0.005 0.616
## Job security Working people New knowledge
## 0.709 0.593 0.199
## Personal/family time Low-stress Excitement
## 0.192 0.676 0.550
## Social problems Using talents Using education
## 0.714 0.152 0.458
## Family expectations
## 0.839
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## Helping 0.534
## Working people 0.624
## New knowledge 0.835
## Using talents 0.873
## Using education 0.626
## Prestige 0.954
## Personal/family time 0.844
## Low-stress 0.544
## Money
## Job security
## Excitement 0.417 0.478
## Social problems 0.444
## Family expectations
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 2.253 1.777 1.284 1.198
## Proportion Var 0.173 0.137 0.099 0.092
## Cumulative Var 0.173 0.310 0.409 0.501
##
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 69.35 on 32 degrees of freedom.
## The p-value is 0.000143
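For export or further inspection, the rotated loadings can be pulled out of the last-fitted factanal object as a plain matrix (optional sketch; print()'s display cutoff no longer applies).
round(unclass(loadings(EFA)), 2) # full loading matrix for the 4-factor solution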
IDochem_desc <- data.frame(describe(IDochem))
datatable(subset(IDochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(IDochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(IDochem)
gg_miss_upset(IDochem)
IDochem_items <- c("Parents R",
"Inst R",
"Friends R",
"Peers R",
"Exp R",
"Important",
"Understand",
"Help classes",
"World better",
"Look forward",
"Dont care",
"Boring",
"REC","VAL","FAS")
d <- na.omit(IDochem)
id_model <- ' rec =~ IDochem02 + IDochem03 + IDochem04 + IDochem05 + IDochem06
val =~ IDochem07 + IDochem08 + IDochem09 + IDochem10
fas =~ FASochem02 + FASochem03 + FASochem05'
fit <- cfa(id_model, data=d)
summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 49 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 27
##
## Number of observations 176
##
## Model Test User Model:
##
## Test statistic 85.689
## Degrees of freedom 51
## P-value (Chi-square) 0.002
##
## Model Test Baseline Model:
##
## Test statistic 1252.883
## Degrees of freedom 66
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.971
## Tucker-Lewis Index (TLI) 0.962
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1856.897
## Loglikelihood unrestricted model (H1) -1814.052
##
## Akaike (AIC) 3767.794
## Bayesian (BIC) 3853.397
## Sample-size adjusted Bayesian (BIC) 3767.895
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.062
## 90 Percent confidence interval - lower 0.038
## 90 Percent confidence interval - upper 0.085
## P-value RMSEA <= 0.05 0.184
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.068
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## rec =~
## IDochem02 1.000
## IDochem03 0.748 0.071 10.564 0.000
## IDochem04 1.082 0.065 16.707 0.000
## IDochem05 1.033 0.061 16.851 0.000
## IDochem06 0.835 0.081 10.318 0.000
## val =~
## IDochem07 1.000
## IDochem08 1.815 0.370 4.907 0.000
## IDochem09 2.149 0.402 5.349 0.000
## IDochem10 1.760 0.368 4.785 0.000
## fas =~
## FASochem02 1.000
## FASochem03 -0.905 0.083 -10.842 0.000
## FASochem05 0.908 0.074 12.193 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## rec ~~
## val 0.090 0.023 3.929 0.000
## fas 0.349 0.056 6.218 0.000
## val ~~
## fas 0.124 0.028 4.392 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .IDochem02 0.225 0.027 8.184 0.000
## .IDochem03 0.310 0.035 8.906 0.000
## .IDochem04 0.091 0.016 5.646 0.000
## .IDochem05 0.076 0.014 5.359 0.000
## .IDochem06 0.413 0.046 8.937 0.000
## .IDochem07 0.224 0.026 8.717 0.000
## .IDochem08 0.344 0.043 7.909 0.000
## .IDochem09 0.164 0.032 5.069 0.000
## .IDochem10 0.380 0.047 8.141 0.000
## .FASochem02 0.271 0.039 6.922 0.000
## .FASochem03 0.281 0.038 7.468 0.000
## .FASochem05 0.142 0.026 5.562 0.000
## rec 0.524 0.077 6.780 0.000
## val 0.060 0.021 2.857 0.004
## fas 0.505 0.082 6.169 0.000
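# Optional sketch: pull the headline fit indices and standardized loadings straight
# from the fitted lavaan object rather than re-reading the full summary() output.
fitMeasures(fit, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))
standardizedSolution(fit) # standardized estimates with confidence intervals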
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=IDochem_items, sizeMan=8, edge.label.cex=0.75)
MSchem_desc <- data.frame(describe(MSchem))
datatable(subset(MSchem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(MSchem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(MSchem)
# gg_miss_upset(MSchem)
MSchem_items <- c("Cannot develop",
"Won't get better",
"Don't have what it takes",
"No natural ability",
"Growth hard work",
"Capable work hard",
"Change work hard",
"FIX","GRO")
d <- na.omit(MSchem)
mind_model <- ' fix =~ MSchem01 + MSchem02 + MSchem03 + MSchem04
gro =~ MSchem05 + MSchem06 + MSchem07'
fit <- cfa(mind_model, data=d)
summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 32 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 15
##
## Number of observations 180
##
## Model Test User Model:
##
## Test statistic 46.730
## Degrees of freedom 13
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 871.010
## Degrees of freedom 21
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.960
## Tucker-Lewis Index (TLI) 0.936
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -997.476
## Loglikelihood unrestricted model (H1) -974.112
##
## Akaike (AIC) 2024.953
## Bayesian (BIC) 2072.847
## Sample-size adjusted Bayesian (BIC) 2025.342
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.120
## 90 Percent confidence interval - lower 0.084
## 90 Percent confidence interval - upper 0.158
## P-value RMSEA <= 0.05 0.001
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.045
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## fix =~
## MSchem01 1.000
## MSchem02 1.030 0.108 9.514 0.000
## MSchem03 1.246 0.117 10.670 0.000
## MSchem04 1.424 0.128 11.162 0.000
## gro =~
## MSchem05 1.000
## MSchem06 1.354 0.121 11.164 0.000
## MSchem07 1.328 0.125 10.634 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## fix ~~
## gro -0.183 0.032 -5.735 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .MSchem01 0.351 0.039 8.933 0.000
## .MSchem02 0.212 0.025 8.478 0.000
## .MSchem03 0.131 0.019 6.730 0.000
## .MSchem04 0.071 0.019 3.813 0.000
## .MSchem05 0.198 0.024 8.232 0.000
## .MSchem06 0.087 0.021 4.235 0.000
## .MSchem07 0.166 0.025 6.547 0.000
## fix 0.298 0.059 5.049 0.000
## gro 0.215 0.040 5.379 0.000
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=MSchem_items, sizeMan=8, edge.label.cex=0.75)
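Given the relatively high RMSEA (0.120) for this model, one optional diagnostic (not part of the original analysis) is to scan the largest modification indices of the fitted object.
mi <- modindices(fit) # candidate extra parameters and their expected chi-square drop
head(mi[order(mi$mi, decreasing = TRUE), ], 10)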
MSochem_desc <- data.frame(describe(MSochem))
datatable(subset(MSochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(MSochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(MSochem)
# gg_miss_upset(MSochem)
MSochem_items <- c("Cannot develop",
"Won't get better",
"Don't have what it takes",
"No natural ability",
"Growth hard work",
"Capable work hard",
"Change work hard",
"FIX","GRO")
d <- na.omit(MSochem)
mindo_model <- ' fix =~ MSochem01 + MSochem02 + MSochem03 + MSochem04
gro =~ MSochem05 + MSochem06 + MSochem07'
fit <- cfa(mindo_model, data=d)
summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 35 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 15
##
## Number of observations 181
##
## Model Test User Model:
##
## Test statistic 40.284
## Degrees of freedom 13
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 1053.371
## Degrees of freedom 21
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.974
## Tucker-Lewis Index (TLI) 0.957
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -918.777
## Loglikelihood unrestricted model (H1) -898.635
##
## Akaike (AIC) 1867.554
## Bayesian (BIC) 1915.531
## Sample-size adjusted Bayesian (BIC) 1868.025
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.108
## 90 Percent confidence interval - lower 0.071
## 90 Percent confidence interval - upper 0.146
## P-value RMSEA <= 0.05 0.007
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.035
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## fix =~
## MSochem01 1.000
## MSochem02 1.031 0.072 14.240 0.000
## MSochem03 1.154 0.077 15.060 0.000
## MSochem04 1.200 0.075 16.015 0.000
## gro =~
## MSochem05 1.000
## MSochem06 1.204 0.090 13.372 0.000
## MSochem07 1.064 0.093 11.492 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## fix ~~
## gro -0.270 0.039 -6.886 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .MSochem01 0.196 0.024 8.231 0.000
## .MSochem02 0.156 0.020 7.787 0.000
## .MSochem03 0.145 0.020 7.144 0.000
## .MSochem04 0.100 0.017 5.834 0.000
## .MSochem05 0.157 0.020 7.771 0.000
## .MSochem06 0.075 0.017 4.452 0.000
## .MSochem07 0.177 0.023 7.768 0.000
## fix 0.418 0.062 6.708 0.000
## gro 0.262 0.042 6.176 0.000
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=MSochem_items, sizeMan=8, edge.label.cex=0.75)
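Each CFA above drops incomplete cases with na.omit(); a hedged alternative is full-information maximum likelihood, which uses all available responses (assuming data are missing at random). For example, for the organic chemistry mindset model:
fit_fiml <- cfa(mindo_model, data = MSochem, missing = "fiml") # FIML instead of listwise deletion
fitMeasures(fit_fiml, c("cfi", "tli", "rmsea", "srmr"))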
IPchem_desc <- data.frame(describe(IPchem))
datatable(subset(IPchem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(IPchem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(IPchem)
# gg_miss_upset(IPchem)
IPchem_items <- c("Most students capable expectations",
"Most students capable A",
"Weed out course",
"Effort")
d <- na.omit(IPchem)
colnames(d) <- IPchem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Most students capable expectations Most students capable A
## 0.706 0.005
## Weed out course Effort
## 0.898 0.986
##
## Loadings:
## [1] 0.542 0.997
##
## Factor1
## SS loadings 1.405
## Proportion Var 0.351
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 18.28 on 2 degrees of freedom.
## The p-value is 0.000107
IPochem_desc <- data.frame(describe(IPochem))
datatable(subset(IPochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(IPochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(IPochem)
# gg_miss_upset(IPochem)
IPochem_items <- c("Most students capable expectations",
"Most students capable A",
"Weed out course",
"Effort")
d <- na.omit(IPochem)
colnames(d) <- IPochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Most students capable expectations Most students capable A
## 0.397 0.571
## Weed out course Effort
## 0.817 1.000
##
## Loadings:
## [1] 0.776 0.655 -0.427
##
## Factor1
## SS loadings 1.214
## Proportion Var 0.304
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 5.5 on 2 degrees of freedom.
## The p-value is 0.0639
SEchem_desc <- data.frame(describe(SEchem))
datatable(subset(SEchem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(SEchem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(SEchem)
gg_miss_upset(SEchem)
SEchem_items <- c("Study test",
"Help classmates",
"Sinking feeling",
"Always understand",
"Others understand",
"SE")
d <- na.omit(SEchem)
se_model <- ' se =~ SEchem01 + SEchem02 + SEchem03 + SEchem04 + SEchem05'
fit <- cfa(se_model, data=d)
summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 23 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 10
##
## Number of observations 180
##
## Model Test User Model:
##
## Test statistic 5.982
## Degrees of freedom 5
## P-value (Chi-square) 0.308
##
## Model Test Baseline Model:
##
## Test statistic 157.031
## Degrees of freedom 10
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.993
## Tucker-Lewis Index (TLI) 0.987
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1007.531
## Loglikelihood unrestricted model (H1) -1004.540
##
## Akaike (AIC) 2035.063
## Bayesian (BIC) 2066.992
## Sample-size adjusted Bayesian (BIC) 2035.322
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.033
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.113
## P-value RMSEA <= 0.05 0.544
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.031
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## se =~
## SEchem01 1.000
## SEchem02 0.957 0.162 5.895 0.000
## SEchem03 -0.896 0.162 -5.545 0.000
## SEchem04 0.729 0.134 5.441 0.000
## SEchem05 -0.844 0.154 -5.465 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .SEchem01 0.425 0.061 7.015 0.000
## .SEchem02 0.403 0.057 7.096 0.000
## .SEchem03 0.489 0.063 7.771 0.000
## .SEchem04 0.351 0.044 7.910 0.000
## .SEchem05 0.461 0.059 7.879 0.000
## se 0.289 0.073 3.966 0.000
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=SEchem_items, sizeMan=8, edge.label.cex=0.75)
SEochem_desc <- data.frame(describe(SEochem))
datatable(subset(SEochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(SEochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(SEochem)
gg_miss_upset(SEochem)
SEochem_items <- c("Study test",
"Help classmates",
"Sinking feeling",
"Always understand",
"Others understand",
"SE")
d <- na.omit(SEochem)
seo_model <- ' se =~ SEochem01 + SEochem02 + SEochem03 + SEochem04 + SEochem05'
fit <- cfa(seo_model, data=d)
summary(fit, fit.measures=TRUE)
## lavaan 0.6-9 ended normally after 21 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 10
##
## Number of observations 177
##
## Model Test User Model:
##
## Test statistic 2.865
## Degrees of freedom 5
## P-value (Chi-square) 0.721
##
## Model Test Baseline Model:
##
## Test statistic 210.396
## Degrees of freedom 10
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.021
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -943.374
## Loglikelihood unrestricted model (H1) -941.941
##
## Akaike (AIC) 1906.747
## Bayesian (BIC) 1938.509
## Sample-size adjusted Bayesian (BIC) 1906.841
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.077
## P-value RMSEA <= 0.05 0.863
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.019
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## se =~
## SEochem01 1.000
## SEochem02 0.986 0.124 7.941 0.000
## SEochem03 -0.833 0.121 -6.889 0.000
## SEochem04 0.679 0.107 6.350 0.000
## SEochem05 -0.697 0.113 -6.146 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .SEochem01 0.304 0.048 6.335 0.000
## .SEochem02 0.284 0.046 6.220 0.000
## .SEochem03 0.427 0.054 7.933 0.000
## .SEochem04 0.375 0.045 8.298 0.000
## .SEochem05 0.438 0.052 8.408 0.000
## se 0.361 0.072 5.003 0.000
semPlot::semPaths(fit, "std", edge.color="black", asize=2, nodeLabels=SEochem_items, sizeMan=8, edge.label.cex=0.75)
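Because the same five-item self-efficacy model is fit for general and organic chemistry, the standardized loadings can be compared descriptively by refitting both (a sketch, not a formal invariance test).
fit_se_chem <- cfa(se_model, data = na.omit(SEchem))
fit_se_ochem <- cfa(seo_model, data = na.omit(SEochem))
inspect(fit_se_chem, "std")$lambda # standardized loadings, general chemistry
inspect(fit_se_ochem, "std")$lambda # standardized loadings, organic chemistry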
Imagine students who scored low on the first exam in this class. What did most students in this class think about the low scoring students?
CNEBochem_class_desc <- data.frame(describe(CNEBochem_class))
datatable(subset(CNEBochem_class_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CNEBochem_class), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CNEBochem_class)
# gg_miss_upset(CNEBochem_class)
What did you think about the low scoring students? I thought…
CNEBochem_self_desc <- data.frame(describe(CNEBochem_self))
datatable(subset(CNEBochem_self_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CNEBochem_self), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CNEBochem_self)
# gg_miss_upset(CNEBochem_self)
When students struggled in this course, how comfortable did they feel seeking help from…
CNHSochem_others_desc <- data.frame(describe(CNHSochem_others))
datatable(subset(CNHSochem_others_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CNHSochem_others), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CNHSochem_others)
# gg_miss_upset(CNHSochem_others)
How comfortable did you feel in seeking help from…
CNHSochem_self_desc <- data.frame(describe(CNHSochem_self))
datatable(subset(CNHSochem_self_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CNHSochem_self), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CNHSochem_self)
# gg_miss_upset(CNHSochem_self)
During class…
CNSWochem_desc <- data.frame(describe(CNSWochem))
datatable(subset(CNSWochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(CNSWochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(CNSWochem)
# gg_miss_upset(CNSWochem)
In general, I felt that my instructor would…
FCochem_desc <- data.frame(describe(FCochem))
datatable(subset(FCochem_desc, select=-c(n, trimmed, mad))) %>%
formatRound(1:10) %>%
formatStyle(8:9, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
ggplot(gather(FCochem), aes(value)) +
geom_histogram(bins = 4) +
facet_wrap(~key)
vis_miss(FCochem)
# gg_miss_upset(FCochem)
FCochem_items <- c("Talk",
"Sensitive",
"Sympathetic",
"Understand")
d <- na.omit(FCochem)
colnames(d) <- FCochem_items
ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the model
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plot
plotnScree(nS) # shows us the scree plot, look for the elbows
EFA <- factanal(d, factors = 1, rotation = "varimax", cutoff = 0.3)
print(EFA, digits=3, cutoff=.4, sort=TRUE)
##
## Call:
## factanal(x = d, factors = 1, rotation = "varimax", cutoff = 0.3)
##
## Uniquenesses:
## Talk Sensitive Sympathetic Understand
## 0.346 0.264 0.161 0.163
##
## Loadings:
## [1] 0.809 0.858 0.916 0.915
##
## Factor1
## SS loadings 3.066
## Proportion Var 0.767
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 10.14 on 2 degrees of freedom.
## The p-value is 0.00627
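Given the clean single-factor structure, an optional follow-up is the internal consistency of the four faculty caring items (a sketch using psych::alpha).
psych::alpha(na.omit(FCochem)) # all four items load positively, so no reverse-keying is needed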