This document includes the analytic report for Phase 1 of Study 1 in the STEM Research Experience Project, which is aimed at determining the nature and associations of different underlying components of STEM research experience. The preregistration document may be accessed here.
# list of loaded packages and versions
# Renders a scrollable HTML table of every package in the session, with the
# packages named in `libraries` shown in bold red.
# NOTE(review): `libraries` is not defined in this chunk; presumably it is the
# character vector of packages attached with library() -- confirm upstream.
# The package table is extracted by name rather than by position so the chunk
# does not depend on the element order of the session_info object.
si <- devtools::session_info()$packages
rownames(si) <- NULL
si %>%
  select(package, loadedversion, date, source) %>%
  # red bold the called packages
  mutate(
    package = cell_spec(
      package,
      color = ifelse(package %in% libraries, "red", "black"),
      # `%in%` already returns a logical vector, so no ifelse() wrapper is needed
      bold = package %in% libraries
    )
  ) %>%
  # escape = FALSE keeps the HTML produced by cell_spec() as markup
  knitr::kable(escape = FALSE, caption = "All loaded packages.
Bolded in red are those loaded explicitly with
<code>library()</code>") %>%
  kable_styling() %>%
  scroll_box(height = "300px")
| package | loadedversion | date | source |
|---|---|---|---|
| assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.2.1) |
| bslib | 0.4.0 | 2022-07-16 | CRAN (R 4.2.1) |
| cachem | 1.0.6 | 2021-08-19 | CRAN (R 4.2.1) |
| callr | 3.7.3 | 2022-11-02 | CRAN (R 4.2.2) |
| cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.2.1) |
| cli | 3.4.1 | 2022-09-23 | CRAN (R 4.2.1) |
| colorspace | 2.0-3 | 2022-02-21 | CRAN (R 4.2.1) |
| crayon | 1.5.2 | 2022-09-29 | CRAN (R 4.2.1) |
| curl | 4.3.3 | 2022-10-06 | CRAN (R 4.2.1) |
| data.table | 1.14.6 | 2022-11-16 | CRAN (R 4.2.2) |
| DBI | 1.1.3 | 2022-06-18 | CRAN (R 4.2.1) |
| devtools | 2.4.5 | 2022-10-11 | CRAN (R 4.2.1) |
| digest | 0.6.29 | 2021-12-01 | CRAN (R 4.2.1) |
| dplyr | 1.0.10 | 2022-09-01 | CRAN (R 4.2.1) |
| EFAtools | 0.4.4 | 2023-01-06 | CRAN (R 4.2.3) |
| ellipsis | 0.3.2 | 2021-04-29 | CRAN (R 4.2.1) |
| evaluate | 0.17 | 2022-10-07 | CRAN (R 4.2.1) |
| fansi | 1.0.3 | 2022-03-24 | CRAN (R 4.2.1) |
| fastmap | 1.1.0 | 2021-01-25 | CRAN (R 4.2.1) |
| forcats | 0.5.2 | 2022-08-19 | CRAN (R 4.2.1) |
| foreign | 0.8-83 | 2022-09-28 | CRAN (R 4.2.2) |
| fs | 1.5.2 | 2021-12-08 | CRAN (R 4.2.1) |
| generics | 0.1.3 | 2022-07-05 | CRAN (R 4.2.1) |
| glue | 1.6.2 | 2022-02-24 | CRAN (R 4.2.1) |
| haven | 2.5.1 | 2022-08-22 | CRAN (R 4.2.1) |
| hms | 1.1.2 | 2022-08-19 | CRAN (R 4.2.1) |
| htmltools | 0.5.3 | 2022-07-18 | CRAN (R 4.2.1) |
| htmlwidgets | 1.5.4 | 2021-09-08 | CRAN (R 4.2.1) |
| httpuv | 1.6.6 | 2022-09-08 | CRAN (R 4.2.1) |
| httr | 1.4.4 | 2022-08-17 | CRAN (R 4.2.1) |
| jquerylib | 0.1.4 | 2021-04-26 | CRAN (R 4.2.1) |
| jsonlite | 1.8.2 | 2022-10-02 | CRAN (R 4.2.1) |
| kableExtra | 1.3.4 | 2021-02-20 | CRAN (R 4.2.1) |
| knitr | 1.40 | 2022-08-24 | CRAN (R 4.2.1) |
| labelled | 2.10.0 | 2022-09-14 | CRAN (R 4.2.2) |
| later | 1.3.0 | 2021-08-18 | CRAN (R 4.2.1) |
| lattice | 0.20-45 | 2021-09-22 | CRAN (R 4.2.2) |
| lifecycle | 1.0.3 | 2022-10-07 | CRAN (R 4.2.1) |
| magrittr | 2.0.3 | 2022-03-30 | CRAN (R 4.2.1) |
| memoise | 2.0.1 | 2021-11-26 | CRAN (R 4.2.1) |
| mime | 0.12 | 2021-09-28 | CRAN (R 4.2.0) |
| miniUI | 0.1.1.1 | 2018-05-18 | CRAN (R 4.2.1) |
| mnormt | 2.1.1 | 2022-09-26 | CRAN (R 4.2.1) |
| munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.2.1) |
| nlme | 3.1-160 | 2022-10-10 | CRAN (R 4.2.2) |
| openxlsx | 4.2.5 | 2021-12-14 | CRAN (R 4.2.1) |
| pillar | 1.8.1 | 2022-08-19 | CRAN (R 4.2.1) |
| pkgbuild | 1.3.1 | 2021-12-20 | CRAN (R 4.2.1) |
| pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.2.1) |
| pkgload | 1.3.0 | 2022-06-27 | CRAN (R 4.2.1) |
| prettyunits | 1.1.1 | 2020-01-24 | CRAN (R 4.2.1) |
| processx | 3.7.0 | 2022-07-07 | CRAN (R 4.2.1) |
| profvis | 0.3.7 | 2020-11-02 | CRAN (R 4.2.1) |
| promises | 1.2.0.1 | 2021-02-11 | CRAN (R 4.2.1) |
| ps | 1.7.1 | 2022-06-18 | CRAN (R 4.2.1) |
| psych | 2.2.9 | 2022-09-29 | CRAN (R 4.2.2) |
| purrr | 1.0.1 | 2023-01-10 | CRAN (R 4.2.3) |
| R6 | 2.5.1 | 2021-08-19 | CRAN (R 4.2.1) |
| Rcpp | 1.0.10 | 2023-01-22 | CRAN (R 4.2.2) |
| readxl | 1.4.1 | 2022-08-17 | CRAN (R 4.2.1) |
| remotes | 2.4.2 | 2021-11-30 | CRAN (R 4.2.1) |
| rio | 0.5.29 | 2021-11-22 | CRAN (R 4.2.1) |
| rlang | 1.1.0 | 2023-03-14 | CRAN (R 4.2.3) |
| rmarkdown | 2.17 | 2022-10-07 | CRAN (R 4.2.1) |
| rstudioapi | 0.14 | 2022-08-22 | CRAN (R 4.2.1) |
| rvest | 1.0.3 | 2022-08-19 | CRAN (R 4.2.1) |
| sass | 0.4.2 | 2022-07-16 | CRAN (R 4.2.1) |
| scales | 1.2.1 | 2022-08-20 | CRAN (R 4.2.1) |
| sessioninfo | 1.2.2 | 2021-12-06 | CRAN (R 4.2.1) |
| shiny | 1.7.2 | 2022-07-19 | CRAN (R 4.2.1) |
| stringi | 1.7.8 | 2022-07-11 | CRAN (R 4.2.1) |
| stringr | 1.5.0 | 2022-12-02 | CRAN (R 4.2.2) |
| svglite | 2.1.0 | 2022-02-03 | CRAN (R 4.2.1) |
| systemfonts | 1.0.4 | 2022-02-11 | CRAN (R 4.2.1) |
| tibble | 3.1.8 | 2022-07-22 | CRAN (R 4.2.1) |
| tidyselect | 1.2.0 | 2022-10-10 | CRAN (R 4.2.1) |
| urlchecker | 1.0.1 | 2021-11-30 | CRAN (R 4.2.1) |
| usethis | 2.1.6 | 2022-05-25 | CRAN (R 4.2.1) |
| utf8 | 1.2.2 | 2021-07-24 | CRAN (R 4.2.1) |
| vctrs | 0.6.0 | 2023-03-16 | CRAN (R 4.2.3) |
| viridisLite | 0.4.1 | 2022-08-22 | CRAN (R 4.2.1) |
| webshot | 0.5.4 | 2022-09-26 | CRAN (R 4.2.1) |
| xfun | 0.33 | 2022-09-12 | CRAN (R 4.2.1) |
| xml2 | 1.3.3 | 2021-11-30 | CRAN (R 4.2.1) |
| xtable | 1.8-4 | 2019-04-21 | CRAN (R 4.2.1) |
| yaml | 2.3.5 | 2022-02-21 | CRAN (R 4.2.1) |
| zip | 2.2.1 | 2022-09-08 | CRAN (R 4.2.1) |
# Read the master survey file (SPSS .sav) into `dat`; rio selects the reader
# from the file extension.
dat <- rio::import("../Data/SciEngSurvey Alumni Retro MASTER.sav")
We start by randomly selecting 60% of cases (302) for the exploratory factor analysis (EFA).
# Draw the EFA (training) subsample: fix the RNG seed so the draw is
# reproducible, then sample 302 participant IDs without replacement.
set.seed(20230503)
train <- sample(x = dat$ID, size = 302)

# Keep only the sampled participants and the 19 research-experience items
# (the columns spanning outcls1 through outcls19).
items <- dplyr::select(
  dplyr::filter(dat, ID %in% train),
  outcls1:outcls19
)
Then, we compute the initial communalities (squared multiple correlations) across the 19 items:
# calculate communalities
# Initial communality estimates are the squared multiple correlations returned
# by psych::smc(). The column is created by name instead of relying on the
# magrittr placeholder `.` becoming the column name inside as.data.frame().
# The item names carried by the smc vector become the row names of the table.
data.frame(communality = psych::smc(items)) %>%
  # attach each item's variable label as a descriptive column
  mutate(item = var_label(items)) %>%
  relocate(item) %>%
  knitr::kable(
    escape = FALSE,
    caption = "Communalities of all items
about STEM research experiences"
  ) %>%
  kable_styling() %>%
  scroll_box(height = "600px")
| | item | communality |
|---|---|---|
| outcls1 | Participated in Research / Eng Projects | 0.701 |
| outcls2 | Worked in Sci / Eng | 0.552 |
| outcls3 | Member of Research / Eng Team | 0.774 |
| outcls4 | Played Leadership Role | 0.613 |
| outcls5 | Generated Research Question / Eng Problem | 0.707 |
| outcls6 | Identified Questions | 0.823 |
| outcls7 | Collected Data / Identified Constraints | 0.873 |
| outcls8 | Interpreted Data / Found Solutions | 0.891 |
| outcls9 | Explained Results / Evaluated Solution Fit | 0.869 |
| outcls10 | Used Literature | 0.762 |
| outcls11 | Related Results to Work of Others | 0.726 |
| outcls12 | Gave Presentation to Students | 0.712 |
| outcls13 | Gave Professional Presentation | 0.709 |
| outcls14 | Wrote Article | 0.330 |
| outcls15 | Planned Research / Projects | 0.691 |
| outcls16 | Attended Lectures | 0.580 |
| outcls17 | Attended Conferences | 0.585 |
| outcls18 | Learned Technical Skills | 0.826 |
| outcls19 | Learned Terminology | 0.830 |
According to these initial communalities, we retain all 19 items because every value exceeds the preregistered .15 threshold (the lowest is .330, for outcls14). Next, we test the EFA assumptions:
# Assumption check 1: Bartlett's test of sphericity on the item data
items %>%
  EFAtools::BARTLETT()
##
## ✔ The Bartlett's test of sphericity was significant at an alpha level of .05.
## These data are probably suitable for factor analysis.
##
## 𝜒²(171) = 5674, p < .001
# Assumption check 2: Kaiser-Meyer-Olkin criterion of sampling adequacy
# (overall value plus one value per item)
items %>%
  EFAtools::KMO()
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ✔ The overall KMO value for your data is marvellous.
## These data are probably suitable for factor analysis.
##
## Overall: 0.947
##
## For each variable:
## outcls1 outcls2 outcls3 outcls4 outcls5 outcls6 outcls7 outcls8
## 0.959 0.949 0.918 0.947 0.967 0.961 0.953 0.953
## outcls9 outcls10 outcls11 outcls12 outcls13 outcls14 outcls15 outcls16
## 0.955 0.970 0.976 0.949 0.919 0.948 0.955 0.960
## outcls17 outcls18 outcls19
## 0.934 0.901 0.902
Having passed these assumption checks, we can move on to parallel analysis to determine the number of factors to extract:
# Parallel analysis: compare the EFA-based eigenvalues of the observed items
# with those of simulated random data sets. The seed is fixed because the
# simulation is random.
# NOTE(review): n_datasets is the number of simulated data sets, not the
# sample size; 302 matches the training-sample size -- confirm it is intended.
set.seed(20230503)
items %>%
  EFAtools::PARALLEL(n_datasets = 302, eigen_type = "EFA")
## Parallel Analysis performed using 302 simulated random data sets
## Eigenvalues were found using EFA
##
## Decision rule used: means
##
## ── Number of factors to retain according to ────────────────────────────────────
##
## ◌ EFA-determined eigenvalues: 4
Lastly, we conduct the EFA with 4 extracted factors, using maximum likelihood estimation and an oblimin (oblique) rotation:
# Exploratory factor analysis: 4 factors, maximum likelihood extraction,
# oblimin rotation, fitted to the pairwise-complete correlation matrix.
efa <- psych::fa(
  r = cor(items, use = "pairwise.complete.obs"),
  nfactors = 4,
  # psych::fa() cannot infer the sample size from a correlation matrix, so it
  # is supplied here; fit statistics that depend on N need it.
  # NOTE(review): with pairwise deletion, nrow(items) is an upper bound on the
  # effective N if any responses are missing.
  n.obs = nrow(items),
  rotate = "oblimin",
  fm = "ml"
)
# show the pattern loadings, hiding values below 0.15 in absolute size
print(efa$loadings, cut = 0.15)
##
## Loadings:
## ML1 ML4 ML3 ML2
## outcls1 0.231 0.518
## outcls2 0.538 0.249
## outcls3 0.904
## outcls4 0.179 0.196 0.531
## outcls5 0.719
## outcls6 0.673 0.195
## outcls7 0.896
## outcls8 0.923
## outcls9 1.005
## outcls10 0.684 0.216
## outcls11 0.439 0.208 0.264
## outcls12 0.753
## outcls13 0.878
## outcls14 0.451
## outcls15 0.433 0.389
## outcls16 0.311 0.168 0.338
## outcls17 0.696 0.158
## outcls18 0.801
## outcls19 0.941
##
## ML1 ML4 ML3 ML2
## SS loadings 4.601 2.386 1.835 1.875
## Proportion Var 0.242 0.126 0.097 0.099
## Cumulative Var 0.242 0.368 0.464 0.563
The factors and corresponding items with a minimum loading of 0.15 are listed below. Bolded are items with cross-loadings on at least 2 factors. The asterisk * signifies that the cross-loaded item has the highest loading for this factor.