# Set Working Directory
# Make the project folder the working directory so that the relative data
# path used when the .sav file is read resolves correctly.
# NOTE(review): this is an absolute, machine-specific path, so the script
# stops here on any computer that lacks this folder. Consider an RStudio
# project or a relative path instead.
setwd(dir = "C:/Work Files/Directed Studies/Amber Fisher")
# Load the data and necessary packages
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (haven)
library(lavaan)
## This is lavaan 0.6-17
## lavaan is FREE software! Please report any bugs.
# Import the SPSS survey file; the path is relative to the working directory.
Amber_Data <- read_sav(file = "SW9420 Data.sav")
# Make any data cleaning edits
# Coerce every column to plain numeric so all variables share one type.
# NOTE(review): the recorded run warns "NAs introduced by coercion", so at
# least one column held values that are not numeric and were turned into NA.
# Confirm that this loss is intended.
Amber_Data <- mutate(
  Amber_Data,
  across(everything(), as.numeric)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
# Inspect the structure (column types and leading values) after the coercion
str(Amber_Data)
## tibble [1,262 × 89] (S3: tbl_df/tbl/data.frame)
## $ Q3 : num [1:1262] NA NA 5.1e+07 5.1e+07 5.1e+07 ...
## $ Year : num [1:1262] 2 1 1 1 1 1 1 1 1 1 ...
## $ School : num [1:1262] 12 71 12 12 12 12 12 12 12 12 ...
## $ School_District: num [1:1262] 1 6 1 1 1 1 1 1 1 1 ...
## $ City : num [1:1262] 1 6 1 1 1 1 1 1 1 1 ...
## $ County : num [1:1262] 1 3 1 1 1 1 1 1 1 1 ...
## $ Gender_Identity: num [1:1262] 2 1 2 2 1 5 2 1 1 2 ...
## $ Race : num [1:1262] 6 5 8 6 6 8 6 6 6 6 ...
## $ cat_race : num [1:1262] 2 1 3 2 2 3 2 2 2 2 ...
## $ Q6 : num [1:1262] 6 3 5 5 5 5 5 5 5 5 ...
## $ Q8 : num [1:1262] 3 2 1 1 1 4 NA 1 1 1 ...
## $ Q10 : num [1:1262] 2 1 1 1 2 2 2 2 2 2 ...
## $ Q12 : num [1:1262] 2 2 2 2 2 1 2 2 2 1 ...
## $ Q15_1 : num [1:1262] 2 4 3 3 4 3 3 4 3 4 ...
## $ Q15_2 : num [1:1262] 2 3 3 2 4 3 3 4 4 2 ...
## $ Q15_3 : num [1:1262] 4 2 3 2 4 3 1 4 2 1 ...
## $ Q15_4 : num [1:1262] 2 3 4 2 2 3 1 3 1 1 ...
## $ Q15_5 : num [1:1262] 2 2 4 2 1 3 2 3 1 1 ...
## $ Q15_6 : num [1:1262] 3 4 3 3 4 3 4 3 1 4 ...
## $ Q17_1 : num [1:1262] 2 1 2 3 4 1 2 3 3 4 ...
## $ Q17_2 : num [1:1262] 3 3 2 2 4 1 4 4 2 3 ...
## $ Q17_3 : num [1:1262] 3 2 3 3 4 1 4 3 3 3 ...
## $ Q17_4 : num [1:1262] 2 3 3 3 3 1 3 3 3 3 ...
## $ Q18_1 : num [1:1262] 2 2 4 2 4 1 3 4 3 4 ...
## $ Q18_2 : num [1:1262] 3 3 4 2 4 1 4 4 3 4 ...
## $ Q18_3 : num [1:1262] 2 2 2 3 4 1 3 1 1 4 ...
## $ Q18_4 : num [1:1262] 4 2 4 3 4 1 4 4 3 4 ...
## $ Q18_5 : num [1:1262] 3 4 4 2 4 1 3 3 3 4 ...
## $ Q18_6 : num [1:1262] 3 3 4 2 4 1 3 3 3 4 ...
## $ Q18_7 : num [1:1262] 3 4 2 3 4 1 3 3 3 4 ...
## $ Q18_8 : num [1:1262] 3 2 4 3 4 1 3 3 2 4 ...
## $ Q18_9 : num [1:1262] 3 1 2 2 4 1 4 4 4 4 ...
## $ Q18_10 : num [1:1262] 2 4 4 3 4 1 3 4 3 4 ...
## $ Q20_1 : num [1:1262] 4 NA 3 4 5 1 5 5 5 5 ...
## $ Q20_2 : num [1:1262] 5 NA 4 3 5 1 4 5 5 5 ...
## $ Q20_3 : num [1:1262] 5 3 4 4 5 1 3 5 5 5 ...
## $ Q20_4 : num [1:1262] 4 NA 4 4 5 1 4 5 5 5 ...
## $ Q20_5 : num [1:1262] 5 NA 4 4 5 1 5 5 5 5 ...
## $ Q21 : num [1:1262] 5 NA 4 3 5 2 4 4 4 4 ...
## $ Q22_1 : num [1:1262] 4 3 2 2 4 1 3 3 3 4 ...
## $ Q22_2 : num [1:1262] 3 3 3 3 4 2 3 3 3 4 ...
## $ Q22_3 : num [1:1262] 3 2 4 2 4 2 3 3 3 4 ...
## $ Q23_1 : num [1:1262] 4 1 4 3 5 1 5 4 4 4 ...
## $ Q23_2 : num [1:1262] 5 4 2 4 5 1 5 3 4 4 ...
## $ Q23_3 : num [1:1262] 4 3 4 4 5 1 5 4 4 4 ...
## $ Q23_4 : num [1:1262] 4 5 3 4 5 1 5 4 4 4 ...
## $ Q23_5 : num [1:1262] 5 3 3 4 5 1 5 4 4 4 ...
## $ Q26_1 : num [1:1262] 3 4 3 2 3 2 3 2 1 4 ...
## $ Q26_2 : num [1:1262] 3 3 4 3 4 4 4 3 1 4 ...
## $ Q26_3 : num [1:1262] 3 1 4 2 4 1 4 3 1 4 ...
## $ Q26_4 : num [1:1262] 3 3 4 3 3 2 4 3 2 4 ...
## $ Q26_5 : num [1:1262] 2 4 3 3 2 1 4 3 1 4 ...
## $ Q27_1 : num [1:1262] 2 3 2 4 2 3 2 2 2 4 ...
## $ Q27_2 : num [1:1262] 2 4 2 2 2 3 2 1 2 4 ...
## $ Q27_3 : num [1:1262] 2 2 2 2 2 4 4 3 3 4 ...
## $ Q27_4 : num [1:1262] 3 3 2 1 2 4 3 2 3 4 ...
## $ Q27_5 : num [1:1262] 2 1 2 2 1 4 2 2 2 4 ...
## $ Q28_1 : num [1:1262] 2 4 2 1 2 1 4 2 2 4 ...
## $ Q28_2 : num [1:1262] 2 2 2 1 1 1 4 2 2 4 ...
## $ Q28_3 : num [1:1262] 3 3 2 1 2 1 4 2 2 4 ...
## $ Q28_4 : num [1:1262] 3 2 2 1 2 1 4 2 2 4 ...
## $ Q28_5 : num [1:1262] 2 2 2 1 1 1 4 1 2 4 ...
## $ Q29_1 : num [1:1262] 2 3 2 2 2 1 3 4 4 4 ...
## $ Q29_2 : num [1:1262] 3 3 2 3 2 1 3 4 4 4 ...
## $ Q29_3 : num [1:1262] 2 4 2 2 2 1 3 4 4 4 ...
## $ Q29_4 : num [1:1262] 2 2 3 3 3 1 4 4 4 4 ...
## $ Q29_5 : num [1:1262] 3 3 3 3 1 1 3 4 4 4 ...
## $ Q29_6 : num [1:1262] 2 3 3 2 2 1 3 4 4 4 ...
## $ Q29_7 : num [1:1262] 2 4 2 2 3 1 2 4 4 4 ...
## $ Q31_1 : num [1:1262] 3 1 4 3 3 2 4 3 3 3 ...
## $ Q31_2 : num [1:1262] 3 2 3 4 3 2 4 3 3 3 ...
## $ Q31_3 : num [1:1262] 3 1 3 3 2 2 4 3 3 3 ...
## $ Q31_4 : num [1:1262] 3 3 2 2 4 1 3 4 3 3 ...
## $ Q32_1 : num [1:1262] 4 5 5 2 5 2 5 4 4 5 ...
## $ Q32_2 : num [1:1262] 4 2 5 4 5 2 5 5 4 5 ...
## $ Q32_3 : num [1:1262] 5 NA NA NA NA NA NA NA NA NA ...
## $ Q32_4 : num [1:1262] 4 NA 5 4 5 2 5 5 4 5 ...
## $ Q32_5 : num [1:1262] 4 2 5 4 5 2 5 5 4 5 ...
## $ Q34_1 : num [1:1262] 3 4 3 3 1 2 2 2 3 4 ...
## $ Q34_2 : num [1:1262] 2 3 4 3 4 2 4 4 3 4 ...
## $ Q34_3 : num [1:1262] 2 4 2 3 2 3 2 3 3 4 ...
## $ Q34_4 : num [1:1262] 3 1 2 2 4 1 3 3 3 4 ...
## $ Q34_5 : num [1:1262] 3 3 3 2 3 1 3 3 3 4 ...
## $ Q36_1 : num [1:1262] 3 4 2 3 3 1 4 3 3 4 ...
## $ Q36_2 : num [1:1262] 3 3 2 2 3 1 4 3 3 4 ...
## $ Q36_3 : num [1:1262] 3 3 2 1 4 1 4 3 3 4 ...
## $ Q36_4 : num [1:1262] 3 3 1 2 3 1 4 3 3 4 ...
## $ Q36_5 : num [1:1262] 2 1 1 3 3 1 4 3 3 4 ...
## $ Q36_6 : num [1:1262] 2 4 3 2 3 1 4 3 3 4 ...
# Tag each row with a sequential identifier (1, 2, ..., number of rows).
Amber_Data <- mutate(Amber_Data, ID = row_number())
# List the column names to confirm that ID was appended.
colnames(Amber_Data)
## [1] "Q3" "Year" "School" "School_District"
## [5] "City" "County" "Gender_Identity" "Race"
## [9] "cat_race" "Q6" "Q8" "Q10"
## [13] "Q12" "Q15_1" "Q15_2" "Q15_3"
## [17] "Q15_4" "Q15_5" "Q15_6" "Q17_1"
## [21] "Q17_2" "Q17_3" "Q17_4" "Q18_1"
## [25] "Q18_2" "Q18_3" "Q18_4" "Q18_5"
## [29] "Q18_6" "Q18_7" "Q18_8" "Q18_9"
## [33] "Q18_10" "Q20_1" "Q20_2" "Q20_3"
## [37] "Q20_4" "Q20_5" "Q21" "Q22_1"
## [41] "Q22_2" "Q22_3" "Q23_1" "Q23_2"
## [45] "Q23_3" "Q23_4" "Q23_5" "Q26_1"
## [49] "Q26_2" "Q26_3" "Q26_4" "Q26_5"
## [53] "Q27_1" "Q27_2" "Q27_3" "Q27_4"
## [57] "Q27_5" "Q28_1" "Q28_2" "Q28_3"
## [61] "Q28_4" "Q28_5" "Q29_1" "Q29_2"
## [65] "Q29_3" "Q29_4" "Q29_5" "Q29_6"
## [69] "Q29_7" "Q31_1" "Q31_2" "Q31_3"
## [73] "Q31_4" "Q32_1" "Q32_2" "Q32_3"
## [77] "Q32_4" "Q32_5" "Q34_1" "Q34_2"
## [81] "Q34_3" "Q34_4" "Q34_5" "Q36_1"
## [85] "Q36_2" "Q36_3" "Q36_4" "Q36_5"
## [89] "Q36_6" "ID"
# First-order CFA measurement model (lavaan syntax): 14 latent factors, each
# defined by its survey items through the `=~` operator.
# Fix: VictimReporting previously listed Q36_3 and Q36_4 twice. A repeated
# indicator adds no information to the factor definition, so each item is
# now listed exactly once.
# NOTE(review): the data also contain Q36_5 and Q36_6. If the repeated terms
# were a typo for those two items, add them here and refit; any previously
# recorded output would then need to be regenerated. Confirm against the
# analysis plan.
SchoolClimate <- '
  Engagement =~ Q15_1 + Q15_2 + Q15_3 + Q15_4 + Q15_5
  Inclusion =~ Q17_1 + Q17_2 + Q17_3 + Q17_4
  Connectedness =~ Q18_1 + Q18_2 + Q18_4 + Q18_7 + Q18_8 + Q18_9 + Q18_10
  TeacherEquality =~ Q20_1 + Q20_2 + Q20_3 + Q20_4 + Q20_5
  StudentTeacher =~ Q22_1 + Q22_2 + Q22_3
  AdminEquality =~ Q23_1 + Q23_2 + Q23_3 + Q23_4 + Q23_5
  Diversity =~ Q26_1 + Q26_3 + Q26_4 + Q26_5
  HarmfulBehavior =~ Q27_1 + Q27_2 + Q27_3 + Q27_4 + Q27_5
  StudentEquality =~ Q28_1 + Q28_2 + Q28_3 + Q28_4 + Q28_5
  SocialNorms =~ Q29_1 + Q29_2 + Q29_3 + Q29_4 + Q29_5 + Q29_6 + Q29_7
  RulePerception =~ Q31_1 + Q31_2 + Q31_3
  RuleEquality =~ Q32_1 + Q32_2 + Q32_3 + Q32_4 + Q32_5
  Consequences =~ Q34_1 + Q34_2 + Q34_3
  VictimReporting =~ Q36_1 + Q36_2 + Q36_3 + Q36_4
'
# Fit the first-order CFA specified in SchoolClimate to the survey data.
CFA_fit <- cfa(model = SchoolClimate, data = Amber_Data)
# Derive per-respondent scores on each latent factor from the fitted model.
# NOTE(review): the later EFA output reports 662 observations although the
# data have 1,262 rows, so the scores presumably cover only a subset of
# respondents (likely complete cases). Confirm before matching scores back
# to Amber_Data by row position.
factor_scores <- lavPredict(object = CFA_fit)
# Preview the first rows of the score matrix.
head(x = factor_scores)
## Engagement Inclusion Connectedness TeacherEquality StudentTeacher
## [1,] -0.1714299 -0.28604248 -0.18498486 0.17779219 0.064986686
## [2,] 0.2962407 0.08146759 0.60640636 -0.13086379 0.327815505
## [3,] -0.4263977 -0.45793970 -1.08381070 -0.73294464 -0.820569784
## [4,] 0.2531592 0.06914535 0.08055541 -0.34116434 -0.002346095
## [5,] -0.2820191 -0.26296212 -0.69819044 -0.03444876 -0.557708718
## [6,] -0.2012932 -0.26271246 -0.41071650 0.36053453 -0.161072290
## AdminEquality Diversity HarmfulBehavior StudentEquality SocialNorms
## [1,] -0.02498823 -0.143421466 0.1188448 0.4165735 -0.31063553
## [2,] 0.41125430 0.385120001 -0.4741450 -0.7736334 0.05287428
## [3,] -1.34450230 -0.462351256 0.4744494 0.3013083 -0.68396148
## [4,] -0.44954650 0.235147051 -1.0985636 -0.8254559 1.18742251
## [5,] -0.02466601 -0.267414472 0.3780341 0.4669578 0.17882328
## [6,] 0.23620343 0.002912749 -0.4554898 -0.4631378 -0.15747767
## RulePerception RuleEquality Consequences VictimReporting
## [1,] -0.13464411 -0.08187331 -0.001083222 0.3124637
## [2,] 0.62712724 0.51907989 0.339808551 1.2720722
## [3,] -0.31198958 -2.69122411 0.330998774 -0.9150837
## [4,] 0.56386004 -0.35858037 0.510845633 0.3912116
## [5,] 0.05029757 0.42705661 0.914821717 -1.2885697
## [6,] 0.30784535 0.37954604 0.206408809 -0.4609822
# Convert the factor-score matrix to a data frame for the analyses that follow
factor_scores_df <- data.frame(factor_scores)
# 2. Determine the number of factors
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:lavaan':
##
## cor2cov
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(nFactors)
## Warning: package 'nFactors' was built under R version 4.4.1
## Loading required package: lattice
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
# Correlations among the CFA factor scores
cor_matrix <- cor(x = factor_scores_df)
# Eigenvalues of that correlation matrix
eigenvalues <- eigen(cor_matrix)[["values"]]
# Kaiser criterion: retain as many factors as there are eigenvalues above 1
kaiser_criterion <- sum(eigenvalues > 1)
cat(
  "Number of factors based on Kaiser criterion:",
  kaiser_criterion,
  "\n"
)
## Number of factors based on Kaiser criterion: 3
# Scree plot of the factor scores as a visual check on the factor count
scree(factor_scores_df)
# Exploratory factor analysis with 3 factors and an oblique (promax) rotation
efa_results <- fa(
  r = factor_scores_df,
  nfactors = 3,
  rotate = "promax"
)
## Loading required namespace: GPArotation
# View a condensed summary of the EFA (fit statistics and factor correlations)
summary(efa_results)
##
## Factor analysis with Call: fa(r = factor_scores_df, nfactors = 3, rotate = "promax")
##
## Test of the hypothesis that 3 factors are sufficient.
## The degrees of freedom for the model is 52 and the objective function was 1.61
## The number of observations was 662 with Chi Square = 1052.8 with prob < 1.8e-186
##
## The root mean square of the residuals (RMSA) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## Tucker Lewis Index of factoring reliability = 0.781
## RMSEA index = 0.171 and the 10 % confidence intervals are 0.162 0.18
## BIC = 715.04
## With factor correlations of
## MR1 MR2 MR3
## MR1 1.00 0.45 -0.51
## MR2 0.45 1.00 -0.23
## MR3 -0.51 -0.23 1.00
# Print the full EFA results, including the pattern-matrix loadings
print(efa_results)
## Factor Analysis using method = minres
## Call: fa(r = factor_scores_df, nfactors = 3, rotate = "promax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 h2 u2 com
## Engagement 0.79 -0.03 -0.25 0.87 0.128 1.2
## Inclusion 0.77 0.17 0.07 0.68 0.322 1.1
## Connectedness 0.94 0.03 0.02 0.89 0.106 1.0
## TeacherEquality -0.07 0.93 -0.09 0.86 0.142 1.0
## StudentTeacher 0.85 0.07 -0.02 0.81 0.190 1.0
## AdminEquality -0.03 0.93 0.00 0.84 0.162 1.0
## Diversity 0.79 -0.01 -0.14 0.75 0.247 1.1
## HarmfulBehavior -0.13 0.16 0.92 0.92 0.083 1.1
## StudentEquality 0.04 0.00 0.80 0.61 0.394 1.0
## SocialNorms 0.77 -0.25 -0.08 0.54 0.461 1.2
## RulePerception 0.64 0.17 0.20 0.43 0.572 1.3
## RuleEquality 0.08 0.66 0.06 0.47 0.527 1.0
## Consequences 0.00 -0.17 0.30 0.15 0.854 1.6
## VictimReporting 0.73 -0.03 -0.01 0.52 0.480 1.0
##
## MR1 MR2 MR3
## SS loadings 5.17 2.34 1.83
## Proportion Var 0.37 0.17 0.13
## Cumulative Var 0.37 0.54 0.67
## Proportion Explained 0.55 0.25 0.20
## Cumulative Proportion 0.55 0.80 1.00
##
## With factor correlations of
## MR1 MR2 MR3
## MR1 1.00 0.45 -0.51
## MR2 0.45 1.00 -0.23
## MR3 -0.51 -0.23 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 91 with the objective function = 12.36 with Chi Square = 8101
## df of the model are 52 and the objective function was 1.61
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 662 with the empirical chi square 203.89 with prob < 7.3e-20
## The total n.obs was 662 with Likelihood Chi Square = 1052.8 with prob < 1.8e-186
##
## Tucker Lewis Index of factoring reliability = 0.781
## RMSEA index = 0.171 and the 90 % confidence intervals are 0.162 0.18
## BIC = 715.04
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR2 MR3
## Correlation of (regression) scores with factors 0.98 0.96 0.96
## Multiple R square of scores with factors 0.96 0.93 0.93
## Minimum correlation of possible factor scores 0.93 0.85 0.86
# Additional Code:
# Splitting a file to get a 50% random subsample
# Example: split a data frame into two random, non-overlapping halves.
# Create an example data frame
set.seed(123) # For reproducibility
df <- data.frame(
  ID = 1:100,
  Value = rnorm(100)
)
# Number of rows to draw for the first half. floor() keeps the size a whole
# number when the row count is odd (the extra row then falls in subsample 2).
n <- nrow(df)
n_split <- floor(n / 2)
# Draw row positions without replacement. sample.int() samples from 1..n
# directly and stays correct when n is 0 or 1, where sample(1:n, ...) does not.
indices <- sample.int(n, n_split)
# Split the data frame into two parts. The second half is selected by the
# positions that were not drawn, because df[-indices, ] returns zero rows
# when indices is empty.
subsample_1 <- df[indices, ]
subsample_2 <- df[setdiff(seq_len(n), indices), ]
# Check the size of the subsamples
cat("Number of rows in subsample 1:", nrow(subsample_1), "\n")
## Number of rows in subsample 1: 50
cat("Number of rows in subsample 2:", nrow(subsample_2), "\n")
## Number of rows in subsample 2: 50