library(psych)
library(corrplot)
## corrplot 0.92 loaded
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(foreign)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
Importing files from spss and removing sid
faces <- read.spss("/Users/KKW4/Desktop/Research projects/FACES - EFA analysis/T1 to T4 FACES Caregiver and Youth Raw & Totals SRP_w_arrest.sav", to.data.frame = TRUE)
Full analysis
Correlation matrix for relevant variables
fac[, c(68:87, 152:171, 205)] <- sapply(fac[, c(68:87, 152:171, 205)], as.numeric)#changing to numeric
cor_fac <- cor(fac[, c(68:87, 152:171, 205)], use = "pairwise.complete.obs") #including relevant data for analysis
corrplot(cor_fac, method = "number")

Removing NAs for complete analysis
fac_t4 <- subset(fac[ , c(68:87, 152:171, 205)])
fac_t4 <- na.omit(fac_t4)
Selecting T4 data and Running Kaiser-Meyer-Olkin adequcy check
Bartlett test of sphericity
cortest.bartlett(X)
## R was not square, finding R from data
## $chisq
## [1] 2956.052
##
## $p.value
## [1] 2.33647e-238
##
## $df
## [1] 820
Checking determinance
det(cor(X))
## [1] 1.726971e-10
Parallel analysis suggests that the number of factors = 4, but we
are testing 3.
Factor analysis using FA - based on the scree plot and our
discussion, we are now using 3 factors.
Factor loading matrix - this provides a statistical categorization
of items into constructs or factors. Some items may not be included in
the factor. You can determine whether that’s true and we can reexamine
our EFA
Constructing correlation coefficients between two factors loadings-
would this coefficient may provide a construct score from the two
loadings based on factor loadings suitable for the R&R? If they are
different than the scores, should we rely on these as “construct scores”
from the factor loadings?
factor_loadings <- round(fit$loadings[ 1:40,], 2)
cor_load <- cor(factor_loadings, use = "pairwise.complete.obs") #including relevant data for analysis
cor_load
## Factor1 Factor2 Factor3
## Factor1 1.0000000 -0.44743143 -0.53464575
## Factor2 -0.4474314 1.00000000 -0.07559533
## Factor3 -0.5346458 -0.07559533 1.00000000
factor_scores <- as.data.frame(factanal.none$scores)
cor_score <- cor(factor_scores, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score
## Factor1 Factor2 Factor3
## Factor1 1.000000e+00 -9.020683e-17 -1.079572e-16
## Factor2 -9.020683e-17 1.000000e+00 -3.863483e-17
## Factor3 -1.079572e-16 -3.863483e-17 1.000000e+00
Selecting data and Appending scores to full file for analysis
fac_t4 %>%
select(arrestT4) -> arrest_var
fac_score_arrest <- cbind(arrest_var, factor_scores)
Correlations between factor scores, including the outcome variable,
arrest
cor_score_arrest <- cor(fac_score_arrest, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_arrest
## arrestT4 Factor1 Factor2 Factor3
## arrestT4 1.00000000 -3.912859e-02 2.118218e-02 5.175604e-02
## Factor1 -0.03912859 1.000000e+00 -9.020683e-17 -1.079572e-16
## Factor2 0.02118218 -9.020683e-17 1.000000e+00 -3.863483e-17
## Factor3 0.05175604 -1.079572e-16 -3.863483e-17 1.000000e+00
Logit with factor scores as independent variables and arrest as the
outcome
fac_score_arrest$arrest_bin[fac_score_arrest$arrestT4 == 1] <- "0"
fac_score_arrest$arrest_bin[fac_score_arrest$arrestT4 == 2] <- "1"
fac_mod <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2 + Factor3, data = fac_score_arrest, family = "binomial")
summ(fac_mod, digits = 3)
## MODEL INFO:
## Observations: 147
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(3) = 0.685, p = 0.877
## Pseudo-R² (Cragg-Uhler) = 0.007
## Pseudo-R² (McFadden) = 0.005
## AIC = 150.468, BIC = 162.429
##
## Standard errors: MLE
## ---------------------------------------------------
## Est. S.E. z val. p
## ----------------- -------- ------- -------- -------
## (Intercept) -1.456 0.212 -6.878 0.000
## Factor1 -0.103 0.216 -0.477 0.633
## Factor2 0.059 0.226 0.260 0.795
## Factor3 0.149 0.238 0.627 0.531
## ---------------------------------------------------
Odds ratios and 95% CIs - these are simply exponentiated
coefficients from above with CIs
exp(cbind(OR = coef(fac_mod), confint(fac_mod)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2331066 0.1508530 0.3472036
## Factor1 0.9018792 0.5871916 1.3789767
## Factor2 1.0605937 0.6818503 1.6630395
## Factor3 1.1606457 0.7257767 1.8543706
Logit with Factors 1 and 3 as moderators (interaction effects)
fac_mod2 <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2 + Factor3 + Factor1 * Factor3, data = fac_score_arrest, family = "binomial")
summ(fac_mod2, digits = 3)
## MODEL INFO:
## Observations: 147
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(4) = 0.686, p = 0.953
## Pseudo-R² (Cragg-Uhler) = 0.007
## Pseudo-R² (McFadden) = 0.005
## AIC = 152.466, BIC = 167.419
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## --------------------- -------- ------- -------- -------
## (Intercept) -1.456 0.212 -6.878 0.000
## Factor1 -0.102 0.219 -0.467 0.640
## Factor2 0.060 0.228 0.263 0.793
## Factor3 0.149 0.238 0.626 0.531
## Factor1:Factor3 -0.010 0.285 -0.035 0.972
## -------------------------------------------------------
Odds ratios for factors 1 and 3 as moderators
exp(cbind(OR = coef(fac_mod2), confint(fac_mod2)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2331135 0.1508594 0.3472106
## Factor1 0.9028803 0.5849506 1.3862619
## Factor2 1.0618124 0.6797835 1.6740071
## Factor3 1.1604482 0.7251940 1.8543086
## Factor1:Factor3 0.9900885 0.5619919 1.7393777
Logit with Factors 1 and 3 as unadjusted moderators (interaction
effects)
fac_mod3 <- glm(as.numeric(arrest_bin) ~ Factor1 * Factor3, data = fac_score_arrest, family = "binomial")
summ(fac_mod3, digits = 3)
## MODEL INFO:
## Observations: 147
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(3) = 0.617, p = 0.893
## Pseudo-R² (Cragg-Uhler) = 0.007
## Pseudo-R² (McFadden) = 0.004
## AIC = 150.536, BIC = 162.497
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## --------------------- -------- ------- -------- -------
## (Intercept) -1.455 0.212 -6.879 0.000
## Factor1 -0.102 0.218 -0.469 0.639
## Factor3 0.149 0.238 0.626 0.531
## Factor1:Factor3 0.001 0.283 0.003 0.998
## -------------------------------------------------------
Odds ratios for factors 1 and 3 as unadjusted moderators
exp(cbind(OR = coef(fac_mod3), confint(fac_mod3)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2333328 0.1510654 0.3474448
## Factor1 0.9026011 0.5849763 1.3853227
## Factor3 1.1606267 0.7251614 1.8547780
## Factor1:Factor3 1.0007901 0.5708951 1.7482452
Youth analysis
Correlation matrix for relevant variables - youth
Scree plot - this plot determines relatedness using correlations -
youth
This is based on 2 factors
library(ggplot2)
fafitfree_y <- fa(fac_yt4, nfactors = 2, rotate = "none")
n_factors <- length(fafitfree_y$e.values)
scree_y <- data.frame(
Factor_n = as.factor(1:n_factors),
Eigenvalue = fafitfree_y$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) +
geom_point() + geom_line() +
xlab("Number of factors") +
ylab("Initial eigenvalue") +
labs( title = "Scree Plot",
subtitle = "(Based on the unreduced correlation matrix)")

Parallel analysis - youth
The scree plot will show how many factors to consider where the FA
actual data line bows
par_y <- fa.parallel(fac_yt4, fa = "fa")

## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
Parallel analysis suggests that the number of factors = 2 for youth
questions.
Factor analysis using FA -, we are now using 2 factors.
fa.none_y <- fa(r=fac_yt4,
nfactors = 2,
# covar = FALSE, SMC = TRUE,
fm="pa", # type of factor analysis we want to use (“pa” is principal axis factoring)
max.iter=100, # (50 is the default, but we have changed it to 100
rotate="oblimin") # none rotation
## Loading required namespace: GPArotation
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : I
## am sorry, to do these rotations requires the GPArotation package to be installed
fa.none_y
## Factor Analysis using method = pa
## Call: fa(r = fac_yt4, nfactors = 2, rotate = "oblimin", max.iter = 100,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## fac4y001 0.63 -0.19 0.436 0.56 1.2
## fac4y002 0.64 -0.15 0.438 0.56 1.1
## fac4y003 0.63 -0.29 0.475 0.53 1.4
## fac4y004 0.55 -0.05 0.302 0.70 1.0
## fac4y005 0.72 -0.18 0.558 0.44 1.1
## fac4y006 0.52 0.37 0.408 0.59 1.8
## fac4y007 0.68 0.06 0.471 0.53 1.0
## fac4y008 0.64 0.33 0.528 0.47 1.5
## fac4y009 0.83 -0.14 0.704 0.30 1.1
## fac4y010 0.61 0.14 0.386 0.61 1.1
## fac4y011 0.78 -0.31 0.704 0.30 1.3
## fac4y012 0.44 0.25 0.261 0.74 1.6
## fac4y013 0.76 -0.16 0.601 0.40 1.1
## fac4y014 0.37 0.63 0.533 0.47 1.6
## fac4y015 0.81 -0.20 0.705 0.29 1.1
## fac4y016 0.61 0.11 0.389 0.61 1.1
## fac4y017 0.73 0.12 0.551 0.45 1.1
## fac4y018 0.38 0.41 0.313 0.69 2.0
## fac4y019 0.75 -0.21 0.608 0.39 1.2
## fac4y020 0.36 0.50 0.384 0.62 1.8
## arrestT4 -0.02 0.21 0.043 0.96 1.0
##
## PA1 PA2
## SS loadings 8.17 1.63
## Proportion Var 0.39 0.08
## Cumulative Var 0.39 0.47
## Proportion Explained 0.83 0.17
## Cumulative Proportion 0.83 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 210 with the objective function = 12.12 with Chi Square = 1686.27
## df of the model are 169 and the objective function was 2.36
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 148 with the empirical chi square 202.61 with prob < 0.04
## The total n.obs was 148 with Likelihood Chi Square = 325.36 with prob < 5.6e-12
##
## Tucker Lewis Index of factoring reliability = 0.867
## RMSEA index = 0.079 and the 90 % confidence intervals are 0.066 0.092
## BIC = -519.16
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## PA1 PA2
## Correlation of (regression) scores with factors 0.97 0.87
## Multiple R square of scores with factors 0.95 0.76
## Minimum correlation of possible factor scores 0.90 0.52
Factor analysis using factanal method - youth
We explored a 0.4 cutoff. Some items load for more than 1 factor. It
seems the ideal cutoff is 0.3 vs. 0.4
factanal.none_y <- factanal(fac_yt4, factors=2, scores = c("regression"), rotation = "none")
print(factanal.none_y, cut = .3)
##
## Call:
## factanal(x = fac_yt4, factors = 2, scores = c("regression"), rotation = "none")
##
## Uniquenesses:
## fac4y001 fac4y002 fac4y003 fac4y004 fac4y005 fac4y006 fac4y007 fac4y008
## 0.571 0.605 0.547 0.723 0.461 0.582 0.515 0.471
## fac4y009 fac4y010 fac4y011 fac4y012 fac4y013 fac4y014 fac4y015 fac4y016
## 0.278 0.613 0.280 0.732 0.377 0.454 0.284 0.613
## fac4y017 fac4y018 fac4y019 fac4y020 arrestT4
## 0.447 0.707 0.353 0.656 0.965
##
## Loadings:
## Factor1 Factor2
## fac4y001 0.642
## fac4y002 0.626
## fac4y003 0.634
## fac4y004 0.524
## fac4y005 0.725
## fac4y006 0.492 0.418
## fac4y007 0.689
## fac4y008 0.618 0.383
## fac4y009 0.843
## fac4y010 0.585
## fac4y011 0.811
## fac4y012 0.409 0.317
## fac4y013 0.779
## fac4y014 0.318 0.667
## fac4y015 0.833
## fac4y016 0.600
## fac4y017 0.718
## fac4y018 0.357 0.406
## fac4y019 0.784
## fac4y020 0.329 0.486
## arrestT4
##
## Factor1 Factor2
## SS loadings 8.112 1.655
## Proportion Var 0.386 0.079
## Cumulative Var 0.386 0.465
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 322.27 on 169 degrees of freedom.
## The p-value is 1.19e-11
Factor loading matrix for youth data - this provides a statistical
categorization of items into constructs or factors. Some items may not
be included in the factor. You can determine whether that’s true and we
can reexamine our EFA
We’re exploring a 0.4 cutoff. Although some factors overlap, this
provides a diagram of items and factors at ≥0.4. The adaptability items
that load (with a ≥0.4 cutoff) in the 2nd factor are relevant to family
leadership, based on the scoring sheet.
fa.diagram(fa.none_y, cut = 0.4)

Factor matrix - youth
We have reverted back to a 0.3 cutoff here and 2 factors.
nfacts_y <- 2
fit_y <- factanal(fac_yt4,nfacts_y, rotation="none")
print(fit_y, digits=3, cutoff=0.3, sort=TRUE)
##
## Call:
## factanal(x = fac_yt4, factors = nfacts_y, rotation = "none")
##
## Uniquenesses:
## fac4y001 fac4y002 fac4y003 fac4y004 fac4y005 fac4y006 fac4y007 fac4y008
## 0.571 0.605 0.547 0.723 0.461 0.582 0.515 0.471
## fac4y009 fac4y010 fac4y011 fac4y012 fac4y013 fac4y014 fac4y015 fac4y016
## 0.278 0.613 0.280 0.732 0.377 0.454 0.284 0.613
## fac4y017 fac4y018 fac4y019 fac4y020 arrestT4
## 0.447 0.707 0.353 0.656 0.965
##
## Loadings:
## Factor1 Factor2
## fac4y001 0.642
## fac4y002 0.626
## fac4y003 0.634
## fac4y004 0.524
## fac4y005 0.725
## fac4y007 0.689
## fac4y008 0.618 0.383
## fac4y009 0.843
## fac4y010 0.585
## fac4y011 0.811
## fac4y013 0.779
## fac4y015 0.833
## fac4y016 0.600
## fac4y017 0.718
## fac4y019 0.784
## fac4y014 0.318 0.667
## fac4y006 0.492 0.418
## fac4y012 0.409 0.317
## fac4y018 0.357 0.406
## fac4y020 0.329 0.486
## arrestT4
##
## Factor1 Factor2
## SS loadings 8.112 1.655
## Proportion Var 0.386 0.079
## Cumulative Var 0.386 0.465
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 322.27 on 169 degrees of freedom.
## The p-value is 1.19e-11
round(fit_y$loadings[ 1:21,], 2)
## Factor1 Factor2
## fac4y001 0.64 -0.13
## fac4y002 0.63 -0.06
## fac4y003 0.63 -0.23
## fac4y004 0.52 0.05
## fac4y005 0.73 -0.11
## fac4y006 0.49 0.42
## fac4y007 0.69 0.10
## fac4y008 0.62 0.38
## fac4y009 0.84 -0.10
## fac4y010 0.59 0.21
## fac4y011 0.81 -0.25
## fac4y012 0.41 0.32
## fac4y013 0.78 -0.13
## fac4y014 0.32 0.67
## fac4y015 0.83 -0.15
## fac4y016 0.60 0.17
## fac4y017 0.72 0.19
## fac4y018 0.36 0.41
## fac4y019 0.78 -0.18
## fac4y020 0.33 0.49
## arrestT4 -0.03 0.18
Constructing correlation coefficient between two factors loadings
for youth data. This may be helpful as a”construct score” from the
factor loadings
factor_loadings_y <- round(fit_y$loadings[ 1:21,], 2)
cor_load_y <- cor(factor_loadings_y, use = "pairwise.complete.obs") #including relevant data for analysis
cor_load_y
## Factor1 Factor2
## Factor1 1.0000000 -0.6710401
## Factor2 -0.6710401 1.0000000
Factor score correlations - youth
More exploration about the adapability factor could be considered in
future analyses. The regression analysis below indicates there being a
relationship with arrest.
factor_scores_y <- as.data.frame(factanal.none_y$scores)
cor_score_y <- cor(factor_scores_y, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_y
## Factor1 Factor2
## Factor1 1.000000e+00 -2.335205e-16
## Factor2 -2.335205e-16 1.000000e+00
Selecting data and Appending scores to full file for analysis -
youth
fac_yt4 %>%
select(arrestT4) -> arrest_var_y
fac_score_arrest_y <- cbind(arrest_var_y, factor_scores_y)
##recoding outcome var to 0 and 1 for easier analysis
fac_score_arrest_y$arrest_bin[fac_score_arrest_y$arrestT4 == 1] <- "0"
fac_score_arrest_y$arrest_bin[fac_score_arrest_y$arrestT4 == 2] <- "1"
Correlations between factor scores, including the outcome variable,
arrest - youth
fac_score_arrest_y$arrest_bin <- as.numeric(fac_score_arrest_y$arrest_bin)
cor_score_arrest_y <- cor(fac_score_arrest_y, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_arrest_y
## arrestT4 Factor1 Factor2 arrest_bin
## arrestT4 1.00000000 -3.172266e-02 2.103972e-01 1.00000000
## Factor1 -0.03172266 1.000000e+00 -2.335205e-16 -0.03172266
## Factor2 0.21039724 -2.335205e-16 1.000000e+00 0.21039724
## arrest_bin 1.00000000 -3.172266e-02 2.103972e-01 1.00000000
Logit model with factor scores as independent variables and arrest
as the outcome - youth
Based on regression coefficients and ORs, for every unit increase of
factor 2 (perceived adapability items), there is a 1.91 increased odds
of arrest among the youth respondents. Does that make sense in the
context of your previous analysis? This relationship is also
statistically significant, but is there any material significance?
fac_mod_y <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2, data = fac_score_arrest_y, family = "binomial")
summ(fac_mod_y, digits = 3)
## MODEL INFO:
## Observations: 148
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(2) = 6.970, p = 0.031
## Pseudo-R² (Cragg-Uhler) = 0.073
## Pseudo-R² (McFadden) = 0.048
## AIC = 145.470, BIC = 154.462
##
## Standard errors: MLE
## ---------------------------------------------------
## Est. S.E. z val. p
## ----------------- -------- ------- -------- -------
## (Intercept) -1.508 0.224 -6.722 0.000
## Factor1 -0.112 0.217 -0.517 0.605
## Factor2 0.651 0.259 2.516 0.012
## ---------------------------------------------------
Odds ratios and 95% CIs - these are simply exponentiated
coefficients from above with CIs - youth
exp(cbind(OR = coef(fac_mod_y), confint(fac_mod_y)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2213368 0.1387275 0.3361995
## Factor1 0.8940108 0.5814047 1.3683460
## Factor2 1.9165451 1.1722047 3.2514448
Logit with Factors 1 and 2 as adjusted moderators (interaction
effects) - youth
Factor 2 maintains similar effect size and statistical
significance.
fac_mod2_y <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2 + Factor1 * Factor2, data = fac_score_arrest_y, family = "binomial")
summ(fac_mod2_y, digits = 3)
## MODEL INFO:
## Observations: 148
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(3) = 8.077, p = 0.044
## Pseudo-R² (Cragg-Uhler) = 0.085
## Pseudo-R² (McFadden) = 0.055
## AIC = 146.364, BIC = 158.352
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## --------------------- -------- ------- -------- -------
## (Intercept) -1.507 0.223 -6.747 0.000
## Factor1 -0.028 0.235 -0.117 0.907
## Factor2 0.685 0.258 2.659 0.008
## Factor1:Factor2 -0.250 0.239 -1.044 0.297
## -------------------------------------------------------
Odds ratios and 95% CIs - with factors 1 and 2 as moderators -
youth
Factor 2 is a better predictor of arrest for the youth data
exp(cbind(OR = coef(fac_mod2_y), confint(fac_mod2_y)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2215872 0.1393188 0.336191
## Factor1 0.9728510 0.6142687 1.556043
## Factor2 1.9829450 1.2139886 3.357340
## Factor1:Factor2 0.7788246 0.4806163 1.239481
Adult/Caregiver analysis
Correlation matrix for relevant variables - adult/caregiver
fac_cg[, c(152:171, 205)] <- sapply(fac_cg[, c(152:171, 205)], as.numeric)#changing to numeric
cor_fac_cg <- cor(fac_cg[, c(152:171, 205)], use = "pairwise.complete.obs") #including relevant data for analysis
corrplot(cor_fac_cg, method = "number")

Parallel analysis - adult/caregiver
The scree plot will show how many factors to consider where the FA
actual data line bows
par_cg <- fa.parallel(fac_cg_t4, fa = "fa")

## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
par_cg
## Call: fa.parallel(x = fac_cg_t4, fa = "fa")
## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
##
## Eigen Values of
##
## eigen values of factors
## [1] 4.36 1.99 0.59 0.45 0.31 0.17 0.07 0.04 -0.04 -0.06 -0.11 -0.12
## [13] -0.14 -0.20 -0.27 -0.30 -0.37 -0.38 -0.47 -0.54 -0.60
##
## eigen values of simulated factors
## [1] 0.81 0.59 0.51 0.44 0.35 0.29 0.23 0.16 0.11 0.05 -0.01 -0.06
## [13] -0.09 -0.15 -0.20 -0.25 -0.30 -0.35 -0.39 -0.44 -0.50
##
## eigen values of components
## [1] 5.02 2.89 1.40 1.26 1.19 0.91 0.88 0.85 0.81 0.71 0.68 0.64 0.60 0.54 0.49
## [16] 0.45 0.42 0.38 0.34 0.30 0.23
##
## eigen values of simulated components
## [1] NA
Parallel analysis suggests that the number of factors = 2 for
adult/caregiver questions.
Factor analysis using FA, we are now using 2 factors.
fa.none_cg <- fa(r=fac_cg_t4,
nfactors = 2,
# covar = FALSE, SMC = TRUE,
fm="pa", # type of factor analysis we want to use (“pa” is principal axis factoring)
max.iter=100, # (50 is the default, but we have changed it to 100
rotate="oblimin") # none rotation
## Loading required namespace: GPArotation
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : I
## am sorry, to do these rotations requires the GPArotation package to be installed
fa.none_cg
## Factor Analysis using method = pa
## Call: fa(r = fac_cg_t4, nfactors = 2, rotate = "oblimin", max.iter = 100,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## fac4p001 0.56 0.04 0.316 0.68 1.0
## fac4p002 0.38 0.28 0.221 0.78 1.8
## fac4p003 0.47 -0.05 0.222 0.78 1.0
## fac4p004 0.29 0.35 0.208 0.79 1.9
## fac4p005 0.36 -0.11 0.143 0.86 1.2
## fac4p006 0.15 0.38 0.164 0.84 1.3
## fac4p007 0.54 0.02 0.297 0.70 1.0
## fac4p008 0.53 0.54 0.571 0.43 2.0
## fac4p009 0.67 -0.07 0.456 0.54 1.0
## fac4p010 0.49 0.08 0.244 0.76 1.1
## fac4p011 0.73 -0.19 0.571 0.43 1.1
## fac4p012 0.04 0.51 0.261 0.74 1.0
## fac4p013 0.69 -0.21 0.514 0.49 1.2
## fac4p014 0.11 0.55 0.317 0.68 1.1
## fac4p015 0.57 -0.36 0.458 0.54 1.7
## fac4p016 0.40 0.12 0.176 0.82 1.2
## fac4p017 0.68 -0.02 0.458 0.54 1.0
## fac4p018 0.02 0.60 0.365 0.63 1.0
## fac4p019 0.52 -0.32 0.371 0.63 1.7
## fac4p020 0.02 0.55 0.303 0.70 1.0
## arrestT4 0.03 0.11 0.014 0.99 1.2
##
## PA1 PA2
## SS loadings 4.42 2.23
## Proportion Var 0.21 0.11
## Cumulative Var 0.21 0.32
## Proportion Explained 0.66 0.34
## Cumulative Proportion 0.66 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 210 with the objective function = 6.55 with Chi Square = 970.73
## df of the model are 169 and the objective function was 1.74
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic n.obs is 157 with the empirical chi square 253.62 with prob < 2.7e-05
## The total n.obs was 157 with Likelihood Chi Square = 254.97 with prob < 2.1e-05
##
## Tucker Lewis Index of factoring reliability = 0.858
## RMSEA index = 0.057 and the 90 % confidence intervals are 0.042 0.071
## BIC = -599.54
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy
## PA1 PA2
## Correlation of (regression) scores with factors 0.94 0.88
## Multiple R square of scores with factors 0.89 0.78
## Minimum correlation of possible factor scores 0.77 0.56
Factor analysis using factanal method - adult/caregiver
Here, we changed the cutoff to 0.4. Some factors overlap and appear
to drop out.
factanal.none_cg <- factanal(fac_cg_t4, factors=2, scores = c("regression"), rotation = "none")
print(factanal.none_cg, cut = 0.4)
##
## Call:
## factanal(x = fac_cg_t4, factors = 2, scores = c("regression"), rotation = "none")
##
## Uniquenesses:
## fac4p001 fac4p002 fac4p003 fac4p004 fac4p005 fac4p006 fac4p007 fac4p008
## 0.695 0.801 0.775 0.805 0.861 0.817 0.695 0.420
## fac4p009 fac4p010 fac4p011 fac4p012 fac4p013 fac4p014 fac4p015 fac4p016
## 0.544 0.765 0.419 0.751 0.478 0.674 0.535 0.813
## fac4p017 fac4p018 fac4p019 fac4p020 arrestT4
## 0.564 0.642 0.612 0.701 0.986
##
## Loadings:
## Factor1 Factor2
## fac4p001 0.550
## fac4p002
## fac4p003 0.473
## fac4p004
## fac4p005
## fac4p006 0.404
## fac4p007 0.550
## fac4p008 0.510 0.566
## fac4p009 0.675
## fac4p010 0.473
## fac4p011 0.744
## fac4p012 0.498
## fac4p013 0.699
## fac4p014 0.562
## fac4p015 0.593
## fac4p016 0.409
## fac4p017 0.660
## fac4p018 0.598
## fac4p019 0.545
## fac4p020 0.547
## arrestT4
##
## Factor1 Factor2
## SS loadings 4.409 2.237
## Proportion Var 0.210 0.107
## Cumulative Var 0.210 0.317
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 254.25 on 169 degrees of freedom.
## The p-value is 2.42e-05
Factor loading matrix for adult/caregiver data - this provides a
statistical categorization of items into constructs or factors. Some
items may not be included in the factor. You can determine whether
that’s true and we can reexamine our EFA
Factor matrix - adult/caregiver
You can see the numeric change due to the increased cutoff in factor
loadings here.
nfacts_cg <- 2
fit_cg <- factanal(fac_cg_t4,nfacts_cg, rotation="none")
print(fit_cg, digits=3, cutoff=0.4, sort=TRUE)
##
## Call:
## factanal(x = fac_cg_t4, factors = nfacts_cg, rotation = "none")
##
## Uniquenesses:
## fac4p001 fac4p002 fac4p003 fac4p004 fac4p005 fac4p006 fac4p007 fac4p008
## 0.695 0.801 0.775 0.805 0.861 0.817 0.695 0.420
## fac4p009 fac4p010 fac4p011 fac4p012 fac4p013 fac4p014 fac4p015 fac4p016
## 0.544 0.765 0.419 0.751 0.478 0.674 0.535 0.813
## fac4p017 fac4p018 fac4p019 fac4p020 arrestT4
## 0.564 0.642 0.612 0.701 0.986
##
## Loadings:
## Factor1 Factor2
## fac4p001 0.550
## fac4p007 0.550
## fac4p009 0.675
## fac4p011 0.744
## fac4p013 0.699
## fac4p015 0.593
## fac4p017 0.660
## fac4p019 0.545
## fac4p008 0.510 0.566
## fac4p014 0.562
## fac4p018 0.598
## fac4p020 0.547
## fac4p002
## fac4p003 0.473
## fac4p004
## fac4p005
## fac4p006 0.404
## fac4p010 0.473
## fac4p012 0.498
## fac4p016 0.409
## arrestT4
##
## Factor1 Factor2
## SS loadings 4.409 2.237
## Proportion Var 0.210 0.107
## Cumulative Var 0.210 0.317
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 254.25 on 169 degrees of freedom.
## The p-value is 2.42e-05
Total of 21 factors - adult/caregiver
round(fit_cg$loadings[ 1:21,], 2)
## Factor1 Factor2
## fac4p001 0.55 0.05
## fac4p002 0.35 0.28
## fac4p003 0.47 -0.03
## fac4p004 0.26 0.36
## fac4p005 0.36 -0.10
## fac4p006 0.14 0.40
## fac4p007 0.55 0.04
## fac4p008 0.51 0.57
## fac4p009 0.67 -0.04
## fac4p010 0.47 0.11
## fac4p011 0.74 -0.17
## fac4p012 0.02 0.50
## fac4p013 0.70 -0.18
## fac4p014 0.10 0.56
## fac4p015 0.59 -0.34
## fac4p016 0.41 0.14
## fac4p017 0.66 0.00
## fac4p018 0.01 0.60
## fac4p019 0.55 -0.30
## fac4p020 0.00 0.55
## arrestT4 0.03 0.11
dim(factanal.none_cg$scores)
## [1] 157 2
facscores_cg <- factanal.none_cg$scores
Constructing coefficient between two factors loadings for
adult/caregiver data. This may serve as an option for a separate
“construct score” from the factor loadings.
factor_loadings_cg <- round(fit_cg$loadings[ 1:21,], 2)
cor_load_cg <- cor(factor_loadings_cg, use = "pairwise.complete.obs") #including relevant data for analysis
cor_load_cg
## Factor1 Factor2
## Factor1 1.0000000 -0.7567962
## Factor2 -0.7567962 1.0000000
Factor score correlations - adult/caregiver
factor_scores_cg <- as.data.frame(factanal.none_cg$scores)
cor_score_cg <- cor(factor_scores_cg, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_cg
## Factor1 Factor2
## Factor1 1.000000e+00 4.814896e-16
## Factor2 4.814896e-16 1.000000e+00
Selecting data and Appending scores to full file for analysis -
adult/caregiver
fac_cg_t4 %>%
select(arrestT4) -> arrest_var_cg
fac_score_arrest_cg <- cbind(arrest_var_cg, factor_scores_cg)
Correlations between factor scores, including the outcome variable,
arrest - adult/caregiver
cor_score_arrest_cg <- cor(fac_score_arrest_cg, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_arrest_cg
## arrestT4 Factor1 Factor2
## arrestT4 1.0000000 3.127920e-02 1.284758e-01
## Factor1 0.0312792 1.000000e+00 4.814896e-16
## Factor2 0.1284758 4.814896e-16 1.000000e+00
Logit Model with factor scores as independent variables and arrest
as the outcome - adult/caregiver
In these results, we can see in the coefficients and ORs below that
there are no factors associated with arrest among the caregiver group.
That seems like a relevant outcome.
##recoding arrest variable
fac_score_arrest_cg$arrest_bin[fac_score_arrest_cg$arrestT4 == 1] <- "0"
fac_score_arrest_cg$arrest_bin[fac_score_arrest_cg$arrestT4 == 2] <- "1"
fac_mod_cg <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2, data = fac_score_arrest_cg, family = "binomial")
summ(fac_mod_cg, digits = 3)
## MODEL INFO:
## Observations: 157
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## χ²(2) = 2.709, p = 0.258
## Pseudo-R² (Cragg-Uhler) = 0.027
## Pseudo-R² (McFadden) = 0.017
## AIC = 159.301, BIC = 168.470
##
## Standard errors: MLE
## ---------------------------------------------------
## Est. S.E. z val. p
## ----------------- -------- ------- -------- -------
## (Intercept) -1.434 0.206 -6.966 0.000
## Factor1 0.079 0.211 0.372 0.710
## Factor2 0.362 0.228 1.590 0.112
## ---------------------------------------------------
Odds ratios and 95% CIs - these are simply exponentiated
coefficients from above with CIs - adult_caregiver
exp(cbind(OR = coef(fac_mod_cg), confint(fac_mod_cg)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2382879 0.1560396 0.3510328
## Factor1 1.0818597 0.7143731 1.6450557
## Factor2 1.4362186 0.9214228 2.2623396