library(psych)
library(corrplot)
## corrplot 0.92 loaded
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(foreign)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   0.3.5      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()

Importing files from spss and removing sid

faces <- read.spss("/Users/KKW4/Desktop/Research projects/FACES - EFA analysis/T1 to T4 FACES Caregiver and Youth Raw & Totals SRP_w_arrest.sav", to.data.frame = TRUE)

Full analysis

Correlation matrix for relevant variables

fac[, c(68:87, 152:171, 205)] <- sapply(fac[, c(68:87, 152:171, 205)], as.numeric)#changing to numeric
cor_fac <- cor(fac[, c(68:87, 152:171, 205)], use = "pairwise.complete.obs") #including relevant data for analysis
corrplot(cor_fac, method = "number")

Removing NAs for complete analysis

fac_t4 <- subset(fac[ , c(68:87, 152:171, 205)])

fac_t4 <- na.omit(fac_t4)

Selecting T4 data and Running Kaiser-Meyer-Olkin adequcy check

Bartlett test of sphericity

cortest.bartlett(X)
## R was not square, finding R from data
## $chisq
## [1] 2956.052
## 
## $p.value
## [1] 2.33647e-238
## 
## $df
## [1] 820

Checking determinance

det(cor(X))
## [1] 1.726971e-10

Scree plot - this plot determines relatedness using correlations

This is based on the change to 3 factors.

library(ggplot2)

fafitfree <- fa(fac_t4, nfactors = 3, rotate = "none")
n_factors <- length(fafitfree$e.values)
scree     <- data.frame(
  Factor_n =  as.factor(1:n_factors), 
  Eigenvalue = fafitfree$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) + 
  geom_point() + geom_line() +
  xlab("Number of factors") +
  ylab("Initial eigenvalue") +
  labs( title = "Scree Plot", 
        subtitle = "(Based on the unreduced correlation matrix)")

## Parallel analysis ## The scree plot will show how many factors to consider where the FA actual data line bows

par <- fa.parallel(X, fa = "fa")

## Parallel analysis suggests that the number of factors =  4  and the number of components =  NA

Parallel analysis suggests that the number of factors = 4, but we are testing 3.

Factor analysis using FA - based on the scree plot and our discussion, we are now using 3 factors.

We are including only 3 factors below. Additionally, we are cutting off covariances at 0.3. I’ve tested higher cutoffs (0.4 and 0.5), but the items either aren’t correlated with any factor or they drop out.

factanal.none <- factanal(X, factors=3, scores = c("regression"), rotation = "none")
print(factanal.none, cutoff = 0.3)
## 
## Call:
## factanal(x = X, factors = 3, scores = c("regression"), rotation = "none")
## 
## Uniquenesses:
## fac4y001 fac4y002 fac4y003 fac4y004 fac4y005 fac4y006 fac4y007 fac4y008 
##    0.570    0.559    0.581    0.668    0.444    0.675    0.522    0.617 
## fac4y009 fac4y010 fac4y011 fac4y012 fac4y013 fac4y014 fac4y015 fac4y016 
##    0.285    0.620    0.320    0.787    0.385    0.884    0.301    0.626 
## fac4y017 fac4y018 fac4y019 fac4y020 fac4p001 fac4p002 fac4p003 fac4p004 
##    0.476    0.866    0.390    0.882    0.724    0.758    0.762    0.774 
## fac4p005 fac4p006 fac4p007 fac4p008 fac4p009 fac4p010 fac4p011 fac4p012 
##    0.830    0.867    0.715    0.466    0.550    0.780    0.435    0.711 
## fac4p013 fac4p014 fac4p015 fac4p016 fac4p017 fac4p018 fac4p019 fac4p020 
##    0.479    0.676    0.529    0.799    0.565    0.656    0.556    0.725 
## arrestT4 
##    0.996 
## 
## Loadings:
##          Factor1 Factor2 Factor3
## fac4y001  0.656                 
## fac4y002  0.612                 
## fac4y003  0.625                 
## fac4y004  0.498                 
## fac4y005  0.744                 
## fac4y006  0.429  -0.344         
## fac4y007  0.686                 
## fac4y008  0.579                 
## fac4y009  0.838                 
## fac4y010  0.542                 
## fac4y011  0.824                 
## fac4y012  0.377                 
## fac4y013  0.780                 
## fac4y014                        
## fac4y015  0.821                 
## fac4y016  0.587                 
## fac4y017  0.682                 
## fac4y018  0.328                 
## fac4y019  0.772                 
## fac4y020                        
## fac4p001          0.453         
## fac4p002          0.361   0.313 
## fac4p003          0.424         
## fac4p004                  0.389 
## fac4p005          0.369         
## fac4p006                  0.338 
## fac4p007  0.361   0.390         
## fac4p008          0.430   0.561 
## fac4p009  0.322   0.588         
## fac4p010          0.431         
## fac4p011  0.364   0.642         
## fac4p012                  0.536 
## fac4p013  0.306   0.629         
## fac4p014                  0.563 
## fac4p015  0.301   0.545         
## fac4p016          0.390         
## fac4p017  0.304   0.584         
## fac4p018                  0.583 
## fac4p019  0.354   0.489         
## fac4p020                  0.523 
## arrestT4                        
## 
##                Factor1 Factor2 Factor3
## SS loadings      8.811   4.126   2.251
## Proportion Var   0.215   0.101   0.055
## Cumulative Var   0.215   0.316   0.370
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 1088.72 on 700 degrees of freedom.
## The p-value is 2.02e-19

Factor loading matrix - this provides a statistical categorization of items into constructs or factors. Some items may not be included in the factor. You can determine whether that’s true and we can reexamine our EFA

The correlations/covariances mentioned above are replicated here. Items are not correlated at higher cutoffs or they drop out of the diagram.

fa.diagram(fa.none, cut = .3)

## Factor matrix ### The loadings for factors and items all correspond at a 0.3 cutoff. However, as mentioned in previous sections there are missing or non-loaded factors when increasing the cutoff. Should we keep this at 0.3 or test other cutoff levels?

nfacts <- 3
fit <- factanal(X,nfacts, rotation="none")
print(fit, digits=3, cutoff=0.3, sort=TRUE)
## 
## Call:
## factanal(x = X, factors = nfacts, rotation = "none")
## 
## Uniquenesses:
## fac4y001 fac4y002 fac4y003 fac4y004 fac4y005 fac4y006 fac4y007 fac4y008 
##    0.570    0.559    0.581    0.668    0.444    0.675    0.522    0.617 
## fac4y009 fac4y010 fac4y011 fac4y012 fac4y013 fac4y014 fac4y015 fac4y016 
##    0.285    0.620    0.320    0.787    0.385    0.884    0.301    0.626 
## fac4y017 fac4y018 fac4y019 fac4y020 fac4p001 fac4p002 fac4p003 fac4p004 
##    0.476    0.866    0.390    0.882    0.724    0.758    0.762    0.774 
## fac4p005 fac4p006 fac4p007 fac4p008 fac4p009 fac4p010 fac4p011 fac4p012 
##    0.830    0.867    0.715    0.466    0.550    0.780    0.435    0.711 
## fac4p013 fac4p014 fac4p015 fac4p016 fac4p017 fac4p018 fac4p019 fac4p020 
##    0.479    0.676    0.529    0.799    0.565    0.656    0.556    0.725 
## arrestT4 
##    0.996 
## 
## Loadings:
##          Factor1 Factor2 Factor3
## fac4y001  0.656                 
## fac4y002  0.612                 
## fac4y003  0.625                 
## fac4y005  0.744                 
## fac4y007  0.686                 
## fac4y008  0.579                 
## fac4y009  0.838                 
## fac4y010  0.542                 
## fac4y011  0.824                 
## fac4y013  0.780                 
## fac4y015  0.821                 
## fac4y016  0.587                 
## fac4y017  0.682                 
## fac4y019  0.772                 
## fac4p009  0.322   0.588         
## fac4p011  0.364   0.642         
## fac4p013  0.306   0.629         
## fac4p015  0.301   0.545         
## fac4p017  0.304   0.584         
## fac4p008          0.430   0.561 
## fac4p012                  0.536 
## fac4p014                  0.563 
## fac4p018                  0.583 
## fac4p020                  0.523 
## fac4y004  0.498                 
## fac4y006  0.429  -0.344         
## fac4y012  0.377                 
## fac4y014                        
## fac4y018  0.328                 
## fac4y020                        
## fac4p001          0.453         
## fac4p002          0.361   0.313 
## fac4p003          0.424         
## fac4p004                  0.389 
## fac4p005          0.369         
## fac4p006                  0.338 
## fac4p007  0.361   0.390         
## fac4p010          0.431         
## fac4p016          0.390         
## fac4p019  0.354   0.489         
## arrestT4                        
## 
##                Factor1 Factor2 Factor3
## SS loadings      8.811   4.126   2.251
## Proportion Var   0.215   0.101   0.055
## Cumulative Var   0.215   0.316   0.370
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 1088.72 on 700 degrees of freedom.
## The p-value is 2.02e-19

Constructing correlation coefficients between two factors loadings- would this coefficient may provide a construct score from the two loadings based on factor loadings suitable for the R&R? If they are different than the scores, should we rely on these as “construct scores” from the factor loadings?

factor_loadings <- round(fit$loadings[ 1:40,], 2)
cor_load <- cor(factor_loadings, use = "pairwise.complete.obs") #including relevant data for analysis
cor_load
##            Factor1     Factor2     Factor3
## Factor1  1.0000000 -0.44743143 -0.53464575
## Factor2 -0.4474314  1.00000000 -0.07559533
## Factor3 -0.5346458 -0.07559533  1.00000000
factor_scores <- as.data.frame(factanal.none$scores)
cor_score <- cor(factor_scores, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score
##               Factor1       Factor2       Factor3
## Factor1  1.000000e+00 -9.020683e-17 -1.079572e-16
## Factor2 -9.020683e-17  1.000000e+00 -3.863483e-17
## Factor3 -1.079572e-16 -3.863483e-17  1.000000e+00

Selecting data and Appending scores to full file for analysis

fac_t4 %>%
  select(arrestT4) -> arrest_var
fac_score_arrest <- cbind(arrest_var, factor_scores)

Correlations between factor scores, including the outcome variable, arrest

cor_score_arrest <- cor(fac_score_arrest, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_arrest
##             arrestT4       Factor1       Factor2       Factor3
## arrestT4  1.00000000 -3.912859e-02  2.118218e-02  5.175604e-02
## Factor1  -0.03912859  1.000000e+00 -9.020683e-17 -1.079572e-16
## Factor2   0.02118218 -9.020683e-17  1.000000e+00 -3.863483e-17
## Factor3   0.05175604 -1.079572e-16 -3.863483e-17  1.000000e+00

Logit with factor scores as independent variables and arrest as the outcome

fac_score_arrest$arrest_bin[fac_score_arrest$arrestT4 == 1] <- "0"
fac_score_arrest$arrest_bin[fac_score_arrest$arrestT4 == 2] <- "1"


fac_mod <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2 + Factor3, data = fac_score_arrest, family = "binomial")
summ(fac_mod, digits = 3)
## MODEL INFO:
## Observations: 147
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## χ²(3) = 0.685, p = 0.877
## Pseudo-R² (Cragg-Uhler) = 0.007
## Pseudo-R² (McFadden) = 0.005
## AIC = 150.468, BIC = 162.429 
## 
## Standard errors: MLE
## ---------------------------------------------------
##                       Est.    S.E.   z val.       p
## ----------------- -------- ------- -------- -------
## (Intercept)         -1.456   0.212   -6.878   0.000
## Factor1             -0.103   0.216   -0.477   0.633
## Factor2              0.059   0.226    0.260   0.795
## Factor3              0.149   0.238    0.627   0.531
## ---------------------------------------------------

Odds ratios and 95% CIs - these are simply exponentiated coefficients from above with CIs

exp(cbind(OR = coef(fac_mod), confint(fac_mod)))
## Waiting for profiling to be done...
##                    OR     2.5 %    97.5 %
## (Intercept) 0.2331066 0.1508530 0.3472036
## Factor1     0.9018792 0.5871916 1.3789767
## Factor2     1.0605937 0.6818503 1.6630395
## Factor3     1.1606457 0.7257767 1.8543706

Logit with Factors 1 and 3 as moderators (interaction effects)

fac_mod2 <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2 + Factor3 +  Factor1 * Factor3, data = fac_score_arrest, family = "binomial")
summ(fac_mod2, digits = 3)
## MODEL INFO:
## Observations: 147
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## χ²(4) = 0.686, p = 0.953
## Pseudo-R² (Cragg-Uhler) = 0.007
## Pseudo-R² (McFadden) = 0.005
## AIC = 152.466, BIC = 167.419 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                           Est.    S.E.   z val.       p
## --------------------- -------- ------- -------- -------
## (Intercept)             -1.456   0.212   -6.878   0.000
## Factor1                 -0.102   0.219   -0.467   0.640
## Factor2                  0.060   0.228    0.263   0.793
## Factor3                  0.149   0.238    0.626   0.531
## Factor1:Factor3         -0.010   0.285   -0.035   0.972
## -------------------------------------------------------

Odds ratios for factors 1 and 3 as moderators

exp(cbind(OR = coef(fac_mod2), confint(fac_mod2)))
## Waiting for profiling to be done...
##                        OR     2.5 %    97.5 %
## (Intercept)     0.2331135 0.1508594 0.3472106
## Factor1         0.9028803 0.5849506 1.3862619
## Factor2         1.0618124 0.6797835 1.6740071
## Factor3         1.1604482 0.7251940 1.8543086
## Factor1:Factor3 0.9900885 0.5619919 1.7393777

Logit with Factors 1 and 3 as unadjusted moderators (interaction effects)

fac_mod3 <- glm(as.numeric(arrest_bin) ~ Factor1 * Factor3, data = fac_score_arrest, family = "binomial")
summ(fac_mod3, digits = 3)
## MODEL INFO:
## Observations: 147
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## χ²(3) = 0.617, p = 0.893
## Pseudo-R² (Cragg-Uhler) = 0.007
## Pseudo-R² (McFadden) = 0.004
## AIC = 150.536, BIC = 162.497 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                           Est.    S.E.   z val.       p
## --------------------- -------- ------- -------- -------
## (Intercept)             -1.455   0.212   -6.879   0.000
## Factor1                 -0.102   0.218   -0.469   0.639
## Factor3                  0.149   0.238    0.626   0.531
## Factor1:Factor3          0.001   0.283    0.003   0.998
## -------------------------------------------------------

Odds ratios for factors 1 and 3 as unadjusted moderators

exp(cbind(OR = coef(fac_mod3), confint(fac_mod3)))
## Waiting for profiling to be done...
##                        OR     2.5 %    97.5 %
## (Intercept)     0.2333328 0.1510654 0.3474448
## Factor1         0.9026011 0.5849763 1.3853227
## Factor3         1.1606267 0.7251614 1.8547780
## Factor1:Factor3 1.0007901 0.5708951 1.7482452

Youth analysis

Correlation matrix for relevant variables - youth

This matrix provides correlations between each item. Most are positively correlated with the exception of the outcome variable, arrest.

fac_y[, c(68:87, 205)] <- sapply(fac_y[, c(68:87, 205)], as.numeric)#changing to numeric
cor_fac_y <- cor(fac_y[, c(68:87, 205)], use = "pairwise.complete.obs") #including relevant data for analysis
corrplot(cor_fac_y, method = "number")

## In order to conduct this analysis, we have to remove NAs - youth

fac_yt4 <- subset(fac_y[ , c(68:87, 205)])

fac_yt4 <- na.omit(fac_yt4)

Scree plot - this plot determines relatedness using correlations - youth

This is based on 2 factors

library(ggplot2)

fafitfree_y <- fa(fac_yt4, nfactors = 2, rotate = "none")
n_factors <- length(fafitfree_y$e.values)
scree_y     <- data.frame(
  Factor_n =  as.factor(1:n_factors), 
  Eigenvalue = fafitfree_y$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) + 
  geom_point() + geom_line() +
  xlab("Number of factors") +
  ylab("Initial eigenvalue") +
  labs( title = "Scree Plot", 
        subtitle = "(Based on the unreduced correlation matrix)")

Parallel analysis - youth

The scree plot will show how many factors to consider where the FA actual data line bows

par_y <- fa.parallel(fac_yt4, fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

Parallel analysis suggests that the number of factors = 2 for youth questions.

Factor analysis using FA -, we are now using 2 factors.

fa.none_y <- fa(r=fac_yt4, 
 nfactors = 2, 
 # covar = FALSE, SMC = TRUE,
 fm="pa", # type of factor analysis we want to use (“pa” is principal axis factoring)
 max.iter=100, # (50 is the default, but we have changed it to 100
 rotate="oblimin") # none rotation
## Loading required namespace: GPArotation
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : I
## am sorry, to do these rotations requires the GPArotation package to be installed
fa.none_y
## Factor Analysis using method =  pa
## Call: fa(r = fac_yt4, nfactors = 2, rotate = "oblimin", max.iter = 100, 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##            PA1   PA2    h2   u2 com
## fac4y001  0.63 -0.19 0.436 0.56 1.2
## fac4y002  0.64 -0.15 0.438 0.56 1.1
## fac4y003  0.63 -0.29 0.475 0.53 1.4
## fac4y004  0.55 -0.05 0.302 0.70 1.0
## fac4y005  0.72 -0.18 0.558 0.44 1.1
## fac4y006  0.52  0.37 0.408 0.59 1.8
## fac4y007  0.68  0.06 0.471 0.53 1.0
## fac4y008  0.64  0.33 0.528 0.47 1.5
## fac4y009  0.83 -0.14 0.704 0.30 1.1
## fac4y010  0.61  0.14 0.386 0.61 1.1
## fac4y011  0.78 -0.31 0.704 0.30 1.3
## fac4y012  0.44  0.25 0.261 0.74 1.6
## fac4y013  0.76 -0.16 0.601 0.40 1.1
## fac4y014  0.37  0.63 0.533 0.47 1.6
## fac4y015  0.81 -0.20 0.705 0.29 1.1
## fac4y016  0.61  0.11 0.389 0.61 1.1
## fac4y017  0.73  0.12 0.551 0.45 1.1
## fac4y018  0.38  0.41 0.313 0.69 2.0
## fac4y019  0.75 -0.21 0.608 0.39 1.2
## fac4y020  0.36  0.50 0.384 0.62 1.8
## arrestT4 -0.02  0.21 0.043 0.96 1.0
## 
##                        PA1  PA2
## SS loadings           8.17 1.63
## Proportion Var        0.39 0.08
## Cumulative Var        0.39 0.47
## Proportion Explained  0.83 0.17
## Cumulative Proportion 0.83 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  210  with the objective function =  12.12 with Chi Square =  1686.27
## df of  the model are 169  and the objective function was  2.36 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic n.obs is  148 with the empirical chi square  202.61  with prob <  0.04 
## The total n.obs was  148  with Likelihood Chi Square =  325.36  with prob <  5.6e-12 
## 
## Tucker Lewis Index of factoring reliability =  0.867
## RMSEA index =  0.079  and the 90 % confidence intervals are  0.066 0.092
## BIC =  -519.16
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.97 0.87
## Multiple R square of scores with factors          0.95 0.76
## Minimum correlation of possible factor scores     0.90 0.52

Factor analysis using factanal method - youth

We explored a 0.4 cutoff. Some items load for more than 1 factor. It seems the ideal cutoff is 0.3 vs. 0.4

factanal.none_y <- factanal(fac_yt4, factors=2, scores = c("regression"), rotation = "none")
print(factanal.none_y, cut = .3)
## 
## Call:
## factanal(x = fac_yt4, factors = 2, scores = c("regression"),     rotation = "none")
## 
## Uniquenesses:
## fac4y001 fac4y002 fac4y003 fac4y004 fac4y005 fac4y006 fac4y007 fac4y008 
##    0.571    0.605    0.547    0.723    0.461    0.582    0.515    0.471 
## fac4y009 fac4y010 fac4y011 fac4y012 fac4y013 fac4y014 fac4y015 fac4y016 
##    0.278    0.613    0.280    0.732    0.377    0.454    0.284    0.613 
## fac4y017 fac4y018 fac4y019 fac4y020 arrestT4 
##    0.447    0.707    0.353    0.656    0.965 
## 
## Loadings:
##          Factor1 Factor2
## fac4y001  0.642         
## fac4y002  0.626         
## fac4y003  0.634         
## fac4y004  0.524         
## fac4y005  0.725         
## fac4y006  0.492   0.418 
## fac4y007  0.689         
## fac4y008  0.618   0.383 
## fac4y009  0.843         
## fac4y010  0.585         
## fac4y011  0.811         
## fac4y012  0.409   0.317 
## fac4y013  0.779         
## fac4y014  0.318   0.667 
## fac4y015  0.833         
## fac4y016  0.600         
## fac4y017  0.718         
## fac4y018  0.357   0.406 
## fac4y019  0.784         
## fac4y020  0.329   0.486 
## arrestT4                
## 
##                Factor1 Factor2
## SS loadings      8.112   1.655
## Proportion Var   0.386   0.079
## Cumulative Var   0.386   0.465
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 322.27 on 169 degrees of freedom.
## The p-value is 1.19e-11

Factor loading matrix for youth data - this provides a statistical categorization of items into constructs or factors. Some items may not be included in the factor. You can determine whether that’s true and we can reexamine our EFA

We’re exploring a 0.4 cutoff. Although some factors overlap, this provides a diagram of items and factors at ≥0.4. The adaptability items that load (with a ≥0.4 cutoff) in the 2nd factor are relevant to family leadership, based on the scoring sheet.

fa.diagram(fa.none_y, cut = 0.4)

Factor matrix - youth

We have reverted back to a 0.3 cutoff here and 2 factors.

nfacts_y <- 2
fit_y <- factanal(fac_yt4,nfacts_y, rotation="none")
print(fit_y, digits=3, cutoff=0.3, sort=TRUE)
## 
## Call:
## factanal(x = fac_yt4, factors = nfacts_y, rotation = "none")
## 
## Uniquenesses:
## fac4y001 fac4y002 fac4y003 fac4y004 fac4y005 fac4y006 fac4y007 fac4y008 
##    0.571    0.605    0.547    0.723    0.461    0.582    0.515    0.471 
## fac4y009 fac4y010 fac4y011 fac4y012 fac4y013 fac4y014 fac4y015 fac4y016 
##    0.278    0.613    0.280    0.732    0.377    0.454    0.284    0.613 
## fac4y017 fac4y018 fac4y019 fac4y020 arrestT4 
##    0.447    0.707    0.353    0.656    0.965 
## 
## Loadings:
##          Factor1 Factor2
## fac4y001  0.642         
## fac4y002  0.626         
## fac4y003  0.634         
## fac4y004  0.524         
## fac4y005  0.725         
## fac4y007  0.689         
## fac4y008  0.618   0.383 
## fac4y009  0.843         
## fac4y010  0.585         
## fac4y011  0.811         
## fac4y013  0.779         
## fac4y015  0.833         
## fac4y016  0.600         
## fac4y017  0.718         
## fac4y019  0.784         
## fac4y014  0.318   0.667 
## fac4y006  0.492   0.418 
## fac4y012  0.409   0.317 
## fac4y018  0.357   0.406 
## fac4y020  0.329   0.486 
## arrestT4                
## 
##                Factor1 Factor2
## SS loadings      8.112   1.655
## Proportion Var   0.386   0.079
## Cumulative Var   0.386   0.465
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 322.27 on 169 degrees of freedom.
## The p-value is 1.19e-11
round(fit_y$loadings[ 1:21,], 2)
##          Factor1 Factor2
## fac4y001    0.64   -0.13
## fac4y002    0.63   -0.06
## fac4y003    0.63   -0.23
## fac4y004    0.52    0.05
## fac4y005    0.73   -0.11
## fac4y006    0.49    0.42
## fac4y007    0.69    0.10
## fac4y008    0.62    0.38
## fac4y009    0.84   -0.10
## fac4y010    0.59    0.21
## fac4y011    0.81   -0.25
## fac4y012    0.41    0.32
## fac4y013    0.78   -0.13
## fac4y014    0.32    0.67
## fac4y015    0.83   -0.15
## fac4y016    0.60    0.17
## fac4y017    0.72    0.19
## fac4y018    0.36    0.41
## fac4y019    0.78   -0.18
## fac4y020    0.33    0.49
## arrestT4   -0.03    0.18

Constructing correlation coefficient between two factors loadings for youth data. This may be helpful as a”construct score” from the factor loadings

factor_loadings_y <- round(fit_y$loadings[ 1:21,], 2)
cor_load_y <- cor(factor_loadings_y, use = "pairwise.complete.obs") #including relevant data for analysis
cor_load_y
##            Factor1    Factor2
## Factor1  1.0000000 -0.6710401
## Factor2 -0.6710401  1.0000000

Factor score correlations - youth

More exploration about the adapability factor could be considered in future analyses. The regression analysis below indicates there being a relationship with arrest.

factor_scores_y <- as.data.frame(factanal.none_y$scores)
cor_score_y <- cor(factor_scores_y, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_y
##               Factor1       Factor2
## Factor1  1.000000e+00 -2.335205e-16
## Factor2 -2.335205e-16  1.000000e+00

Selecting data and Appending scores to full file for analysis - youth

fac_yt4 %>%
  select(arrestT4) -> arrest_var_y
fac_score_arrest_y <- cbind(arrest_var_y, factor_scores_y)

##recoding outcome var to 0 and 1 for easier analysis
fac_score_arrest_y$arrest_bin[fac_score_arrest_y$arrestT4 == 1] <- "0"
fac_score_arrest_y$arrest_bin[fac_score_arrest_y$arrestT4 == 2] <- "1"

Correlations between factor scores, including the outcome variable, arrest - youth

fac_score_arrest_y$arrest_bin <- as.numeric(fac_score_arrest_y$arrest_bin)
cor_score_arrest_y <- cor(fac_score_arrest_y, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_arrest_y
##               arrestT4       Factor1       Factor2  arrest_bin
## arrestT4    1.00000000 -3.172266e-02  2.103972e-01  1.00000000
## Factor1    -0.03172266  1.000000e+00 -2.335205e-16 -0.03172266
## Factor2     0.21039724 -2.335205e-16  1.000000e+00  0.21039724
## arrest_bin  1.00000000 -3.172266e-02  2.103972e-01  1.00000000

Logit model with factor scores as independent variables and arrest as the outcome - youth

Based on regression coefficients and ORs, for every unit increase of factor 2 (perceived adapability items), there is a 1.91 increased odds of arrest among the youth respondents. Does that make sense in the context of your previous analysis? This relationship is also statistically significant, but is there any material significance?

fac_mod_y <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2, data = fac_score_arrest_y, family = "binomial")
summ(fac_mod_y, digits = 3)
## MODEL INFO:
## Observations: 148
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## χ²(2) = 6.970, p = 0.031
## Pseudo-R² (Cragg-Uhler) = 0.073
## Pseudo-R² (McFadden) = 0.048
## AIC = 145.470, BIC = 154.462 
## 
## Standard errors: MLE
## ---------------------------------------------------
##                       Est.    S.E.   z val.       p
## ----------------- -------- ------- -------- -------
## (Intercept)         -1.508   0.224   -6.722   0.000
## Factor1             -0.112   0.217   -0.517   0.605
## Factor2              0.651   0.259    2.516   0.012
## ---------------------------------------------------

Odds ratios and 95% CIs - these are simply exponentiated coefficients from above with CIs - youth

exp(cbind(OR = coef(fac_mod_y), confint(fac_mod_y)))
## Waiting for profiling to be done...
##                    OR     2.5 %    97.5 %
## (Intercept) 0.2213368 0.1387275 0.3361995
## Factor1     0.8940108 0.5814047 1.3683460
## Factor2     1.9165451 1.1722047 3.2514448

Logit with Factors 1 and 2 as adjusted moderators (interaction effects) - youth

Factor 2 maintains similar effect size and statistical significance.

fac_mod2_y <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2 + Factor1 * Factor2, data = fac_score_arrest_y, family = "binomial")
summ(fac_mod2_y, digits = 3)
## MODEL INFO:
## Observations: 148
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## χ²(3) = 8.077, p = 0.044
## Pseudo-R² (Cragg-Uhler) = 0.085
## Pseudo-R² (McFadden) = 0.055
## AIC = 146.364, BIC = 158.352 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                           Est.    S.E.   z val.       p
## --------------------- -------- ------- -------- -------
## (Intercept)             -1.507   0.223   -6.747   0.000
## Factor1                 -0.028   0.235   -0.117   0.907
## Factor2                  0.685   0.258    2.659   0.008
## Factor1:Factor2         -0.250   0.239   -1.044   0.297
## -------------------------------------------------------

Odds ratios and 95% CIs - with factors 1 and 2 as moderators - youth

Factor 2 is a better predictor of arrest for the youth data

exp(cbind(OR = coef(fac_mod2_y), confint(fac_mod2_y)))
## Waiting for profiling to be done...
##                        OR     2.5 %   97.5 %
## (Intercept)     0.2215872 0.1393188 0.336191
## Factor1         0.9728510 0.6142687 1.556043
## Factor2         1.9829450 1.2139886 3.357340
## Factor1:Factor2 0.7788246 0.4806163 1.239481

Adult/Caregiver analysis

Correlation matrix for relevant variables - adult/caregiver

fac_cg[, c(152:171, 205)] <- sapply(fac_cg[, c(152:171, 205)], as.numeric)#changing to numeric
cor_fac_cg <- cor(fac_cg[, c(152:171, 205)], use = "pairwise.complete.obs") #including relevant data for analysis
corrplot(cor_fac_cg, method = "number")

Scree plot - this plot determines relatedness using correlations - adult/caregiver

Again, here we are working with only 2 factors.

library(ggplot2)

fafitfree_cg <- fa(fac_cg_t4, nfactors = 2, rotate = "none")
n_factors_cg <- length(fafitfree_cg$e.values)
scree_cg     <- data.frame(
  Factor_n =  as.factor(1:n_factors), 
  Eigenvalue = fafitfree_cg$e.values)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) + 
  geom_point() + geom_line() +
  xlab("Number of factors") +
  ylab("Initial eigenvalue") +
  labs( title = "Scree Plot", 
        subtitle = "(Based on the unreduced correlation matrix)")

Parallel analysis - adult/caregiver

The scree plot will show how many factors to consider where the FA actual data line bows

par_cg <- fa.parallel(fac_cg_t4, fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
par_cg
## Call: fa.parallel(x = fac_cg_t4, fa = "fa")
## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA 
## 
##  Eigen Values of 
## 
##  eigen values of factors
##  [1]  4.36  1.99  0.59  0.45  0.31  0.17  0.07  0.04 -0.04 -0.06 -0.11 -0.12
## [13] -0.14 -0.20 -0.27 -0.30 -0.37 -0.38 -0.47 -0.54 -0.60
## 
##  eigen values of simulated factors
##  [1]  0.81  0.59  0.51  0.44  0.35  0.29  0.23  0.16  0.11  0.05 -0.01 -0.06
## [13] -0.09 -0.15 -0.20 -0.25 -0.30 -0.35 -0.39 -0.44 -0.50
## 
##  eigen values of components 
##  [1] 5.02 2.89 1.40 1.26 1.19 0.91 0.88 0.85 0.81 0.71 0.68 0.64 0.60 0.54 0.49
## [16] 0.45 0.42 0.38 0.34 0.30 0.23
## 
##  eigen values of simulated components
## [1] NA

Parallel analysis suggests that the number of factors = 2 for adult/caregiver questions.

Factor analysis using FA, we are now using 2 factors.

fa.none_cg <- fa(r=fac_cg_t4, 
 nfactors = 2, 
 # covar = FALSE, SMC = TRUE,
 fm="pa", # type of factor analysis we want to use (“pa” is principal axis factoring)
 max.iter=100, # (50 is the default, but we have changed it to 100
 rotate="oblimin") # none rotation
## Loading required namespace: GPArotation
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : I
## am sorry, to do these rotations requires the GPArotation package to be installed
fa.none_cg
## Factor Analysis using method =  pa
## Call: fa(r = fac_cg_t4, nfactors = 2, rotate = "oblimin", max.iter = 100, 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##           PA1   PA2    h2   u2 com
## fac4p001 0.56  0.04 0.316 0.68 1.0
## fac4p002 0.38  0.28 0.221 0.78 1.8
## fac4p003 0.47 -0.05 0.222 0.78 1.0
## fac4p004 0.29  0.35 0.208 0.79 1.9
## fac4p005 0.36 -0.11 0.143 0.86 1.2
## fac4p006 0.15  0.38 0.164 0.84 1.3
## fac4p007 0.54  0.02 0.297 0.70 1.0
## fac4p008 0.53  0.54 0.571 0.43 2.0
## fac4p009 0.67 -0.07 0.456 0.54 1.0
## fac4p010 0.49  0.08 0.244 0.76 1.1
## fac4p011 0.73 -0.19 0.571 0.43 1.1
## fac4p012 0.04  0.51 0.261 0.74 1.0
## fac4p013 0.69 -0.21 0.514 0.49 1.2
## fac4p014 0.11  0.55 0.317 0.68 1.1
## fac4p015 0.57 -0.36 0.458 0.54 1.7
## fac4p016 0.40  0.12 0.176 0.82 1.2
## fac4p017 0.68 -0.02 0.458 0.54 1.0
## fac4p018 0.02  0.60 0.365 0.63 1.0
## fac4p019 0.52 -0.32 0.371 0.63 1.7
## fac4p020 0.02  0.55 0.303 0.70 1.0
## arrestT4 0.03  0.11 0.014 0.99 1.2
## 
##                        PA1  PA2
## SS loadings           4.42 2.23
## Proportion Var        0.21 0.11
## Cumulative Var        0.21 0.32
## Proportion Explained  0.66 0.34
## Cumulative Proportion 0.66 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  210  with the objective function =  6.55 with Chi Square =  970.73
## df of  the model are 169  and the objective function was  1.74 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic n.obs is  157 with the empirical chi square  253.62  with prob <  2.7e-05 
## The total n.obs was  157  with Likelihood Chi Square =  254.97  with prob <  2.1e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.858
## RMSEA index =  0.057  and the 90 % confidence intervals are  0.042 0.071
## BIC =  -599.54
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.94 0.88
## Multiple R square of scores with factors          0.89 0.78
## Minimum correlation of possible factor scores     0.77 0.56

Factor analysis using factanal method - adult/caregiver

Here, we changed the cutoff to 0.4. Some factors overlap and appear to drop out.

factanal.none_cg <- factanal(fac_cg_t4, factors=2, scores = c("regression"), rotation = "none")
print(factanal.none_cg, cut = 0.4)
## 
## Call:
## factanal(x = fac_cg_t4, factors = 2, scores = c("regression"),     rotation = "none")
## 
## Uniquenesses:
## fac4p001 fac4p002 fac4p003 fac4p004 fac4p005 fac4p006 fac4p007 fac4p008 
##    0.695    0.801    0.775    0.805    0.861    0.817    0.695    0.420 
## fac4p009 fac4p010 fac4p011 fac4p012 fac4p013 fac4p014 fac4p015 fac4p016 
##    0.544    0.765    0.419    0.751    0.478    0.674    0.535    0.813 
## fac4p017 fac4p018 fac4p019 fac4p020 arrestT4 
##    0.564    0.642    0.612    0.701    0.986 
## 
## Loadings:
##          Factor1 Factor2
## fac4p001  0.550         
## fac4p002                
## fac4p003  0.473         
## fac4p004                
## fac4p005                
## fac4p006          0.404 
## fac4p007  0.550         
## fac4p008  0.510   0.566 
## fac4p009  0.675         
## fac4p010  0.473         
## fac4p011  0.744         
## fac4p012          0.498 
## fac4p013  0.699         
## fac4p014          0.562 
## fac4p015  0.593         
## fac4p016  0.409         
## fac4p017  0.660         
## fac4p018          0.598 
## fac4p019  0.545         
## fac4p020          0.547 
## arrestT4                
## 
##                Factor1 Factor2
## SS loadings      4.409   2.237
## Proportion Var   0.210   0.107
## Cumulative Var   0.210   0.317
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 254.25 on 169 degrees of freedom.
## The p-value is 2.42e-05

Factor loading matrix for adult/caregiver data - this provides a statistical categorization of items into constructs or factors. Some items may not be included in the factor. You can determine whether that’s true and we can reexamine our EFA

Changing the cutoff to 0.4 leaves us with non-correlated items and factors. See below.

fa.diagram(fa.none_cg, cut = 0.4)

Factor matrix - adult/caregiver

You can see the numeric change due to the increased cutoff in factor loadings here.

nfacts_cg <- 2
fit_cg <- factanal(fac_cg_t4,nfacts_cg, rotation="none")
print(fit_cg, digits=3, cutoff=0.4, sort=TRUE)
## 
## Call:
## factanal(x = fac_cg_t4, factors = nfacts_cg, rotation = "none")
## 
## Uniquenesses:
## fac4p001 fac4p002 fac4p003 fac4p004 fac4p005 fac4p006 fac4p007 fac4p008 
##    0.695    0.801    0.775    0.805    0.861    0.817    0.695    0.420 
## fac4p009 fac4p010 fac4p011 fac4p012 fac4p013 fac4p014 fac4p015 fac4p016 
##    0.544    0.765    0.419    0.751    0.478    0.674    0.535    0.813 
## fac4p017 fac4p018 fac4p019 fac4p020 arrestT4 
##    0.564    0.642    0.612    0.701    0.986 
## 
## Loadings:
##          Factor1 Factor2
## fac4p001  0.550         
## fac4p007  0.550         
## fac4p009  0.675         
## fac4p011  0.744         
## fac4p013  0.699         
## fac4p015  0.593         
## fac4p017  0.660         
## fac4p019  0.545         
## fac4p008  0.510   0.566 
## fac4p014          0.562 
## fac4p018          0.598 
## fac4p020          0.547 
## fac4p002                
## fac4p003  0.473         
## fac4p004                
## fac4p005                
## fac4p006          0.404 
## fac4p010  0.473         
## fac4p012          0.498 
## fac4p016  0.409         
## arrestT4                
## 
##                Factor1 Factor2
## SS loadings      4.409   2.237
## Proportion Var   0.210   0.107
## Cumulative Var   0.210   0.317
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 254.25 on 169 degrees of freedom.
## The p-value is 2.42e-05

Total of 21 factors - adult/caregiver

round(fit_cg$loadings[ 1:21,], 2)
##          Factor1 Factor2
## fac4p001    0.55    0.05
## fac4p002    0.35    0.28
## fac4p003    0.47   -0.03
## fac4p004    0.26    0.36
## fac4p005    0.36   -0.10
## fac4p006    0.14    0.40
## fac4p007    0.55    0.04
## fac4p008    0.51    0.57
## fac4p009    0.67   -0.04
## fac4p010    0.47    0.11
## fac4p011    0.74   -0.17
## fac4p012    0.02    0.50
## fac4p013    0.70   -0.18
## fac4p014    0.10    0.56
## fac4p015    0.59   -0.34
## fac4p016    0.41    0.14
## fac4p017    0.66    0.00
## fac4p018    0.01    0.60
## fac4p019    0.55   -0.30
## fac4p020    0.00    0.55
## arrestT4    0.03    0.11
dim(factanal.none_cg$scores)
## [1] 157   2
facscores_cg <- factanal.none_cg$scores

Constructing coefficient between two factors loadings for adult/caregiver data. This may serve as an option for a separate “construct score” from the factor loadings.

factor_loadings_cg <- round(fit_cg$loadings[ 1:21,], 2)
cor_load_cg <- cor(factor_loadings_cg, use = "pairwise.complete.obs") #including relevant data for analysis
cor_load_cg
##            Factor1    Factor2
## Factor1  1.0000000 -0.7567962
## Factor2 -0.7567962  1.0000000

Factor score correlations - adult/caregiver

factor_scores_cg <- as.data.frame(factanal.none_cg$scores)
cor_score_cg <- cor(factor_scores_cg, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_cg
##              Factor1      Factor2
## Factor1 1.000000e+00 4.814896e-16
## Factor2 4.814896e-16 1.000000e+00

Selecting data and Appending scores to full file for analysis - adult/caregiver

fac_cg_t4 %>%
  select(arrestT4) -> arrest_var_cg
fac_score_arrest_cg <- cbind(arrest_var_cg, factor_scores_cg)

Correlations between factor scores, including the outcome variable, arrest - adult/caregiver

cor_score_arrest_cg <- cor(fac_score_arrest_cg, use = "pairwise.complete.obs") #including relevant data for analysis
cor_score_arrest_cg
##           arrestT4      Factor1      Factor2
## arrestT4 1.0000000 3.127920e-02 1.284758e-01
## Factor1  0.0312792 1.000000e+00 4.814896e-16
## Factor2  0.1284758 4.814896e-16 1.000000e+00

Logit Model with factor scores as independent variables and arrest as the outcome - adult/caregiver

In these results, we can see in the coefficients and ORs below that there are no factors associated with arrest among the caregiver group. That seems like a relevant outcome.

##recoding arrest variable
fac_score_arrest_cg$arrest_bin[fac_score_arrest_cg$arrestT4 == 1] <- "0"
fac_score_arrest_cg$arrest_bin[fac_score_arrest_cg$arrestT4 == 2] <- "1"

fac_mod_cg <- glm(as.numeric(arrest_bin) ~ Factor1 + Factor2, data = fac_score_arrest_cg, family = "binomial")
summ(fac_mod_cg, digits = 3)
## MODEL INFO:
## Observations: 157
## Dependent Variable: as.numeric(arrest_bin)
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## χ²(2) = 2.709, p = 0.258
## Pseudo-R² (Cragg-Uhler) = 0.027
## Pseudo-R² (McFadden) = 0.017
## AIC = 159.301, BIC = 168.470 
## 
## Standard errors: MLE
## ---------------------------------------------------
##                       Est.    S.E.   z val.       p
## ----------------- -------- ------- -------- -------
## (Intercept)         -1.434   0.206   -6.966   0.000
## Factor1              0.079   0.211    0.372   0.710
## Factor2              0.362   0.228    1.590   0.112
## ---------------------------------------------------

Odds ratios and 95% CIs - these are simply exponentiated coefficients from above with CIs - adult_caregiver

exp(cbind(OR = coef(fac_mod_cg), confint(fac_mod_cg)))
## Waiting for profiling to be done...
##                    OR     2.5 %    97.5 %
## (Intercept) 0.2382879 0.1560396 0.3510328
## Factor1     1.0818597 0.7143731 1.6450557
## Factor2     1.4362186 0.9214228 2.2623396