#Began by setting working directory, creating a chunk, and reading in data.
data <- read.csv("Data_HW1.csv")
#Installing necessary packages and loading libraries that I'll need
#install.packages("lavaan")
#install.packages("semTools")
#install.packages("semPlot")
library(psych)
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
library(semTools)
##
## ###############################################################################
## This is semTools 0.5-6
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
##
## Attaching package: 'semTools'
## The following objects are masked from 'package:psych':
##
## reliability, skew
library(semPlot)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ stringr 1.4.1
## ✔ tidyr 1.2.1 ✔ forcats 0.5.2
## ✔ readr 2.1.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ readr::clipboard() masks semTools::clipboard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
#Calculating descriptives for my data set, beginning with the gender of my participants. I'll have (1=male, 2=female)
Gender <- factor(data$sexrsp, levels = 1:2, labels = c("Male", "Female"))
summary(Gender)
## Male Female
## 4637 5051
#gender is fairly well distributed, slightly more females.
#Next I'll go through and look at the age of my participants. Using the mutate function to make a new column because the age data is #only provided within the data by birth year
data <- mutate(data, Age = 103-(data$byear))
psych::describe(data$Age)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 9688 64.16 0.51 64 64.1 0 63 66 3 1.1 2.71 0.01
#Now I'll look at the education levels of my participants.
library(tidyverse) #needed to change the code
data$educ92 <- as.numeric(data$educ92)
psych::describe(data$educ92)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8452 13.61 2.26 12 13.21 0 12 21 9 1.14 0.04 0.02
#this looks a bit deceptive because the SD is 2.26. Might be better off reporting the median.
#And finally I'll look at the nationality data. Beginning with nationality on the father's side. I know this variable was coded with a #ton of options from reviewing the codebook. Beginning here by checking to see how R #coded the variable.
glimpse(data$ge096fa)
## int [1:9688] NA 14 4 NA 2 14 14 NA 14 14 ...
#R coded this as an interval, so I'll change it to a factor since I know this variable has a ton of levels to it
data$ge096fa <- as.factor(data$ge096fa)
summary(data$ge096fa)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 633 108 52 463 549 166 128 182 6 82 81 231 4 3417 474 202
## 17 18 19 20 22 23 24 25 26 27 28 29 30 31 32 33
## 43 21 47 4 20 10 91 3 8 21 55 2 1 1 1 1
## 34 35 36 37 38 39 40 41 42 NA's
## 1 5 16 11 20 9 16 11 4 2488
#Repeating this same process for the nationality on the mother's side
glimpse(data$ge095ma)
## int [1:9688] NA 4 1 NA 41 14 4 NA 14 14 ...
data$ge095ma <- as.factor(data$ge095ma)
summary(data$ge095ma)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 622 91 38 495 554 151 104 168 28 75 81 218 14 3106 527 192
## 17 18 19 20 22 23 24 25 26 27 28 30 32 33 35 36
## 48 31 59 3 50 7 80 5 8 32 36 1 1 1 2 16
## 37 38 39 40 41 42 43 44 46 47 NA's
## 8 58 26 27 32 11 3 1 1 1 2676
#Now that the descriptives are done I'll truncate to my identity items only
dataT <- data[grep("in501rer$", colnames(data)):grep("in507rer$", colnames(data))]
#Renaming my variables so I can tell which is which when I'm analyzing the outputs
dataT <- dataT %>%
rename(ID_work = in501rer,
ID_relig = in502rer,
ID_fam = in503rer,
ID_volunt = in504rer,
ID_union = in505rer,
ID_political = in506rer,
ID_nation = in507rer)
#Creating a correlation matrix
cor_matrix <- cor(dataT[, c("ID_work", "ID_relig", "ID_fam", "ID_volunt", "ID_union", "ID_political", "ID_nation")], use = "complete", method = "pearson")
print(cor_matrix)
## ID_work ID_relig ID_fam ID_volunt ID_union ID_political
## ID_work 1.0000000 0.2792627 0.3178501 0.3187617 0.3245953 0.2274303
## ID_relig 0.2792627 1.0000000 0.3957867 0.4068193 0.3168857 0.2216130
## ID_fam 0.3178501 0.3957867 1.0000000 0.3038079 0.2239655 0.1543646
## ID_volunt 0.3187617 0.4068193 0.3038079 1.0000000 0.6495276 0.3410393
## ID_union 0.3245953 0.3168857 0.2239655 0.6495276 1.0000000 0.4479959
## ID_political 0.2274303 0.2216130 0.1543646 0.3410393 0.4479959 1.0000000
## ID_nation 0.2614723 0.2816966 0.2044126 0.3231119 0.3972244 0.5039401
## ID_nation
## ID_work 0.2614723
## ID_relig 0.2816966
## ID_fam 0.2044126
## ID_volunt 0.3231119
## ID_union 0.3972244
## ID_political 0.5039401
## ID_nation 1.0000000
#What jumps out right way is that volunteerism and union/group membership have the strongest correlation of this #matrix with a .649 #correlation, which makes sense as I would theoretically expect those identity elements to share similarity with one another. I'm also #seeing that nationality/ethnicity and political identity are correlating more strongly, and then there's a weaker but consistent #correlation relationship with #religion/family/volunteerism. Interestingly, I'm seeing political identity and family correlate the #weakest.
#Creating a covariance matrix
cov_matrix <- cov(dataT[, c("ID_work", "ID_relig", "ID_fam", "ID_volunt", "ID_union", "ID_political", "ID_nation")] ,use = "complete")
print(cov_matrix)
## ID_work ID_relig ID_fam ID_volunt ID_union ID_political
## ID_work 3.4519497 1.0836549 0.8668438 1.0220569 1.0973510 0.7619236
## ID_relig 1.0836549 4.3620590 1.2133700 1.4663040 1.2042580 0.8345877
## ID_fam 0.8668438 1.2133700 2.1546295 0.7695947 0.5981891 0.4085685
## ID_volunt 1.0220569 1.4663040 0.7695947 2.9781975 2.0396021 1.0612363
## ID_union 1.0973510 1.2042580 0.5981891 2.0396021 3.3108706 1.4698602
## ID_political 0.7619236 0.8345877 0.4085685 1.0612363 1.4698602 3.2513359
## ID_nation 0.9173476 1.1109728 0.5665912 1.0529450 1.3648441 1.7158757
## ID_nation
## ID_work 0.9173476
## ID_relig 1.1109728
## ID_fam 0.5665912
## ID_volunt 1.0529450
## ID_union 1.3648441
## ID_political 1.7158757
## ID_nation 3.5657587
#What sticks out to me here is that nationality/ethnicity and political identity are co-varying together more
#strongly than most of the the others. I saw a correlation relationship with those two variables in the correlation matrix as well. #The .64 correlation I saw above is also standing out here, between volunteerism and
#union/group membership, showing that these two variables also co-vary together.
#Now I'm going to create a null model syntax for my variables
model0 <- '
ID_work ~~ ID_work
ID_relig ~~ ID_relig
ID_fam ~~ ID_fam
ID_volunt ~~ ID_volunt
ID_union ~~ ID_union
ID_political ~~ ID_political
ID_nation ~~ ID_nation'
#Fitting the null model
fit0 <- cfa(model0, data = dataT)
#view summary
summary(fit0, standard = TRUE, fit.measures = TRUE)
## lavaan 0.6.15 ended normally after 9 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 7
##
## Used Total
## Number of observations 6312 9688
##
## Model Test User Model:
##
## Test statistic 11105.204
## Degrees of freedom 21
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 11105.204
## Degrees of freedom 21
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.000
## Tucker-Lewis Index (TLI) -0.000
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -88628.385
## Loglikelihood unrestricted model (H1) -83075.783
##
## Akaike (AIC) 177270.770
## Bayesian (BIC) 177318.022
## Sample-size adjusted Bayesian (SABIC) 177295.778
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.289
## 90 Percent confidence interval - lower 0.285
## 90 Percent confidence interval - upper 0.294
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.300
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ID_work 3.451 0.061 56.178 0.000 3.451 1.000
## ID_relig 4.361 0.078 56.178 0.000 4.361 1.000
## ID_fam 2.154 0.038 56.178 0.000 2.154 1.000
## ID_volunt 2.978 0.053 56.178 0.000 2.978 1.000
## ID_union 3.310 0.059 56.178 0.000 3.310 1.000
## ID_political 3.251 0.058 56.178 0.000 3.251 1.000
## ID_nation 3.565 0.063 56.178 0.000 3.565 1.000
#This is what I'll use to compare against my other models. I suspect that my null model will not be my best
#fitting model, as it does not fit well.
#Installing and loading GAIPE
#install.packages("GAIPE")
library(GAIPE)
#Putting in the values for my Null model so that I can find my confidence intervals
rmsea <- 0.2894
df <- 21
N <- 6312
clevel <- 0.90
# Calculate the CI for the Null model RMSEA
ci <- CI.RMSEA(rmsea, df, N, clevel)
ci
## $Lower.CI
## [1] 0.2848927
##
## $RMSEA
## [1] 0.2894
##
## $Upper.CI
## [1] 0.2939333
#Now I'm going to build my two factor model. First, I'll write the model syntax. What this does is allow me to
#define my latent variables. F1 is going to represent my variables that are more intrinsically salient. And then
#F2 is going to represent my variables that are more externally salient.
CFA_ID<-
'F1 = ~ ID_work + ID_relig + ID_fam
F2 = ~ ID_volunt + ID_union + ID_political + ID_nation'
#Now I'm going to fit the model, and plot it
CFA_IDfit <- cfa(CFA_ID, data = dataT)
summary(CFA_IDfit, standard = TRUE, fit.measures = TRUE)
## lavaan 0.6.15 ended normally after 37 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 15
##
## Used Total
## Number of observations 6312 9688
##
## Model Test User Model:
##
## Test statistic 1283.951
## Degrees of freedom 13
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 11105.204
## Degrees of freedom 21
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.885
## Tucker-Lewis Index (TLI) 0.815
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -83717.758
## Loglikelihood unrestricted model (H1) -83075.783
##
## Akaike (AIC) 167465.517
## Bayesian (BIC) 167566.770
## Sample-size adjusted Bayesian (SABIC) 167519.104
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.124
## 90 Percent confidence interval - lower 0.119
## 90 Percent confidence interval - upper 0.130
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.058
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F1 =~
## ID_work 1.000 0.984 0.530
## ID_relig 1.352 0.047 28.747 0.000 1.330 0.637
## ID_fam 0.839 0.030 27.522 0.000 0.825 0.562
## F2 =~
## ID_volunt 1.000 1.315 0.762
## ID_union 1.122 0.021 52.958 0.000 1.475 0.811
## ID_political 0.754 0.019 39.452 0.000 0.991 0.550
## ID_nation 0.755 0.020 37.750 0.000 0.992 0.526
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F1 ~~
## F2 0.896 0.035 25.726 0.000 0.693 0.693
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ID_work 2.484 0.054 45.658 0.000 2.484 0.720
## .ID_relig 2.594 0.069 37.437 0.000 2.594 0.595
## .ID_fam 1.474 0.034 43.616 0.000 1.474 0.684
## .ID_volunt 1.249 0.034 36.875 0.000 1.249 0.419
## .ID_union 1.134 0.037 30.436 0.000 1.134 0.342
## .ID_political 2.269 0.045 50.640 0.000 2.269 0.698
## .ID_nation 2.580 0.050 51.314 0.000 2.580 0.724
## F1 0.968 0.053 18.210 0.000 1.000 1.000
## F2 1.729 0.054 31.734 0.000 1.000 1.000
semPaths(CFA_IDfit)

# Then I'm running additional code to find my factor pattern and factor correlations. My $lambda is showing me
#my pathways
inspect(cfa(CFA_IDfit, dataT), what = "std")
## $lambda
## F1 F2
## ID_work 0.530 0.000
## ID_relig 0.637 0.000
## ID_fam 0.562 0.000
## ID_volunt 0.000 0.762
## ID_union 0.000 0.811
## ID_political 0.000 0.550
## ID_nation 0.000 0.526
##
## $theta
## ID_wrk ID_rlg ID_fam ID_vln ID_unn ID_plt ID_ntn
## ID_work 0.720
## ID_relig 0.000 0.595
## ID_fam 0.000 0.000 0.684
## ID_volunt 0.000 0.000 0.000 0.419
## ID_union 0.000 0.000 0.000 0.000 0.342
## ID_political 0.000 0.000 0.000 0.000 0.000 0.698
## ID_nation 0.000 0.000 0.000 0.000 0.000 0.000 0.724
##
## $psi
## F1 F2
## F1 1.000
## F2 0.693 1.000
#right away it looks like my F2 might be better served being split between another latent variable, volunt/union hang better together #and political/nation hang better together.
#Building my one factor model. Re-writing the model syntax. F3 is going to see if identity is better explained
#by these variables within a single factor model
CFA_ID2<-
'F3 = ~ ID_work + ID_relig + ID_fam + ID_volunt + ID_union + ID_political + ID_nation'
#Now I'm going to fit the model, and plot it
CFA_ID2fit <- cfa(CFA_ID2, data = dataT)
summary(CFA_ID2fit, standard = TRUE, fit.measures = TRUE)
## lavaan 0.6.15 ended normally after 33 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 14
##
## Used Total
## Number of observations 6312 9688
##
## Model Test User Model:
##
## Test statistic 1858.735
## Degrees of freedom 14
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 11105.204
## Degrees of freedom 21
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.834
## Tucker-Lewis Index (TLI) 0.750
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -84005.150
## Loglikelihood unrestricted model (H1) -83075.783
##
## Akaike (AIC) 168038.301
## Bayesian (BIC) 168132.804
## Sample-size adjusted Bayesian (SABIC) 168088.315
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.144
## 90 Percent confidence interval - lower 0.139
## 90 Percent confidence interval - upper 0.150
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.072
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F3 =~
## ID_work 1.000 0.840 0.452
## ID_relig 1.241 0.046 26.990 0.000 1.042 0.499
## ID_fam 0.689 0.030 23.317 0.000 0.579 0.394
## ID_volunt 1.563 0.048 32.499 0.000 1.312 0.760
## ID_union 1.693 0.052 32.716 0.000 1.421 0.781
## ID_political 1.170 0.041 28.303 0.000 0.982 0.545
## ID_nation 1.198 0.043 27.973 0.000 1.006 0.533
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ID_work 2.746 0.052 52.954 0.000 2.746 0.796
## .ID_relig 3.276 0.063 51.999 0.000 3.276 0.751
## .ID_fam 1.820 0.034 53.875 0.000 1.820 0.845
## .ID_volunt 1.256 0.033 38.064 0.000 1.256 0.422
## .ID_union 1.291 0.036 35.663 0.000 1.291 0.390
## .ID_political 2.286 0.045 50.833 0.000 2.286 0.703
## .ID_nation 2.554 0.050 51.167 0.000 2.554 0.716
## F3 0.705 0.041 17.178 0.000 1.000 1.000
semPaths(CFA_ID2fit)

#I can see right away that my fit is worse here. My RMSEA increased to 0.144 and my CLI and TLI dropped
#Running additional code to find my factor pattern and factor correlations.
inspect(cfa(CFA_ID2fit, dataT), what = "std")
## $lambda
## F3
## ID_work 0.452
## ID_relig 0.499
## ID_fam 0.394
## ID_volunt 0.760
## ID_union 0.781
## ID_political 0.545
## ID_nation 0.533
##
## $theta
## ID_wrk ID_rlg ID_fam ID_vln ID_unn ID_plt ID_ntn
## ID_work 0.796
## ID_relig 0.000 0.751
## ID_fam 0.000 0.000 0.845
## ID_volunt 0.000 0.000 0.000 0.422
## ID_union 0.000 0.000 0.000 0.000 0.390
## ID_political 0.000 0.000 0.000 0.000 0.000 0.703
## ID_nation 0.000 0.000 0.000 0.000 0.000 0.000 0.716
##
## $psi
## F3
## F3 1
#Here the union variable looks like it's loading quite high.
#Creating an alternate drop model and dropping nationality/ethnicity from my model completely. This variable had a
#strong correlation with political identity in my matrices.
Alt_Model <-
'F1 = ~ ID_ work + ID_relig + ID_fam
F2 = ~ + ID_volunt + ID_union + ID_political
'
# Fitting my alternative model and plotting it
Alt_Model_fit <- cfa((Alt_Model), data = dataT)
summary(Alt_Model_fit, standard = TRUE, fit.measures = TRUE)
## lavaan 0.6.15 ended normally after 39 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Used Total
## Number of observations 6318 9688
##
## Model Test User Model:
##
## Test statistic 424.107
## Degrees of freedom 8
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 8723.780
## Degrees of freedom 15
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.952
## Tucker-Lewis Index (TLI) 0.910
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -71589.550
## Loglikelihood unrestricted model (H1) -71377.496
##
## Akaike (AIC) 143205.100
## Bayesian (BIC) 143292.865
## Sample-size adjusted Bayesian (SABIC) 143251.554
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.091
## 90 Percent confidence interval - lower 0.083
## 90 Percent confidence interval - upper 0.098
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.992
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.037
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F1 =~
## ID_work 1.000 0.972 0.523
## ID_relig 1.374 0.048 28.351 0.000 1.335 0.639
## ID_fam 0.856 0.031 27.289 0.000 0.832 0.567
## F2 =~
## ID_volunt 1.000 1.379 0.799
## ID_union 1.073 0.022 49.246 0.000 1.479 0.813
## ID_political 0.647 0.018 35.501 0.000 0.891 0.494
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F1 ~~
## F2 0.909 0.036 25.328 0.000 0.678 0.678
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ID_work 2.505 0.055 45.843 0.000 2.505 0.726
## .ID_relig 2.580 0.070 36.887 0.000 2.580 0.591
## .ID_fam 1.462 0.034 43.064 0.000 1.462 0.679
## .ID_volunt 1.079 0.037 28.797 0.000 1.079 0.362
## .ID_union 1.125 0.042 26.776 0.000 1.125 0.340
## .ID_political 2.460 0.047 51.978 0.000 2.460 0.756
## F1 0.945 0.053 17.919 0.000 1.000 1.000
## F2 1.901 0.059 32.231 0.000 1.000 1.000
semPaths(Alt_Model_fit)

#Immediately see an improved fit! RMSEA fell to 0.091
#Running additional code to find my factor pattern and factor correlations.
inspect(cfa(Alt_Model_fit, dataT), what = "std")
## $lambda
## F1 F2
## ID_work 0.523 0.000
## ID_relig 0.639 0.000
## ID_fam 0.567 0.000
## ID_volunt 0.000 0.799
## ID_union 0.000 0.813
## ID_political 0.000 0.494
##
## $theta
## ID_wrk ID_rlg ID_fam ID_vln ID_unn ID_plt
## ID_work 0.726
## ID_relig 0.000 0.591
## ID_fam 0.000 0.000 0.679
## ID_volunt 0.000 0.000 0.000 0.362
## ID_union 0.000 0.000 0.000 0.000 0.340
## ID_political 0.000 0.000 0.000 0.000 0.000 0.756
##
## $psi
## F1 F2
## F1 1.000
## F2 0.678 1.000
#union is still sticking out
#Expected var/cov matrix for my alternative model with ethnicity/nationality dropped
SMat_Alt_Model <- fitted(Alt_Model_fit)
SMat_Alt_Model
## $cov
## ID_wrk ID_rlg ID_fam ID_vln ID_unn ID_plt
## ID_work 3.449
## ID_relig 1.298 4.364
## ID_fam 0.809 1.111 2.154
## ID_volunt 0.909 1.249 0.778 2.980
## ID_union 0.975 1.339 0.835 2.039 3.312
## ID_political 0.588 0.807 0.503 1.229 1.318 3.254
#I'm still seeing that union and volunteerism co-vary strongly together
#Residual matrix for my alternative model with ethnicity/nationality dropped
SMat_Alt_Model <- resid(Alt_Model_fit)
SMat_Alt_Model
## $type
## [1] "raw"
##
## $cov
## ID_wrk ID_rlg ID_fam ID_vln ID_unn ID_plt
## ID_work 0.000
## ID_relig -0.215 0.000
## ID_fam 0.057 0.104 0.000
## ID_volunt 0.113 0.220 -0.008 0.000
## ID_union 0.123 -0.135 -0.237 0.002 0.000
## ID_political 0.175 0.030 -0.092 -0.165 0.152 0.000
#And here they stick out to me again, the residual is at 0.002 for volunteer and union, which would typically be good, but I'm #suspicious of it because these two variable have shown strong covariance and correlation throughout
#I want to test to see what happens if I drop union/group membership as well from my model
Alt_Model2 <-
'F1 = ~ ID_work + ID_relig + ID_fam
F2 = ~ + ID_volunt + ID_political
'
# Fitting my second alternative model and plotting it
Alt_Model2_fit <- cfa((Alt_Model2), data = dataT)
summary(Alt_Model2_fit, standard = TRUE, fit.measures = TRUE)
## lavaan 0.6.15 ended normally after 40 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Used Total
## Number of observations 6370 9688
##
## Model Test User Model:
##
## Test statistic 133.487
## Degrees of freedom 4
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 4521.566
## Degrees of freedom 10
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.971
## Tucker-Lewis Index (TLI) 0.928
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -61326.441
## Loglikelihood unrestricted model (H1) -61259.697
##
## Akaike (AIC) 122674.881
## Bayesian (BIC) 122749.234
## Sample-size adjusted Bayesian (SABIC) 122714.279
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.071
## 90 Percent confidence interval - lower 0.061
## 90 Percent confidence interval - upper 0.082
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.090
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.027
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F1 =~
## ID_work 1.000 0.932 0.502
## ID_relig 1.467 0.052 28.173 0.000 1.368 0.655
## ID_fam 0.903 0.033 27.125 0.000 0.842 0.574
## F2 =~
## ID_volunt 1.000 1.324 0.767
## ID_political 0.608 0.027 22.232 0.000 0.804 0.445
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F1 ~~
## F2 0.958 0.037 25.612 0.000 0.776 0.776
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ID_work 2.582 0.054 47.424 0.000 2.582 0.748
## .ID_relig 2.491 0.069 35.846 0.000 2.491 0.571
## .ID_fam 1.444 0.034 43.070 0.000 1.444 0.671
## .ID_volunt 1.227 0.076 16.166 0.000 1.227 0.412
## .ID_political 2.622 0.054 48.865 0.000 2.622 0.802
## F1 0.869 0.050 17.327 0.000 1.000 1.000
## F2 1.752 0.087 20.096 0.000 1.000 1.000
semPaths(Alt_Model2_fit)

#Even better! RMSEA is now 0.071 and my CFI is at 0.971 with TLI not too far behind at 0.928.
#Running my final factor pattern and factor correlations.
inspect(cfa(Alt_Model2_fit, dataT), what = "std")
## $lambda
## F1 F2
## ID_work 0.502 0.000
## ID_relig 0.655 0.000
## ID_fam 0.574 0.000
## ID_volunt 0.000 0.767
## ID_political 0.000 0.445
##
## $theta
## ID_wrk ID_rlg ID_fam ID_vln ID_plt
## ID_work 0.748
## ID_relig 0.000 0.571
## ID_fam 0.000 0.000 0.671
## ID_volunt 0.000 0.000 0.000 0.412
## ID_political 0.000 0.000 0.000 0.000 0.802
##
## $psi
## F1 F2
## F1 1.000
## F2 0.776 1.000
#I like how my variables are hanging for F1, they look okay. But the variables for F2 are even more spread apart now. This is still not the ideal model for this data.