PSYC 214 HW#1

library(psych)
library(lavaan)

## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.

## 
## Attaching package: 'lavaan'

## The following object is masked from 'package:psych':
## 
##     cor2cov

library(semTools)

##

## ###############################################################################

## This is semTools 0.5-6

## All users of R (or SEM) are invited to submit functions or ideas for functions.

## ###############################################################################

## 
## Attaching package: 'semTools'

## The following objects are masked from 'package:psych':
## 
##     reliability, skew

library(semPlot)
library(naniar)
library(dplyr) #loading packages

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

PersonalIdentity<-read.csv("WLS_HW1_CFA_rev23.csv",header=T,sep=",") #reading in data file

PI1 <- dplyr::select(PersonalIdentity, idpub, sexrsp, byear, educ92, natfth, ge096fa, ge095ma, in501rer, in502rer, in503rer, in504rer, in505rer, in506rer, in507rer) #subsetting variables of interest

vis_miss(PI1) #viewing missingness. There does seem to be quite a bit of missingess; however, missing data will be excluded from analyses.

## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
##   Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.

describe(PI1) #finding descriptives of variables of interest. Females make up about half of the sample. Participants' age is reaching emerging adulthood. Mean number of years of education based on highest degree earned was 13.61. Majority of participants' parental nationality were predominately white.Skewness and kurtosis do not seem to be an issue, they are all acceptable. Data seem to meet normality assumptions

##          vars    n      mean      sd   median   trimmed      mad    min    max
## idpub       1 9688 916831.23 9803.71 916803.5 916798.82 12724.41 900021 933957
## sexrsp      2 9688      1.52    0.50      2.0      1.53     0.00      1      2
## byear       3 9688     38.84    0.51     39.0     38.90     0.00     37     40
## educ92      4 8452     13.61    2.26     12.0     13.21     0.00     12     21
## natfth      5 9410     11.61    6.35     14.0     11.58     1.48      1     41
## ge096fa     6 7200     11.50    6.33     14.0     11.50     1.48      1     42
## ge095ma     7 7012     11.85    7.15     14.0     11.51     1.48      1     47
## in501rer    8 6508      4.92    1.86      5.0      5.15     1.48      1      7
## in502rer    9 6628      4.69    2.09      5.0      4.86     2.97      1      7
## in503rer   10 6581      5.84    1.47      6.0      6.12     1.48      1      7
## in504rer   11 6574      3.83    1.73      4.0      3.83     1.48      1      7
## in505rer   12 6489      3.27    1.82      3.0      3.15     2.97      1      7
## in506rer   13 6569      3.17    1.81      3.0      3.04     1.48      1      7
## in507rer   14 6573      3.18    1.90      3.0      3.02     2.97      1      7
##          range  skew kurtosis    se
## idpub    33936  0.02    -1.21 99.60
## sexrsp       1 -0.09    -1.99  0.01
## byear        3 -1.10     2.71  0.01
## educ92       9  1.14     0.04  0.02
## natfth      40  0.75     3.13  0.07
## ge096fa     41  0.72     3.13  0.07
## ge095ma     46  1.20     3.96  0.09
## in501rer     6 -0.82    -0.36  0.02
## in502rer     6 -0.50    -1.09  0.03
## in503rer     6 -1.55     2.04  0.02
## in504rer     6 -0.02    -0.87  0.02
## in505rer     6  0.33    -0.98  0.02
## in506rer     6  0.39    -0.93  0.02
## in507rer     6  0.43    -0.96  0.02

Sex <- factor(PI1$sexrsp, levels = 1:2, labels = c("Male", "Female")) #turning sex into a factor
summary(Sex) #getting summary/frequency of sex

##   Male Female 
##   4637   5051

var(PI1$sexrsp)

## [1] 0.2495692

var(PI1$byear)

## [1] 0.2566144

var(PI1$in501rer, na.rm=TRUE)

## [1] 3.463347

var(PI1$in502rer, na.rm=TRUE)

## [1] 4.349543

var(PI1$in503rer, na.rm=TRUE)

## [1] 2.14743

var(PI1$in504rer, na.rm=TRUE)

## [1] 2.989324

var(PI1$in505rer, na.rm=TRUE)

## [1] 3.323253

var(PI1$in506rer, na.rm=TRUE)

## [1] 3.280475

var(PI1$in507rer, na.rm=TRUE) #finding variances of participants' sex, birth year, and identity measures. All variances do not warrant concern.

## [1] 3.593664

cor_matrix <- cor(PI1[, c("sexrsp", "byear", "educ92", "ge096fa", "ge095ma", "in501rer", "in502rer", "in503rer", "in504rer", "in505rer", "in506rer", "in507rer")],use = "complete.obs")
print(cor_matrix) #correlation matrix. No identity item shared a coefficient large enough to warrant concern for multicollinearity. The relationships between demographic variables and identity variables are much weaker than the identity variables with themselves.

##                sexrsp         byear       educ92      ge096fa      ge095ma
## sexrsp    1.000000000  0.1138806444 -0.162533776  0.026662324  0.003370268
## byear     0.113880644  1.0000000000  0.128222962 -0.016097271 -0.004332427
## educ92   -0.162533776  0.1282229615  1.000000000 -0.050767992 -0.033842045
## ge096fa   0.026662324 -0.0160972708 -0.050767992  1.000000000  0.254314241
## ge095ma   0.003370268 -0.0043324272 -0.033842045  0.254314241  1.000000000
## in501rer -0.068227346 -0.0024026596  0.057349085  0.004723738 -0.009813353
## in502rer  0.190242593 -0.0026990695 -0.136521597  0.053316114  0.010900563
## in503rer  0.059791596  0.0002852176 -0.041904767  0.029809285  0.018775213
## in504rer  0.048198020  0.0072050635  0.001453911  0.008161050 -0.003042774
## in505rer -0.021504377 -0.0326697638 -0.031779412  0.012461162 -0.004688384
## in506rer -0.050525169 -0.0322035240  0.055836301  0.020349392 -0.011474062
## in507rer -0.017799725 -0.0521535454 -0.036304601  0.050904857  0.051532626
##              in501rer    in502rer      in503rer     in504rer     in505rer
## sexrsp   -0.068227346  0.19024259  0.0597915957  0.048198020 -0.021504377
## byear    -0.002402660 -0.00269907  0.0002852176  0.007205063 -0.032669764
## educ92    0.057349085 -0.13652160 -0.0419047667  0.001453911 -0.031779412
## ge096fa   0.004723738  0.05331611  0.0298092850  0.008161050  0.012461162
## ge095ma  -0.009813353  0.01090056  0.0187752133 -0.003042774 -0.004688384
## in501rer  1.000000000  0.26810466  0.3062585032  0.310688270  0.314206072
## in502rer  0.268104663  1.00000000  0.3944631997  0.405896025  0.314786385
## in503rer  0.306258503  0.39446320  1.0000000000  0.303159717  0.222182602
## in504rer  0.310688270  0.40589602  0.3031597167  1.000000000  0.644023379
## in505rer  0.314206072  0.31478638  0.2221826021  0.644023379  1.000000000
## in506rer  0.223421528  0.21119632  0.1504190754  0.334008047  0.435272824
## in507rer  0.255355382  0.27594593  0.1938785053  0.311975932  0.384454068
##             in506rer    in507rer
## sexrsp   -0.05052517 -0.01779972
## byear    -0.03220352 -0.05215355
## educ92    0.05583630 -0.03630460
## ge096fa   0.02034939  0.05090486
## ge095ma  -0.01147406  0.05153263
## in501rer  0.22342153  0.25535538
## in502rer  0.21119632  0.27594593
## in503rer  0.15041908  0.19387851
## in504rer  0.33400805  0.31197593
## in505rer  0.43527282  0.38445407
## in506rer  1.00000000  0.49664345
## in507rer  0.49664345  1.00000000

cov_matrix <- cov(PI1[, c("sexrsp", "byear", "educ92", "ge096fa", "ge095ma","in501rer", "in502rer", "in503rer", "in504rer", "in505rer", "in506rer", "in507rer")],use = "complete.obs")
print(cov_matrix) #covariance matrix. Matrix appears normal, no causes of concern for identity items.

##               sexrsp         byear       educ92     ge096fa     ge095ma
## sexrsp    0.24910749  0.0276323242 -0.187901266  0.08318199  0.01197815
## byear     0.02763232  0.2363459807  0.144388500 -0.04891750 -0.01499814
## educ92   -0.18790127  0.1443885003  5.365193422 -0.73505631 -0.55818889
## ge096fa   0.08318199 -0.0489175041 -0.735056307 39.07291932 11.31984594
## ge095ma   0.01197815 -0.0149981361 -0.558188892 11.31984594 50.70652052
## in501rer -0.06271171 -0.0021511125  0.244633518  0.05437766 -0.12869038
## in502rer  0.19750059 -0.0027293262 -0.657750688  0.69320818  0.16145374
## in503rer  0.04326092  0.0002010077 -0.140707857  0.27011688  0.19381126
## in504rer  0.04125482  0.0060070883  0.005775405  0.08748550 -0.03715812
## in505rer -0.01949464 -0.0288479544 -0.133700635  0.14147881 -0.06063871
## in506rer -0.04532184 -0.0281373917  0.232442566  0.22861025 -0.14684374
## in507rer -0.01669194 -0.0476385041 -0.157999071  0.59785700  0.68946836
##              in501rer     in502rer      in503rer     in504rer    in505rer
## sexrsp   -0.062711710  0.197500594  0.0432609212  0.041254819 -0.01949464
## byear    -0.002151113 -0.002729326  0.0002010077  0.006007088 -0.02884795
## educ92    0.244633518 -0.657750688 -0.1407078574  0.005775405 -0.13370063
## ge096fa   0.054377663  0.693208177  0.2701168768  0.087485500  0.14147881
## ge095ma  -0.128690379  0.161453737  0.1938112643 -0.037158121 -0.06063871
## in501rer  3.391513443  1.026995164  0.8176118131  0.981236568  1.05100858
## in502rer  1.026995164  4.326478205  1.1894220460  1.447885284  1.18926357
## in503rer  0.817611813  1.189422046  2.1014761293  0.753677966  0.58501526
## in504rer  0.981236568  1.447885284  0.7536779665  2.941062081  2.00608184
## in505rer  1.051008580  1.189263573  0.5850152623  2.006081837  3.29905711
## in506rer  0.739482868  0.789514168  0.3918965786  1.029473928  1.42089765
## in507rer  0.883572020  1.078428678  0.5280706883  1.005248317  1.31201725
##             in506rer    in507rer
## sexrsp   -0.04532184 -0.01669194
## byear    -0.02813739 -0.04763850
## educ92    0.23244257 -0.15799907
## ge096fa   0.22861025  0.59785700
## ge095ma  -0.14684374  0.68946836
## in501rer  0.73948287  0.88357202
## in502rer  0.78951417  1.07842868
## in503rer  0.39189658  0.52807069
## in504rer  1.02947393  1.00524832
## in505rer  1.42089765  1.31201725
## in506rer  3.23007535  1.67707006
## in507rer  1.67707006  3.53020834

PI1 <- mutate(PI1, Age = 103-(PI1$byear)) #turning birth year into age
psych::describe(PI1$Age) #getting summary of age. Participants' age is reaching emerging adulthood, this will be useful in highlighting in the intro/discussion sections.

##    vars    n  mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 9688 64.16 0.51     64    64.1   0  63  66     3  1.1     2.71 0.01

glimpse(PI1$ge096fa) #looking at fathers' nationality

##  int [1:9688] NA 14 4 NA 2 14 14 NA 14 14 ...

PI1$ge096fa <- as.factor(PI1$ge096fa) #turning fathers' nationality into a factor
summary(PI1$ge096fa) #looking at summary/frequency of fathers' nationality. Predominately white, non-Hispanic Americans and consisted mainly of German, English, Irish, Scandinavian, Polish, and Czech ancestry.

##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
##  633  108   52  463  549  166  128  182    6   82   81  231    4 3417  474  202 
##   17   18   19   20   22   23   24   25   26   27   28   29   30   31   32   33 
##   43   21   47    4   20   10   91    3    8   21   55    2    1    1    1    1 
##   34   35   36   37   38   39   40   41   42 NA's 
##    1    5   16   11   20    9   16   11    4 2488

glimpse(PI1$ge095ma) #looking at mothers' nationality

##  int [1:9688] NA 4 1 NA 41 14 4 NA 14 14 ...

PI1$ge095ma <- as.factor(PI1$ge095ma) #turning mothers' nationality into a factor
summary(PI1$ge095ma) #looking at summary/frequency of mothers' nationality. Predominately white, non-Hispanic Americans and consisted mainly of German, English, Irish, Scandinavian, Polish, and Czech ancestry

##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
##  622   91   38  495  554  151  104  168   28   75   81  218   14 3106  527  192 
##   17   18   19   20   22   23   24   25   26   27   28   30   32   33   35   36 
##   48   31   59    3   50    7   80    5    8   32   36    1    1    1    2   16 
##   37   38   39   40   41   42   43   44   46   47 NA's 
##    8   58   26   27   32   11    3    1    1    1 2676

null_model <- "
  in501rer ~~ in501rer
  in502rer ~~ in502rer
  in503rer ~~ in503rer
  in504rer ~~ in504rer
  in505rer ~~ in505rer
  in506rer ~~ in506rer
  in507rer ~~ in507rer"

null_model_fit <- cfa(null_model, data = PI1)
summary(null_model_fit, standard = TRUE, fit.measures = TRUE)

## lavaan 0.6.15 ended normally after 9 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##                                                   Used       Total
##   Number of observations                          6312        9688
## 
## Model Test User Model:
##                                                        
##   Test statistic                              11105.204
##   Degrees of freedom                                 21
##   P-value (Chi-square)                            0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             11105.204
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.000
##   Tucker-Lewis Index (TLI)                      -0.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -88628.385
##   Loglikelihood unrestricted model (H1)     -83075.783
##                                                       
##   Akaike (AIC)                              177270.770
##   Bayesian (BIC)                            177318.022
##   Sample-size adjusted Bayesian (SABIC)     177295.778
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.289
##   90 Percent confidence interval - lower         0.285
##   90 Percent confidence interval - upper         0.294
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.300
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     in501rer          3.451    0.061   56.178    0.000    3.451    1.000
##     in502rer          4.361    0.078   56.178    0.000    4.361    1.000
##     in503rer          2.154    0.038   56.178    0.000    2.154    1.000
##     in504rer          2.978    0.053   56.178    0.000    2.978    1.000
##     in505rer          3.310    0.059   56.178    0.000    3.310    1.000
##     in506rer          3.251    0.058   56.178    0.000    3.251    1.000
##     in507rer          3.565    0.063   56.178    0.000    3.565    1.000

semPaths(null_model_fit)

anova(null_model_fit) #null/baseline model. Null model had a high χ2 value, thus was uninterpretable. In addition, the corresponding p-value was highly significant. All other fit indices (AIC, BIC, CFI, TLI, RMSEA) were not acceptable. This model is the worst fitting model.

## Chi-Squared Test Statistic (unscaled)
## 
##           Df    AIC    BIC Chisq Chisq diff Df diff Pr(>Chisq)    
## Saturated  0                   0                                  
## Model     21 177271 177318 11105      11105      21  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

inspect(cfa(null_model_fit, PI1), what = "std") #null model factor patterns and correlations

## $lambda
##         
## in501rer
## in502rer
## in503rer
## in504rer
## in505rer
## in506rer
## in507rer
## 
## $theta
##          in501r in502r in503r in504r in505r in506r in507r
## in501rer      1                                          
## in502rer      0      1                                   
## in503rer      0      0      1                            
## in504rer      0      0      0      1                     
## in505rer      0      0      0      0      1              
## in506rer      0      0      0      0      0      1       
## in507rer      0      0      0      0      0      0      1
## 
## $psi
## <0 x 0 matrix>

model_1f <- "
  F1 = ~ in501rer + in502rer + in503rer + in504rer + in505rer + in506rer + in507rer"

model_1f_fit <- cfa(model_1f, data = PI1)
summary(model_1f_fit, standard = TRUE, fit.measures = TRUE)

## lavaan 0.6.15 ended normally after 33 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        14
## 
##                                                   Used       Total
##   Number of observations                          6312        9688
## 
## Model Test User Model:
##                                                       
##   Test statistic                              1858.735
##   Degrees of freedom                                14
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             11105.204
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.834
##   Tucker-Lewis Index (TLI)                       0.750
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -84005.150
##   Loglikelihood unrestricted model (H1)     -83075.783
##                                                       
##   Akaike (AIC)                              168038.301
##   Bayesian (BIC)                            168132.804
##   Sample-size adjusted Bayesian (SABIC)     168088.315
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.144
##   90 Percent confidence interval - lower         0.139
##   90 Percent confidence interval - upper         0.150
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.072
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F1 =~                                                                 
##     in501rer          1.000                               0.840    0.452
##     in502rer          1.241    0.046   26.990    0.000    1.042    0.499
##     in503rer          0.689    0.030   23.317    0.000    0.579    0.394
##     in504rer          1.563    0.048   32.499    0.000    1.312    0.760
##     in505rer          1.693    0.052   32.716    0.000    1.421    0.781
##     in506rer          1.170    0.041   28.303    0.000    0.982    0.545
##     in507rer          1.198    0.043   27.973    0.000    1.006    0.533
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .in501rer          2.746    0.052   52.954    0.000    2.746    0.796
##    .in502rer          3.276    0.063   51.999    0.000    3.276    0.751
##    .in503rer          1.820    0.034   53.875    0.000    1.820    0.845
##    .in504rer          1.256    0.033   38.064    0.000    1.256    0.422
##    .in505rer          1.291    0.036   35.663    0.000    1.291    0.390
##    .in506rer          2.286    0.045   50.833    0.000    2.286    0.703
##    .in507rer          2.554    0.050   51.167    0.000    2.554    0.716
##     F1                0.705    0.041   17.178    0.000    1.000    1.000

semPaths(model_1f_fit)

anova(model_1f_fit) #one-factor model. One-factor model had a high χ2 value, thus was uninterpretable. In addition, the corresponding p-value was highly significant. All other fit indices (AIC, BIC, CFI, TLI, RMSEA) were not acceptable. This model did not fit the data well.

## Chi-Squared Test Statistic (unscaled)
## 
##           Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## Saturated  0                  0.0                                  
## Model     14 168038 168133 1858.7     1858.7      14  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

inspect(cfa(model_1f_fit, PI1), what = "std") #one-factor model factor patterns and correlations

## $lambda
##             F1
## in501rer 0.452
## in502rer 0.499
## in503rer 0.394
## in504rer 0.760
## in505rer 0.781
## in506rer 0.545
## in507rer 0.533
## 
## $theta
##          in501r in502r in503r in504r in505r in506r in507r
## in501rer  0.796                                          
## in502rer  0.000  0.751                                   
## in503rer  0.000  0.000  0.845                            
## in504rer  0.000  0.000  0.000  0.422                     
## in505rer  0.000  0.000  0.000  0.000  0.390              
## in506rer  0.000  0.000  0.000  0.000  0.000  0.703       
## in507rer  0.000  0.000  0.000  0.000  0.000  0.000  0.716
## 
## $psi
##    F1
## F1  1

model_2f <- "
  F1 = ~ in501rer + in502rer + in503rer 
  F2 = ~ in504rer + in505rer + in506rer + in507rer"

model_2f_fit <- cfa(model_2f, data = PI1)
summary(model_2f_fit, standard = TRUE, fit.measures = TRUE)

## lavaan 0.6.15 ended normally after 37 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
## 
##                                                   Used       Total
##   Number of observations                          6312        9688
## 
## Model Test User Model:
##                                                       
##   Test statistic                              1283.951
##   Degrees of freedom                                13
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             11105.204
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.885
##   Tucker-Lewis Index (TLI)                       0.815
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -83717.758
##   Loglikelihood unrestricted model (H1)     -83075.783
##                                                       
##   Akaike (AIC)                              167465.517
##   Bayesian (BIC)                            167566.770
##   Sample-size adjusted Bayesian (SABIC)     167519.104
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.124
##   90 Percent confidence interval - lower         0.119
##   90 Percent confidence interval - upper         0.130
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.058
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F1 =~                                                                 
##     in501rer          1.000                               0.984    0.530
##     in502rer          1.352    0.047   28.747    0.000    1.330    0.637
##     in503rer          0.839    0.030   27.522    0.000    0.825    0.562
##   F2 =~                                                                 
##     in504rer          1.000                               1.315    0.762
##     in505rer          1.122    0.021   52.958    0.000    1.475    0.811
##     in506rer          0.754    0.019   39.452    0.000    0.991    0.550
##     in507rer          0.755    0.020   37.750    0.000    0.992    0.526
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F1 ~~                                                                 
##     F2                0.896    0.035   25.726    0.000    0.693    0.693
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .in501rer          2.484    0.054   45.658    0.000    2.484    0.720
##    .in502rer          2.594    0.069   37.437    0.000    2.594    0.595
##    .in503rer          1.474    0.034   43.616    0.000    1.474    0.684
##    .in504rer          1.249    0.034   36.875    0.000    1.249    0.419
##    .in505rer          1.134    0.037   30.436    0.000    1.134    0.342
##    .in506rer          2.269    0.045   50.640    0.000    2.269    0.698
##    .in507rer          2.580    0.050   51.314    0.000    2.580    0.724
##     F1                0.968    0.053   18.210    0.000    1.000    1.000
##     F2                1.729    0.054   31.734    0.000    1.000    1.000

semPaths(model_2f_fit)

anova(model_2f_fit) #two-factor model. Two-factor model had a high χ2 value, thus was uninterpretable. In addition, the corresponding p-value was highly significant. All other fit indices (AIC, BIC, CFI, TLI, RMSEA) were not acceptable. This model did not fit the data well.

## Chi-Squared Test Statistic (unscaled)
## 
##           Df    AIC    BIC Chisq Chisq diff Df diff Pr(>Chisq)    
## Saturated  0                   0                                  
## Model     13 167466 167567  1284       1284      13  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

inspect(cfa(model_2f_fit, PI1), what = "std") #two-factor model factor patterns and correlations.Based on factor pattern, factor structure, and factor correlations, factor one and factor two had a moderate, positive association with one another, r = .69. In addition, volunteer identity and org/group identity contributed the most to factor two. Finally, the variances for volunteer identity and org/group identity were explained the most by the two factors.

## $lambda
##             F1    F2
## in501rer 0.530 0.000
## in502rer 0.637 0.000
## in503rer 0.562 0.000
## in504rer 0.000 0.762
## in505rer 0.000 0.811
## in506rer 0.000 0.550
## in507rer 0.000 0.526
## 
## $theta
##          in501r in502r in503r in504r in505r in506r in507r
## in501rer  0.720                                          
## in502rer  0.000  0.595                                   
## in503rer  0.000  0.000  0.684                            
## in504rer  0.000  0.000  0.000  0.419                     
## in505rer  0.000  0.000  0.000  0.000  0.342              
## in506rer  0.000  0.000  0.000  0.000  0.000  0.698       
## in507rer  0.000  0.000  0.000  0.000  0.000  0.000  0.724
## 
## $psi
##       F1    F2
## F1 1.000      
## F2 0.693 1.000

Model3_2F_UNCOR<-
  'FamWorkRelEth = ~ in501rer + in502rer + in503rer + in507rer
  Altru = ~ in504rer + in505rer + in506rer
  Altru ~~ 0*FamWorkRelEth
' #alternative two-factor model
  
Model3_2F_UNCOR_fit <- cfa(Model3_2F_UNCOR, data = PI1)
summary(Model3_2F_UNCOR_fit, standard = TRUE, fit.measures = TRUE)

## lavaan 0.6.15 ended normally after 42 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        14
## 
##                                                   Used       Total
##   Number of observations                          6312        9688
## 
## Model Test User Model:
##                                                       
##   Test statistic                              3540.379
##   Degrees of freedom                                14
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             11105.204
##   Degrees of freedom                                21
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.682
##   Tucker-Lewis Index (TLI)                       0.523
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -84845.972
##   Loglikelihood unrestricted model (H1)     -83075.783
##                                                       
##   Akaike (AIC)                              169719.945
##   Bayesian (BIC)                            169814.448
##   Sample-size adjusted Bayesian (SABIC)     169769.959
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.200
##   90 Percent confidence interval - lower         0.194
##   90 Percent confidence interval - upper         0.205
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.212
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   FamWorkRelEth =~                                                      
##     in501rer          1.000                               0.936    0.504
##     in502rer          1.399    0.056   25.151    0.000    1.309    0.627
##     in503rer          0.948    0.038   25.143    0.000    0.887    0.604
##     in507rer          0.855    0.040   21.560    0.000    0.800    0.424
##   Altru =~                                                              
##     in504rer          1.000                               1.213    0.703
##     in505rer          1.385    0.039   35.257    0.000    1.681    0.924
##     in506rer          0.721    0.020   35.499    0.000    0.874    0.485
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   FamWorkRelEth ~~                                                      
##     Altru             0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .in501rer          2.576    0.057   44.887    0.000    2.576    0.746
##    .in502rer          2.648    0.078   34.156    0.000    2.648    0.607
##    .in503rer          1.367    0.038   36.439    0.000    1.367    0.635
##    .in507rer          2.925    0.060   49.057    0.000    2.925    0.821
##    .in504rer          1.505    0.046   32.755    0.000    1.505    0.506
##    .in505rer          0.486    0.072    6.734    0.000    0.486    0.147
##    .in506rer          2.486    0.048   51.455    0.000    2.486    0.765
##     FamWorkRelEth     0.876    0.054   16.368    0.000    1.000    1.000
##     Altru             1.472    0.059   24.939    0.000    1.000    1.000

semPaths(Model3_2F_UNCOR_fit)

anova(Model3_2F_UNCOR_fit) #fitting model, plotting. Alternative two-factor model (ethnicity/nationality moved to factor 1 and uncorrelated factor one and two) had a high χ2 value, thus was uninterpretable. In addition, the corresponding p-value was highly significant. All other fit indices (AIC, BIC, CFI, TLI, RMSEA) were not acceptable. This model did not fit the data well.

## Chi-Squared Test Statistic (unscaled)
## 
##           Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## Saturated  0                  0.0                                  
## Model     14 169720 169814 3540.4     3540.4      14  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#The full two-factor model had lower AIC and BIC values than any of the other tested models. In addition, the two-factor model had CFI and TLI values in unacceptable ranges (.885 and .815, respectively). Finally, the two-factor model had an RMSEA at an unacceptable level (.124, CI [.119, .130]). The two-factor model fitted the data the best in comparison to the other tested models; however, this model was still not a good fit to the data.

inspect(cfa(Model3_2F_UNCOR_fit , PI1), what = "std") #alternative two-factor model factor patterns and correlations

## $lambda
##          FmWrRE Altru
## in501rer  0.504 0.000
## in502rer  0.627 0.000
## in503rer  0.604 0.000
## in507rer  0.424 0.000
## in504rer  0.000 0.703
## in505rer  0.000 0.924
## in506rer  0.000 0.485
## 
## $theta
##          in501r in502r in503r in507r in504r in505r in506r
## in501rer  0.746                                          
## in502rer  0.000  0.607                                   
## in503rer  0.000  0.000  0.635                            
## in507rer  0.000  0.000  0.000  0.821                     
## in504rer  0.000  0.000  0.000  0.000  0.506              
## in505rer  0.000  0.000  0.000  0.000  0.000  0.147       
## in506rer  0.000  0.000  0.000  0.000  0.000  0.000  0.765
## 
## $psi
##               FmWrRE Altru
## FamWorkRelEth      1      
## Altru              0     1

sessionInfo()

## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_1.0.10   naniar_0.6.1   semPlot_1.1.6  semTools_0.5-6 lavaan_0.6-15 
## [6] psych_2.2.5   
## 
## loaded via a namespace (and not attached):
##   [1] nlme_3.1-160        RColorBrewer_1.1-3  mi_1.1             
##   [4] tools_4.2.2         backports_1.4.1     bslib_0.4.0        
##   [7] utf8_1.2.2          R6_2.5.1            rpart_4.1.19       
##  [10] Hmisc_4.7-1         DBI_1.1.3           colorspace_2.0-3   
##  [13] nnet_7.3-18         withr_2.5.0         tidyselect_1.1.2   
##  [16] gridExtra_2.3       mnormt_2.1.0        compiler_4.2.2     
##  [19] fdrtool_1.2.17      qgraph_1.9.4        cli_3.4.0          
##  [22] htmlTable_2.4.1     labeling_0.4.2      sass_0.4.2         
##  [25] scales_1.2.1        checkmate_2.1.0     quadprog_1.5-8     
##  [28] pbapply_1.7-0       sem_3.1-15          stringr_1.4.1      
##  [31] digest_0.6.29       pbivnorm_0.6.0      foreign_0.8-83     
##  [34] minqa_1.2.5         rmarkdown_2.16      base64enc_0.1-3    
##  [37] jpeg_0.1-9          pkgconfig_2.0.3     htmltools_0.5.5    
##  [40] lme4_1.1-30         lisrelToR_0.1.5     highr_0.9          
##  [43] fastmap_1.1.0       htmlwidgets_1.6.2   rlang_1.0.6        
##  [46] rstudioapi_0.14     farver_2.1.1        jquerylib_0.1.4    
##  [49] generics_0.1.3      jsonlite_1.8.0      gtools_3.9.4       
##  [52] zip_2.2.2           magrittr_2.0.3      OpenMx_2.21.8      
##  [55] Formula_1.2-4       interp_1.1-3        Matrix_1.5-1       
##  [58] Rcpp_1.0.9          munsell_0.5.0       fansi_1.0.3        
##  [61] abind_1.4-5         rockchalk_1.8.157   visdat_0.5.3       
##  [64] lifecycle_1.0.3     stringi_1.7.8       yaml_2.3.5         
##  [67] carData_3.0-5       MASS_7.3-58.1       plyr_1.8.7         
##  [70] grid_4.2.2          parallel_4.2.2      deldir_1.0-6       
##  [73] lattice_0.20-45     kutils_1.70         splines_4.2.2      
##  [76] knitr_1.40          pillar_1.8.1        igraph_1.4.2       
##  [79] boot_1.3-28         corpcor_1.6.10      reshape2_1.4.4     
##  [82] stats4_4.2.2        XML_3.99-0.14       glue_1.6.2         
##  [85] evaluate_0.16       latticeExtra_0.6-30 RcppParallel_5.1.7 
##  [88] data.table_1.14.2   png_0.1-7           vctrs_0.5.1        
##  [91] nloptr_2.0.3        tidyr_1.2.1         gtable_0.3.1       
##  [94] purrr_0.3.4         assertthat_0.2.1    cachem_1.0.6       
##  [97] ggplot2_3.4.0       xfun_0.33           openxlsx_4.2.5.1   
## [100] xtable_1.8-4        coda_0.19-4         glasso_1.11        
## [103] survival_3.4-0      tibble_3.1.8        arm_1.13-1         
## [106] cluster_2.1.4       ellipsis_0.3.2

citation("lavaan")

## 
## To cite lavaan in publications use:
## 
##   Yves Rosseel (2012). lavaan: An R Package for Structural Equation
##   Modeling. Journal of Statistical Software, 48(2), 1-36.
##   https://doi.org/10.18637/jss.v048.i02
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{lavaan}: An {R} Package for Structural Equation Modeling},
##     author = {Yves Rosseel},
##     journal = {Journal of Statistical Software},
##     year = {2012},
##     volume = {48},
##     number = {2},
##     pages = {1--36},
##     doi = {10.18637/jss.v048.i02},
##   }

citation("semTools")

## 
## The maintainer and *primary* contributors to this package are listed as
## authors, but this package is a collaborative work. The maintainer(s)
## cannot take credit for others' contributions. Whenever possible, please
## cite the paper(s) associated with the development of a particular
## function (e.g., permuteMeasEq or parcelAllocation) or tutorials about
## how to use them (e.g., probe2WayMC and related functions), which are
## listed in the References section of its associated help page.
## Otherwise, please use the following citation for the package as a
## whole:
## 
##   Jorgensen, T. D., Pornprasertmanit, S., Schoemann, A. M., & Rosseel,
##   Y. (2022). semTools: Useful tools for structural equation modeling. R
##   package version 0.5-6. Retrieved from
##   https://CRAN.R-project.org/package=semTools
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {\texttt{semTools}: {U}seful tools for structural equation modeling},
##     author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel},
##     year = {2022},
##     note = {R package version 0.5-6},
##     url = {https://CRAN.R-project.org/package=semTools},
##   }

citation("semPlot")

## 
## To cite package 'semPlot' in publications use:
## 
##   Epskamp S (2022). _semPlot: Path Diagrams and Visual Analysis of
##   Various SEM Packages' Output_. R package version 1.1.6,
##   <https://CRAN.R-project.org/package=semPlot>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {semPlot: Path Diagrams and Visual Analysis of Various SEM Packages'
## Output},
##     author = {Sacha Epskamp},
##     year = {2022},
##     note = {R package version 1.1.6},
##     url = {https://CRAN.R-project.org/package=semPlot},
##   }

citation("psych")

## 
## To cite the psych package in publications use:
## 
##   Revelle, W. (2022) psych: Procedures for Personality and
##   Psychological Research, Northwestern University, Evanston, Illinois,
##   USA, https://CRAN.R-project.org/package=psych Version = 2.2.5.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
##     author = {William Revelle},
##     organization = { Northwestern University},
##     address = { Evanston, Illinois},
##     year = {2022},
##     note = {R package version 2.2.5},
##     url = {https://CRAN.R-project.org/package=psych},
##   }

citation("naniar")

## 
## To cite package 'naniar' in publications use:
## 
##   Tierney N, Cook D, McBain M, Fay C (2021). _naniar: Data Structures,
##   Summaries, and Visualisations for Missing Data_. R package version
##   0.6.1, <https://CRAN.R-project.org/package=naniar>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {naniar: Data Structures, Summaries, and Visualisations for Missing Data},
##     author = {Nicholas Tierney and Di Cook and Miles McBain and Colin Fay},
##     year = {2021},
##     note = {R package version 0.6.1},
##     url = {https://CRAN.R-project.org/package=naniar},
##   }

citation("dplyr") #getting package citations

## 
## To cite package 'dplyr' in publications use:
## 
##   Wickham H, François R, Henry L, Müller K (2022). _dplyr: A Grammar of
##   Data Manipulation_. R package version 1.0.10,
##   <https://CRAN.R-project.org/package=dplyr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {dplyr: A Grammar of Data Manipulation},
##     author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
##     year = {2022},
##     note = {R package version 1.0.10},
##     url = {https://CRAN.R-project.org/package=dplyr},
##   }

PSYC 214 HW#1

Destyni Cravens

2023-04-26