Data Assignment 2 Answer Key 2025

# Point the session at the assignment folder so the relative CSV file names
# passed to read.csv() later in the script resolve.
# NOTE(review): this is a hard-coded absolute Windows path, so the script only
# runs unchanged on a machine that has this exact directory. Consider a
# project-relative path instead.
setwd("C:/Work Files/Teaching/PSY 8190/Data Assignments/Data Assignment 2")
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(ggplot2)
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
library(lmerTest)
Warning: package 'lmerTest' was built under R version 4.5.1

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
library(lavaan)
This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.
library(psych)

Attaching package: 'psych'

The following object is masked from 'package:lavaan':

    cor2cov

The following objects are masked from 'package:ggplot2':

    %+%, alpha
library(VIM)
Warning: package 'VIM' was built under R version 4.5.1
Loading required package: colorspace
Warning: package 'colorspace' was built under R version 4.5.1
Loading required package: grid
VIM is ready to use.

Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

Attaching package: 'VIM'

The following object is masked from 'package:datasets':

    sleep
# Read both layouts of the job-performance data from the working directory:
# the long file (one row per id and wave) and the wide file.
Assn_2_Data <- read.csv(file = "job_performance_long.csv")
Assn_2_Data_Wide <- read.csv(file = "job_performance_wide.csv")

# Print the first 15 rows of the long data as a quick sanity check.
head(Assn_2_Data, n = 15)
   id wave performance_continuous gender qualifications
1   1    1               1.408250      1              3
2   1    2               2.129363      1              3
3   1    3               2.797420      1              3
4   1    4               2.453070      1              3
5   1    5               2.529704      1              3
6   1    6               2.338785      1              3
7   2    1               2.038001      0              5
8   2    2               1.564303      0              5
9   2    3               1.888694      0              5
10  2    4               2.870881      0              5
11  2    5               3.141473      0              5
12  2    6               2.595409      0              5
13  3    1               2.921425      0              4
14  3    2               3.760158      0              4
15  3    3               3.958660      0              4
# List the column names of the long-format data.
names(Assn_2_Data)
[1] "id"                     "wave"                   "performance_continuous"
[4] "gender"                 "qualifications"        
# Show the storage type of every column before any recoding is applied.
str(Assn_2_Data)
'data.frame':   3600 obs. of  5 variables:
 $ id                    : int  1 1 1 1 1 1 2 2 2 2 ...
 $ wave                  : int  1 2 3 4 5 6 1 2 3 4 ...
 $ performance_continuous: num  1.41 2.13 2.8 2.45 2.53 ...
 $ gender                : int  1 1 1 1 1 1 0 0 0 0 ...
 $ qualifications        : int  3 3 3 3 3 3 5 5 5 5 ...
# Recode the integer-coded predictors in the long data as labelled factors.
#   gender:         0 = Female, 1 = Male (unordered)
#   qualifications: 1-5 = None < Unsuitable < Suitable < Aligned < Exceptional
#                   (ordered factor)
Assn_2_Data <- dplyr::mutate(
  Assn_2_Data,
  gender = factor(
    gender,
    levels = 0:1,
    labels = c("Female", "Male")
  ),
  qualifications = factor(
    qualifications,
    levels = 1:5,
    labels = c("None", "Unsuitable", "Suitable", "Aligned", "Exceptional"),
    ordered = TRUE
  )
)

# Confirm gender is now a two-level factor.
str(Assn_2_Data$gender)
 Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 1 1 1 1 ...
# Confirm qualifications is now an ordered five-level factor.
str(Assn_2_Data$qualifications)
 Ord.factor w/ 5 levels "None"<"Unsuitable"<..: 3 3 3 3 3 3 5 5 5 5 ...
# Row counts per gender level. These are rows of the long data (person-waves),
# not numbers of people.
table(Assn_2_Data$gender)

Female   Male 
  1830   1770 
# Row counts per qualifications level. These are rows of the long data
# (person-waves), not numbers of people.
table(Assn_2_Data$qualifications)

       None  Unsuitable    Suitable     Aligned Exceptional 
        438         744        1038         972         408 
# Wave-level descriptives for the mean trajectory plot: the mean of
# performance_continuous and its standard error within each wave.
# Missing scores are dropped explicitly and the standard error is based on the
# number of non-missing scores, so a single NA cannot turn a whole wave's
# summary into NA (missingness is only inspected later in the script). With
# complete data this gives the same values as mean(), sd() and n().
mean_data <- Assn_2_Data %>%
  group_by(wave) %>%
  summarise(
    mean_performance = mean(performance_continuous, na.rm = TRUE),
    se_performance = sd(performance_continuous, na.rm = TRUE) /
      sqrt(sum(!is.na(performance_continuous)))
  )

# Mean trajectory: wave means joined by a line, with error bars spanning
# one standard error either side of each mean.
mean_traj_plot <- ggplot(
  data = mean_data,
  mapping = aes(x = wave, y = mean_performance)
) +
  geom_line(colour = "blue", linewidth = 1) +
  geom_point(colour = "blue", size = 2) +
  geom_errorbar(
    mapping = aes(
      ymin = mean_performance - se_performance,
      ymax = mean_performance + se_performance
    ),
    width = 0.2
  ) +
  labs(
    title = "Mean Trajectory Over Time",
    x = "Wave",
    y = "Performance"
  ) +
  theme_minimal()

# Draw the plot on its default y-axis range.
mean_traj_plot

# Redraw the mean trajectory with the y-axis fixed to the 0-6 range.
mean_traj_plot +
  scale_y_continuous(limits = c(0, 6))

# Histogram of the outcome pooled over all ids and waves.
hist(Assn_2_Data$performance_continuous)

# Spaghetti plot: one semi-transparent line (with points) per id across waves,
# coloured by gender.
ggplot(
  data = Assn_2_Data,
  mapping = aes(
    x = wave,
    y = performance_continuous,
    group = id,
    colour = gender
  )
) +
  geom_line(alpha = 0.5) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Individual Trajectories (Spaghetti Plot)",
    x = "Wave",
    y = "Performance"
  ) +
  theme_minimal()

# Descriptive statistics for every column except the first (id).
# The two factor columns appear in the printed table with a trailing "*".
describe(Assn_2_Data[,-1])
                       vars    n mean   sd median trimmed  mad   min  max range
wave                      1 3600 3.50 1.71   3.50    3.50 2.22  1.00 6.00  5.00
performance_continuous    2 3600 2.68 1.00   2.66    2.67 0.97 -0.68 7.11  7.79
gender*                   3 3600 1.49 0.50   1.00    1.49 0.00  1.00 2.00  1.00
qualifications*           4 3600 3.05 1.19   3.00    3.06 1.48  1.00 5.00  4.00
                        skew kurtosis   se
wave                    0.00    -1.27 0.03
performance_continuous  0.16     0.18 0.02
gender*                 0.03    -2.00 0.01
qualifications*        -0.12    -0.87 0.02
# Missing-data check on every column except the first (id): plots the amount
# and pattern of missingness and, because sortVars = TRUE, prints the
# variables sorted by their number of missing values.
aggr_plot <- aggr(
  Assn_2_Data[, -1],
  col = c("navyblue", "red"),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(Assn_2_Data[, -1]),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Histogram of missing data", "Pattern")
)


 Variables sorted by number of missings: 
               Variable Count
                   wave     0
 performance_continuous     0
                 gender     0
         qualifications     0
# Unconditional linear growth model: performance regressed on wave, with a
# random intercept and a random wave slope for each id.
# NOTE(review): wave is coded 1-6 in the long data, so the intercept (and the
# intercept variance and intercept-slope correlation) refers to wave = 0, one
# step before the first observation. The latent growth models later in this
# file code time as 0-5, so their intercept refers to wave 1. Recode time
# (e.g. wave - 1) if the two sets of intercept results should be compared.
# NOTE(review): no REML argument is given and the summary reports a REML fit,
# whereas the lavaan models use ML; variance estimates are therefore not
# expected to match exactly.
UC_model <- lmer(performance_continuous ~ wave  + 
                (1 + wave | id), data = Assn_2_Data)

# Print the fitted model: random-effect variances and fixed-effect tests.
summary(UC_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: performance_continuous ~ wave + (1 + wave | id)
   Data: Assn_2_Data

REML criterion at convergence: 6892.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.9065 -0.5784 -0.0302  0.5874  3.6305 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 id       (Intercept) 0.44856  0.6697        
          wave        0.02071  0.1439   -0.06
 Residual             0.21140  0.4598        
Number of obs: 3600, groups:  id, 600

Fixed effects:
             Estimate Std. Error        df t value Pr(>|t|)    
(Intercept) 2.159e+00  3.245e-02 5.990e+02   66.53   <2e-16 ***
wave        1.485e-01  7.393e-03 5.990e+02   20.09   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
     (Intr)
wave -0.336
# Conditional growth model: gender and qualifications predict the intercept
# (main effects) and the wave slope (wave:gender, wave:qualifications), plus a
# wave:gender:qualifications term; random intercept and wave slope per id.
# NOTE(review): the three-way interaction is included without the lower-order
# gender:qualifications term, so any gender-by-qualifications difference in
# the intercept is constrained to zero. Confirm this is intended.
# NOTE(review): qualifications is an ordered factor, so it enters through
# polynomial contrasts (.L, .Q, .C, ^4 in the output), whereas the conditional
# latent growth model later in this file uses a single qualifications
# coefficient. The two sets of qualifications effects are not parameterised
# the same way.
Cond_model <- lmer(performance_continuous ~ wave + gender + qualifications  + wave:qualifications + wave:gender + wave:gender:qualifications
              +  (1 + wave | id), data = Assn_2_Data)

# Print the fitted model: random-effect variances and fixed-effect tests.
summary(Cond_model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
performance_continuous ~ wave + gender + qualifications + wave:qualifications +  
    wave:gender + wave:gender:qualifications + (1 + wave | id)
   Data: Assn_2_Data

REML criterion at convergence: 6734.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.8147 -0.5851 -0.0408  0.5912  3.6394 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 id       (Intercept) 0.4195   0.6477        
          wave        0.0132   0.1149   -0.09
 Residual             0.2114   0.4598        
Number of obs: 3600, groups:  id, 600

Fixed effects:
                                   Estimate Std. Error         df t value
(Intercept)                        2.324942   0.045998 593.987662  50.544
wave                               0.182212   0.009656 599.338049  18.871
genderMale                        -0.324492   0.063457 593.987797  -5.114
qualifications.L                  -0.201932   0.087807 593.987743  -2.300
qualifications.Q                   0.021205   0.080657 593.987907   0.263
qualifications.C                   0.017503   0.071741 593.987813   0.244
qualifications^4                   0.078756   0.063235 593.987880   1.245
wave:qualifications.L              0.233208   0.023748 640.175652   9.820
wave:qualifications.Q             -0.013509   0.021949 640.387962  -0.615
wave:qualifications.C             -0.037472   0.019600 640.510573  -1.912
wave:qualifications^4              0.016680   0.017552 640.911320   0.950
wave:genderMale                   -0.081882   0.013866 611.327578  -5.905
wave:genderMale:qualifications.L  -0.093329   0.033070 590.003337  -2.822
wave:genderMale:qualifications.Q  -0.010108   0.030358 590.003337  -0.333
wave:genderMale:qualifications.C   0.041412   0.026970 590.003337   1.535
wave:genderMale:qualifications^4  -0.017794   0.023754 590.003336  -0.749
                                 Pr(>|t|)    
(Intercept)                       < 2e-16 ***
wave                              < 2e-16 ***
genderMale                       4.27e-07 ***
qualifications.L                  0.02181 *  
qualifications.Q                  0.79271    
qualifications.C                  0.80734    
qualifications^4                  0.21346    
wave:qualifications.L             < 2e-16 ***
wave:qualifications.Q             0.53847    
wave:qualifications.C             0.05633 .  
wave:qualifications^4             0.34232    
wave:genderMale                  5.85e-09 ***
wave:genderMale:qualifications.L  0.00493 ** 
wave:genderMale:qualifications.Q  0.73928    
wave:genderMale:qualifications.C  0.12520    
wave:genderMale:qualifications^4  0.45408    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation matrix not shown by default, as p = 16 > 12.
Use print(x, correlation=TRUE)  or
    vcov(x)        if you need it
# List the column names of the wide-format data (one performance column per
# wave), which are the indicator names used in the lavaan model syntax.
names(Assn_2_Data_Wide)
[1] "id"                 "gender"             "qualifications"    
[4] "performance_wave_1" "performance_wave_2" "performance_wave_3"
[7] "performance_wave_4" "performance_wave_5" "performance_wave_6"
# Unconditional linear latent growth curve model (lavaan syntax).
#   I: intercept factor, loading fixed to 1 on all six waves.
#   S: slope factor, loadings fixed to 0, 1, 2, 3, 4, 5, so time is zero at
#      wave 1 and the intercept factor refers to performance at wave 1.
UC_LGC <-'
        I=~ 1*performance_wave_1 + 1*performance_wave_2 + 1*performance_wave_3 + 1*performance_wave_4 + 1*performance_wave_5 + 1*performance_wave_6
        S=~ 0*performance_wave_1 + 1*performance_wave_2 + 2*performance_wave_3 + 3*performance_wave_4 + 4*performance_wave_5 + 5*performance_wave_6
'

# Fit with lavaan's growth() on the wide data using ML with missing = "FIML"
# and Mplus-style defaults, then print fit measures and standardized
# estimates alongside the raw ones.
UC_LGC_fit <- growth(UC_LGC, estimator= "ML", data=Assn_2_Data_Wide, mimic = "Mplus", missing = "FIML")
summary(UC_LGC_fit, fit.measures = TRUE, standardized=TRUE)
lavaan 0.6-19 ended normally after 34 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        11

  Number of observations                           600
  Number of missing patterns                         1

Model Test User Model:
                                                      
  Test statistic                                14.396
  Degrees of freedom                                16
  P-value (Chi-square)                           0.569

Model Test Baseline Model:

  Test statistic                              2361.066
  Degrees of freedom                                15
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    1.000
  Tucker-Lewis Index (TLI)                       1.001
                                                      
  Robust Comparative Fit Index (CFI)             1.000
  Robust Tucker-Lewis Index (TLI)                1.001

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -3800.564
  Loglikelihood unrestricted model (H1)      -3793.366
                                                      
  Akaike (AIC)                                7623.128
  Bayesian (BIC)                              7671.495
  Sample-size adjusted Bayesian (SABIC)       7636.573

Root Mean Square Error of Approximation:

  RMSEA                                          0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.034
  P-value H_0: RMSEA <= 0.050                    0.998
  P-value H_0: RMSEA >= 0.080                    0.000
                                                      
  Robust RMSEA                                   0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.034
  P-value H_0: Robust RMSEA <= 0.050             0.998
  P-value H_0: Robust RMSEA >= 0.080             0.000

Standardized Root Mean Square Residual:

  SRMR                                           0.016

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  I =~                                                                  
    performnc_wv_1    1.000                               0.659    0.774
    performnc_wv_2    1.000                               0.659    0.757
    performnc_wv_3    1.000                               0.659    0.726
    performnc_wv_4    1.000                               0.659    0.667
    performnc_wv_5    1.000                               0.659    0.625
    performnc_wv_6    1.000                               0.659    0.579
  S =~                                                                  
    performnc_wv_1    0.000                               0.000    0.000
    performnc_wv_2    1.000                               0.143    0.164
    performnc_wv_3    2.000                               0.285    0.314
    performnc_wv_4    3.000                               0.428    0.433
    performnc_wv_5    4.000                               0.571    0.541
    performnc_wv_6    5.000                               0.713    0.626

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  I ~~                                                                  
    S                 0.010    0.006    1.542    0.123    0.106    0.106

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    I                 2.327    0.031   74.598    0.000    3.530    3.530
    S                 0.149    0.008   19.221    0.000    1.044    1.044

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .performnc_wv_1    0.291    0.025   11.560    0.000    0.291    0.401
   .performnc_wv_2    0.284    0.020   13.911    0.000    0.284    0.374
   .performnc_wv_3    0.268    0.018   14.528    0.000    0.268    0.326
   .performnc_wv_4    0.299    0.021   14.566    0.000    0.299    0.306
   .performnc_wv_5    0.274    0.021   12.999    0.000    0.274    0.246
   .performnc_wv_6    0.253    0.025    9.972    0.000    0.253    0.195
    I                 0.435    0.034   12.600    0.000    1.000    1.000
    S                 0.020    0.002    9.033    0.000    1.000    1.000
# Conditional linear latent growth curve model (lavaan syntax): the same
# intercept (I) and slope (S) factors as the unconditional model, with both
# factors regressed on gender and qualifications.
# NOTE(review): the wide data is used as read in, and the output shows one
# coefficient per predictor, so gender and qualifications enter as single
# numeric covariates here (presumably the raw 0/1 and 1-5 codes; confirm).
# The lmer conditional model instead uses factor coding and interaction terms,
# so the predictor effects of the two models are not directly comparable.
Cond_LGC <-'
        I=~ 1*performance_wave_1 + 1*performance_wave_2 + 1*performance_wave_3 + 1*performance_wave_4 + 1*performance_wave_5 + 1*performance_wave_6
        S=~ 0*performance_wave_1 + 1*performance_wave_2 + 2*performance_wave_3 + 3*performance_wave_4 + 4*performance_wave_5 + 5*performance_wave_6
        
        I ~ gender + qualifications 
        S ~ gender + qualifications
'

# Fit with the same estimator settings as the unconditional model, then print
# fit measures and standardized estimates alongside the raw ones.
Cond_LGC_fit <- growth(Cond_LGC, estimator= "ML", data=Assn_2_Data_Wide, mimic = "Mplus", missing = "FIML")
summary(Cond_LGC_fit, fit.measures = TRUE, standardized=TRUE)
lavaan 0.6-19 ended normally after 38 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        15

  Number of observations                           600
  Number of missing patterns                         1

Model Test User Model:
                                                      
  Test statistic                                25.287
  Degrees of freedom                                24
  P-value (Chi-square)                           0.390

Model Test Baseline Model:

  Test statistic                              2572.279
  Degrees of freedom                                27
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.999
  Tucker-Lewis Index (TLI)                       0.999
                                                      
  Robust Comparative Fit Index (CFI)             0.999
  Robust Tucker-Lewis Index (TLI)                0.999

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -3700.403
  Loglikelihood unrestricted model (H1)      -3687.760
                                                      
  Akaike (AIC)                                7430.806
  Bayesian (BIC)                              7496.760
  Sample-size adjusted Bayesian (SABIC)       7449.139

Root Mean Square Error of Approximation:

  RMSEA                                          0.009
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.035
  P-value H_0: RMSEA <= 0.050                    0.999
  P-value H_0: RMSEA >= 0.080                    0.000
                                                      
  Robust RMSEA                                   0.009
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.035
  P-value H_0: Robust RMSEA <= 0.050             0.999
  P-value H_0: Robust RMSEA >= 0.080             0.000

Standardized Root Mean Square Residual:

  SRMR                                           0.016

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  I =~                                                                  
    performnc_wv_1    1.000                               0.659    0.771
    performnc_wv_2    1.000                               0.659    0.759
    performnc_wv_3    1.000                               0.659    0.725
    performnc_wv_4    1.000                               0.659    0.667
    performnc_wv_5    1.000                               0.659    0.627
    performnc_wv_6    1.000                               0.659    0.577
  S =~                                                                  
    performnc_wv_1    0.000                               0.000    0.000
    performnc_wv_2    1.000                               0.142    0.163
    performnc_wv_3    2.000                               0.283    0.311
    performnc_wv_4    3.000                               0.425    0.430
    performnc_wv_5    4.000                               0.566    0.539
    performnc_wv_6    5.000                               0.708    0.619

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  I ~                                                                   
    gender           -0.394    0.060   -6.544    0.000   -0.599   -0.299
    qualifications   -0.002    0.025   -0.096    0.923   -0.004   -0.004
  S ~                                                                   
    gender           -0.081    0.014   -5.782    0.000   -0.569   -0.284
    qualifications    0.061    0.006   10.361    0.000    0.429    0.510

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
 .I ~~                                                                  
   .S                 0.002    0.006    0.429    0.668    0.034    0.034

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .I                 2.529    0.088   28.579    0.000    3.839    3.839
   .S                 0.004    0.020    0.179    0.858    0.026    0.026

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .performnc_wv_1    0.295    0.025   11.844    0.000    0.295    0.405
   .performnc_wv_2    0.278    0.020   13.958    0.000    0.278    0.370
   .performnc_wv_3    0.271    0.019   14.568    0.000    0.271    0.327
   .performnc_wv_4    0.298    0.020   14.626    0.000    0.298    0.306
   .performnc_wv_5    0.267    0.020   13.179    0.000    0.267    0.241
   .performnc_wv_6    0.266    0.025   10.737    0.000    0.266    0.204
   .I                 0.395    0.032   12.238    0.000    0.910    0.910
   .S                 0.013    0.002    6.989    0.000    0.655    0.655