Setup

# Renaming the SubjectTag columns in CNLSY for merging
CNLSY_S1 <- CNLSY %>% rename_with(~paste0(., "_1"), -SubjectTag)
CNLSY_S2 <- CNLSY %>% rename_with(~paste0(., "_2"), -SubjectTag)

# Merging Links79PairExpanded with CNLSY twice: once for SubjectTag_S1 and once for SubjectTag_S2
merged_df <- Links79PairExpanded %>%
  left_join(CNLSY_S1, by = c("SubjectTag_S1" = "SubjectTag")) %>%
  left_join(CNLSY_S2, by = c("SubjectTag_S2" = "SubjectTag"))

# Creating a 'Relationship' column with distinct categories for DZTs, DZAs, FSTs, and FSAs
merged_df <- merged_df %>%
  mutate(Relationship = case_when(
    RelationshipPath == "Gen2Siblings" & RFull == 1 & EverSharedHouse == FALSE ~ "MZA",
    RelationshipPath == "Gen2Siblings" & RFull == 1 & EverSharedHouse == TRUE ~ "MZT",
    RelationshipPath == "Gen2Siblings" & RFull == 0.5 & CYRB_1 == CYRB_2 & EverSharedHouse == TRUE ~ "DZT",
    RelationshipPath == "Gen2Siblings" & RFull == 0.5 & CYRB_1 == CYRB_2 & EverSharedHouse == FALSE ~ "DZA",
    RelationshipPath == "Gen2Siblings" & RFull == 0.5 & CYRB_1 != CYRB_2 & EverSharedHouse == TRUE ~ "FST",
    RelationshipPath == "Gen2Siblings" & RFull == 0.5 & CYRB_1 != CYRB_2 & EverSharedHouse == FALSE ~ "FSA",
    RelationshipPath == "Gen2Siblings" & RFull == 0.25 & EverSharedHouse == TRUE ~ "HST",
    RelationshipPath == "Gen2Siblings" & RFull == 0.25 & EverSharedHouse == FALSE ~ "HSA",
    RelationshipPath == "Gen2Cousins" & RFull == 0.125 & EverSharedHouse == TRUE ~ "CZT",
    RelationshipPath == "Gen2Cousins" & RFull == 0.125 & EverSharedHouse == FALSE ~ "CZA",
    RelationshipPath == "Gen2Cousins" & RFull == 0.0625 & EverSharedHouse == TRUE ~ "HCZT",
    RelationshipPath == "Gen2Cousins" & RFull == 0.0625 & EverSharedHouse == FALSE ~ "HCZA",
    RelationshipPath == "Gen2Cousins" & RFull == 0 & EverSharedHouse == TRUE ~ "ADT",
    RelationshipPath == "Gen2Cousins" & RFull == 0 & EverSharedHouse == FALSE ~ "ADA",
    RelationshipPath == "Gen2Cousins" & RFull == 0.25 & EverSharedHouse == TRUE ~ "HSCT",
    RelationshipPath == "Gen2Cousins" & RFull == 0.25 & EverSharedHouse == FALSE ~ "HSCA",
    RelationshipPath == "AuntNiece" & RFull == 0.125 & EverSharedHouse == TRUE ~ "HNT",
    RelationshipPath == "AuntNiece" & RFull == 0.125 & EverSharedHouse == FALSE ~ "HNA",
    RelationshipPath == "AuntNiece" & RFull == 0 & EverSharedHouse == TRUE ~ "ADTT",
    RelationshipPath == "AuntNiece" & RFull == 0 & EverSharedHouse == FALSE ~ "ADTA",
    RelationshipPath == "ParentChild" & RFull == 0.5 & EverSharedHouse == FALSE ~ "PCA",
    RelationshipPath == "ParentChild" & RFull == 0.5 & EverSharedHouse == TRUE ~ "PCT",
    RelationshipPath == "AuntNiece" & RFull == 0.25 & EverSharedHouse == FALSE ~ "ANA",
    RelationshipPath == "AuntNiece" & RFull == 0.25 & EverSharedHouse == TRUE ~ "ANT",
    TRUE ~ NA_character_
  )) %>%
  # Dropping rows for unlikely or ambiguous relationships
  filter(!(RelationshipPath == "AuntNiece" & RFull == 0.5) &
         !R %in% c(0.375, 0.75))

merged_df

Rationale

I was asked to output mathematics results with the AC’RE model.

Analysis

First, the ACE model.

ACE <- '

# Additive Genetic Effects

A1 =~ NA * MathStandardized_S1 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S1
A2 =~ NA * MathStandardized_S2 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S2

# Shared Environmental Effects

C1 =~ NA * MathStandardized_S1 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S1
C2 =~ NA * MathStandardized_S2 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S2

# Nonshared Environmental Effects

E1 =~ NA * MathStandardized_S1 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S1
E2 =~ NA * MathStandardized_S2 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S2

# Factor Covariances

A1 ~~ c(0.5, 0.5, 0.25, 0.125, 0.25, 0.5, 0, 1, 0, 0.125, 0.0625, 0.25) * A2 + 0 * C1 + 0 * C2 + 0 * E1 + 0 * E2
A2 ~~ 0 * C1 + 0 * C2 + 0 * E1 + 0 * E2
C1 ~~ c(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) * C2 + 0 * E1 + 0 * E2
C2 ~~ 0 * E1 + 0 * E2
E1 ~~ 0 * E2

# Factor Variances

A1 ~~ 1 * A1
A2 ~~ 1 * A2
C1 ~~ 1 * C1
C2 ~~ 1 * C2
E1 ~~ 1 * E1
E2 ~~ 1 * E2

# Phenotypic Residual Covariances

MathStandardized_S1 ~~ 0 * MathStandardized_S1
MathStandardized_S2 ~~ 0 * MathStandardized_S2'

ACEFit <- sem(ACE, data = merged_df, group = "Relationship", missing = "FIML")
## starting values:
##  [1]        NA 159.45070   0.00000 100.13987 169.70254 174.23263 100.01910
##  [8]  99.94264 174.78439 160.05316 100.26521 100.75576 170.75641 165.29601
## [15]  99.87234 100.90251 127.88686 152.95364  95.34982  95.83328 163.64599
## [22] 187.37561  99.06849  98.98571 125.72350 134.19842 101.51980  99.07685
## [29] 282.65341 205.45341  97.25000  96.02381 144.63037 176.55556  96.99495
## [36]  96.90976 125.97859 164.06581  97.30396  97.66042 161.64376 178.08860
## [43]  97.85884  99.32065 230.65432  67.63889 114.88889 108.00000
standardizedSolution(ACEFit)

Next up, the ADE model.

ADE <- '

# Additive Genetic Effects

A1 =~ NA * MathStandardized_S1 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S1
A2 =~ NA * MathStandardized_S2 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S2

# Dominance Genetic Effects

D1 =~ NA * MathStandardized_S1 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S1
D2 =~ NA * MathStandardized_S2 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S2

# Nonshared Environmental Effects

E1 =~ NA * MathStandardized_S1 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S1
E2 =~ NA * MathStandardized_S2 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S2

# Factor Covariances

A1 ~~ c(0.5, 0.5, 0.25, 0.125, 0.25, 0.5, 0, 1, 0, 0.125, 0.0625, 0.25) * A2 + 0 * D1 + 0 * D2 + 0 * E1 + 0 * E2
A2 ~~ 0 * D1 + 0 * D2 + 0 * E1 + 0 * E2
D1 ~~ c(0, 0.25, 0, 0, 0.125, 0.25, 0, 1, 0, 0, 0, 0) * D2 + 0 * E1 + 0 * E2
D2 ~~ 0 * E1 + 0 * E2
E1 ~~ 0 * E2

# Factor Variances

A1 ~~ 1 * A1
A2 ~~ 1 * A2
D1 ~~ 1 * D1
D2 ~~ 1 * D2
E1 ~~ 1 * E1
E2 ~~ 1 * E2

# Phenotypic Residual Covariances

MathStandardized_S1 ~~ 0 * MathStandardized_S1
MathStandardized_S2 ~~ 0 * MathStandardized_S2'

ADEFit <- sem(ADE, data = merged_df, group = "Relationship", missing = "FIML")
## starting values:
##  [1]        NA 159.45070   0.00000 100.13987 169.70254 174.23263 100.01910
##  [8]  99.94264 174.78439 160.05316 100.26521 100.75576 170.75641 165.29601
## [15]  99.87234 100.90251 127.88686 152.95364  95.34982  95.83328 163.64599
## [22] 187.37561  99.06849  98.98571 125.72350 134.19842 101.51980  99.07685
## [29] 282.65341 205.45341  97.25000  96.02381 144.63037 176.55556  96.99495
## [36]  96.90976 125.97859 164.06581  97.30396  97.66042 161.64376 178.08860
## [43]  97.85884  99.32065 230.65432  67.63889 114.88889 108.00000
standardizedSolution(ADEFit)

Now to combine them into the ACDE model.

ACDE <- '

# Additive Genetic Effects

A1 =~ NA * MathStandardized_S1 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S1
A2 =~ NA * MathStandardized_S2 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S2

# Shared Environmental Effects

C1 =~ NA * MathStandardized_S1 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S1
C2 =~ NA * MathStandardized_S2 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S2

# Dominance Genetic Effects

D1 =~ NA * MathStandardized_S1 + c(c, c, c, c, c, c, c, c, c, c, c, c) * MathStandardized_S1
D2 =~ NA * MathStandardized_S2 + c(c, c, c, c, c, c, c, c, c, c, c, c) * MathStandardized_S2

# Nonshared Environmental Effects

E1 =~ NA * MathStandardized_S1 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S1
E2 =~ NA * MathStandardized_S2 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S2

# Factor Covariances

A1 ~~ c(0.5, 0.5, 0.25, 0.125, 0.25, 0.5, 0, 1, 0, 0.125, 0.0625, 0.25) * A2 + 0 * C1 + 0 * C2 + 0 * D1 + 0 * D2 + 0 * E1 + 0 * E2
A2 ~~ 0 * C1 + 0 * C2 + 0 * D1 + 0 * D2 + 0 * E1 + 0 * E2
C1 ~~ c(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) * C2 + + 0 * D1 + 0 * D2 + 0 * E1 + 0 * E2
C2 ~~ 0 * D1 + 0 * D2 + 0 * E1 + 0 * E2
D1 ~~ c(0, 0.25, 0, 0, 0.125, 0.25, 0, 1, 0, 0, 0, 0) * D2 + 0 * E1 + 0 * E2
D2 ~~ 0 * E1 + 0 * E2
E1 ~~ 0 * E2

# Factor Variances

A1 ~~ 1 * A1
A2 ~~ 1 * A2
C1 ~~ 1 * C1
C2 ~~ 1 * C2
D1 ~~ 1 * D1
D2 ~~ 1 * D2
E1 ~~ 1 * E1
E2 ~~ 1 * E2

# Phenotypic Residual Covariances

MathStandardized_S1 ~~ 0 * MathStandardized_S1
MathStandardized_S2 ~~ 0 * MathStandardized_S2'

ACDEFit <- sem(ACDE, data = merged_df, group = "Relationship", missing = "FIML")
## starting values:
##  [1]        NA 159.45070   0.00000 100.13987 169.70254 174.23263 100.01910
##  [8]  99.94264 174.78439 160.05316 100.26521 100.75576 170.75641 165.29601
## [15]  99.87234 100.90251 127.88686 152.95364  95.34982  95.83328 163.64599
## [22] 187.37561  99.06849  98.98571 125.72350 134.19842 101.51980  99.07685
## [29] 282.65341 205.45341  97.25000  96.02381 144.63037 176.55556  96.99495
## [36]  96.90976 125.97859 164.06581  97.30396  97.66042 161.64376 178.08860
## [43]  97.85884  99.32065 230.65432  67.63889 114.88889 108.00000
standardizedSolution(ACDEFit)

At this point, it’s time for the AC’RE model.

ACRE <- '

# Additive Genetic Effects

A1 =~ NA * MathStandardized_S1 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S1
A2 =~ NA * MathStandardized_S2 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S2

# Broad Family Environmental Effects

C1 =~ NA * MathStandardized_S1 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S1
C2 =~ NA * MathStandardized_S2 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S2

# Narrow Rearing Environmental Factors

R1 =~ NA * MathStandardized_S1 + c(c, c, c, c, c, c, c, c, c, c, c, c) * MathStandardized_S1
R2 =~ NA * MathStandardized_S2 + c(c, c, c, c, c, c, c, c, c, c, c, c) * MathStandardized_S2

# Nonshared Environmental Factors

E1 =~ NA * MathStandardized_S1 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S1
E2 =~ NA * MathStandardized_S2 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S2

# Factor Covariances

A1 ~~ c(0.5, 0.5, 0.25, 0.125, 0.25, 0.5, 0, 1, 0, 0.125, 0.0625, 0.25) * A2 + 0 * C1 + 0 * C2 + 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2
A2 ~~ 0 * C1 + 0 * C2 + 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2
C1 ~~ c(0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1) * C2 + 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2
C2 ~~ 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2
R1 ~~ c(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) * R2 + 0 * E1 + 0 * E2
R2 ~~ 0 * E1 + 0 * E2
E1 ~~ 0 * E2

# Factor Variances

A1 ~~ 1 * A1
A2 ~~ 1 * A2
C1 ~~ 1 * C1
C2 ~~ 1 * C2
R1 ~~ 1 * R1
R2 ~~ 1 * R2
E1 ~~ 1 * E1
E2 ~~ 1 * E2

# Phenotypic Residual Covariances

MathStandardized_S1 ~~ 0 * MathStandardized_S1
MathStandardized_S2 ~~ 0 * MathStandardized_S2'

ACREFit <- sem(ACRE, data = merged_df, group = "Relationship", missing = "FIML")
## starting values:
##  [1]        NA 159.45070   0.00000 100.13987 169.70254 174.23263 100.01910
##  [8]  99.94264 174.78439 160.05316 100.26521 100.75576 170.75641 165.29601
## [15]  99.87234 100.90251 127.88686 152.95364  95.34982  95.83328 163.64599
## [22] 187.37561  99.06849  98.98571 125.72350 134.19842 101.51980  99.07685
## [29] 282.65341 205.45341  97.25000  96.02381 144.63037 176.55556  96.99495
## [36]  96.90976 125.97859 164.06581  97.30396  97.66042 161.64376 178.08860
## [43]  97.85884  99.32065 230.65432  67.63889 114.88889 108.00000
standardizedSolution(ACREFit)

And to top it off, I’ll add vertical transmission:

ACREDTV <- '

# Additive Genetic Effects

A1 =~ NA * MathStandardized_S1 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S1
A2 =~ NA * MathStandardized_S2 + c(a, a, a, a, a, a, a, a, a, a, a, a) * MathStandardized_S2

# Broad Family Environmental Effects

C1 =~ NA * MathStandardized_S1 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S1
C2 =~ NA * MathStandardized_S2 + c(b, b, b, b, b, b, b, b, b, b, b, b) * MathStandardized_S2

# Narrow Rearing Environmental Factors

R1 =~ NA * MathStandardized_S1 + c(c, c, c, c, c, c, c, c, c, c, c, c) * MathStandardized_S1
R2 =~ NA * MathStandardized_S2 + c(c, c, c, c, c, c, c, c, c, c, c, c) * MathStandardized_S2

# Nonshared Environmental Factors

E1 =~ NA * MathStandardized_S1 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S1
E2 =~ NA * MathStandardized_S2 + c(d, d, d, d, d, d, d, d, d, d, d, d) * MathStandardized_S2

# Vertical Cultural Transmission

V1 =~ NA * MathStandardized_S1 + c(g, g, g, g, g, g, g, g, g, g, g, g) * MathStandardized_S1
V2 =~ NA * MathStandardized_S2 + c(g, g, g, g, g, g, g, g, g, g, g, g) * MathStandardized_S2

# Factor Covariances

A1 ~~ c(0.5, 0.5, 0.25, 0.125, 0.25, 0.5, 0, 1, 0, 0.125, 0.0625, 0.25) * A2 + 0 * C1 + 0 * C2 + 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2 + 0 * V1 + 0 * V2
A2 ~~ 0 * C1 + 0 * C2 + 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2 + 0 * V1 + 0 * V2
C1 ~~ c(0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1) * C2 + 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2 + 0 * V1 + 0 * V2
C2 ~~ 0 * R1 + 0 * R2 + 0 * E1 + 0 * E2 + 0 * V1 + 0 * V2
R1 ~~ c(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) * R2 + 0 * E1 + 0 * E2 + 0 * V1 + 0 * V2
R2 ~~ 0 * E1 + 0 * E2 + 0 * V1 + 0 * V2
E1 ~~ 0 * E2 + 0 * V1 + 0 * V2
E2 ~~ 0 * V1 + 0 * V2
V1 ~~ c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) * V2

# Factor Variances

A1 ~~ 1 * A1
A2 ~~ 1 * A2
C1 ~~ 1 * C1
C2 ~~ 1 * C2
R1 ~~ 1 * R1
R2 ~~ 1 * R2
E1 ~~ 1 * E1
E2 ~~ 1 * E2
V1 ~~ 1 * V1
V2 ~~ 1 * V2

# Phenotypic Residual Covariances

MathStandardized_S1 ~~ 0 * MathStandardized_S1
MathStandardized_S2 ~~ 0 * MathStandardized_S2'

ACREDTVFit <- sem(ACREDTV, data = merged_df, group = "Relationship", missing = "FIML")
## starting values:
##  [1]        NA 159.45070   0.00000 100.13987 169.70254 174.23263 100.01910
##  [8]  99.94264 174.78439 160.05316 100.26521 100.75576 170.75641 165.29601
## [15]  99.87234 100.90251 127.88686 152.95364  95.34982  95.83328 163.64599
## [22] 187.37561  99.06849  98.98571 125.72350 134.19842 101.51980  99.07685
## [29] 282.65341 205.45341  97.25000  96.02381 144.63037 176.55556  96.99495
## [36]  96.90976 125.97859 164.06581  97.30396  97.66042 161.64376 178.08860
## [43]  97.85884  99.32065 230.65432  67.63889 114.88889 108.00000
standardizedSolution(ACREDTVFit)

Discussion

More to come soon. For details and expansions on this model and data, see https://rpubs.com/JLLJ/ACRE