library(haven)
## Warning: package 'haven' was built under R version 4.2.2
# Load the SPSS data file into a tibble; labelled columns keep their SPSS
# value labels.
kid_scoredata <- haven::read_sav(file = "kid_scoredata.sav")
# Preview the first 6 rows of the dataset
head(kid_scoredata, n = 6L)
## # A tibble: 6 × 5
##   kid_score mom_hs                           mom_iq mom_work             mom_age
##       <dbl> <dbl+lbl>                         <dbl> <dbl+lbl>              <dbl>
## 1        65 1 [Graduated high school]         121.  4 [Mother worked fu…      27
## 2        98 1 [Graduated high school]          89.4 4 [Mother worked fu…      25
## 3        85 1 [Graduated high school]         115.  4 [Mother worked fu…      27
## 4        83 1 [Graduated high school]          99.4 3 [Mother worked pa…      25
## 5       115 1 [Graduated high school]          92.7 4 [Mother worked fu…      27
## 6        98 0 [Did not graduate high school]  108.  1 [Mother did not w…      18
# Means of the maternal predictors and of the child's test score.
# `df` is kept as an alias of kid_scoredata so that any later code referring
# to it keeps working.
df <- kid_scoredata
# mom_hs appears to be coded 0/1, so its mean is the proportion coded 1
mean(df$mom_hs)
## [1] 0.7857143
mean(df$mom_iq)
## [1] 100
mean(df$mom_age)
## [1] 22.78571
mean(df$kid_score)
## [1] 86.79724
# Correlation matrix of mom_hs, mom_iq, mom_age and kid_score
# (the covariance matrix of the same variables is computed right after)
cor(kid_scoredata[c("mom_hs", "mom_iq", "mom_age", "kid_score")])
##              mom_hs    mom_iq    mom_age  kid_score
## mom_hs    1.0000000 0.2827094 0.21452839 0.23691643
## mom_iq    0.2827094 1.0000000 0.09160840 0.44827584
## mom_age   0.2145284 0.0916084 1.00000000 0.09199819
## kid_score 0.2369164 0.4482758 0.09199819 1.00000000
# Covariance matrix of mom_hs, mom_iq, mom_age and kid_score
cov(kid_scoredata[c("mom_hs", "mom_iq", "mom_age", "kid_score")])
##              mom_hs     mom_iq   mom_age  kid_score
## mom_hs    0.1687562   1.742053 0.2380403   1.986473
## mom_iq    1.7420527 225.000000 3.7116099 137.244279
## mom_age   0.2380403   3.711610 7.2957770   5.071923
## kid_score 1.9864731 137.244279 5.0719235 416.596205
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## Warning: package 'ggplot2' was built under R version 4.2.2
## Warning: package 'tibble' was built under R version 4.2.2
## Warning: package 'tidyr' was built under R version 4.2.2
## Warning: package 'readr' was built under R version 4.2.2
## Warning: package 'purrr' was built under R version 4.2.2
## Warning: package 'dplyr' was built under R version 4.2.2
## Warning: package 'stringr' was built under R version 4.2.2
## Warning: package 'forcats' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Add a new column, Product, holding mom_iq multiplied by mom_hs
kid_scoredata <- kid_scoredata |>
  mutate(Product = mom_iq * mom_hs)
head(kid_scoredata, n = 6L)
## # A tibble: 6 × 6
##   kid_score mom_hs                           mom_iq mom_work     mom_age Product
##       <dbl> <dbl+lbl>                         <dbl> <dbl+lbl>      <dbl>   <dbl>
## 1        65 1 [Graduated high school]         121.  4 [Mother w…      27   121. 
## 2        98 1 [Graduated high school]          89.4 4 [Mother w…      25    89.4
## 3        85 1 [Graduated high school]         115.  4 [Mother w…      27   115. 
## 4        83 1 [Graduated high school]          99.4 3 [Mother w…      25    99.4
## 5       115 1 [Graduated high school]          92.7 4 [Mother w…      27    92.7
## 6        98 0 [Did not graduate high school]  108.  1 [Mother d…      18     0
library(lavaan)  
## Warning: package 'lavaan' was built under R version 4.2.2
## This is lavaan 0.6-13
## lavaan is FREE software! Please report any bugs.
# Ordinary least squares regression of kid_score on mom_iq, mom_hs and mom_age
lmout <- lm(
  formula = kid_score ~ mom_iq + mom_hs + mom_age,
  data = kid_scoredata
)
summary(lmout)
## 
## Call:
## lm(formula = kid_score ~ mom_iq + mom_hs + mom_age, data = kid_scoredata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.289 -12.421   2.399  11.223  50.169 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 20.98466    9.13013   2.298   0.0220 *  
## mom_iq       0.56254    0.06065   9.276   <2e-16 ***
## mom_hs       5.64715    2.25766   2.501   0.0127 *  
## mom_age      0.22475    0.33075   0.680   0.4972    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.15 on 430 degrees of freedom
## Multiple R-squared:  0.215,  Adjusted R-squared:  0.2095 
## F-statistic: 39.25 on 3 and 430 DF,  p-value: < 2.2e-16
# Same regression written in lavaan model syntax; `~ 1` requests the intercept.
# (Calling summary() on this string would only report its length, class and
# mode, so the model is inspected through the fitted object instead.)
lavaan_mod <- "kid_score ~ 1 +mom_iq+mom_hs+mom_age"
# Fit the regression with lavaan on the raw data and print the short fit header
reg_fit <- sem(lavaan_mod, data = kid_scoredata)
reg_fit
## lavaan 0.6.13 ended normally after 26 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           434
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
# rsquare = TRUE adds the R-square table; standardized = TRUE adds the
# standardized estimates (Std.lv / Std.all columns); header = FALSE omits the
# fit header. The argument name is spelled out in full rather than relying on
# partial matching of `standardize`.
summary(reg_fit, rsquare = TRUE, standardized = TRUE, header = FALSE)
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   kid_score ~                                                           
##     mom_iq            0.563    0.060    9.319    0.000    0.563    0.413
##     mom_hs            5.647    2.247    2.513    0.012    5.647    0.114
##     mom_age           0.225    0.329    0.683    0.495    0.225    0.030
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .kid_score        20.985    9.088    2.309    0.021   20.985    1.029
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .kid_score       326.279   22.149   14.731    0.000  326.279    0.785
## 
## R-Square:
##                    Estimate
##     kid_score         0.215
# Repeat the regression using only summary statistics (means and covariances).
# First gather the four model variables as named columns of a matrix.
ks_mat <- with(
  kid_scoredata,
  cbind(
    kid_score = kid_score,
    mom_iq = mom_iq,
    mom_hs = mom_hs,
    mom_age = mom_age
  )
)
head(ks_mat)
##      kid_score    mom_iq mom_hs mom_age
## [1,]        65 121.11753      1      27
## [2,]        98  89.36188      1      25
## [3,]        85 115.44316      1      27
## [4,]        83  99.44964      1      25
## [5,]       115  92.74571      1      27
## [6,]        98 107.90184      0      18
# Mean of every column of ks_mat (supplied to lavaan as sample.mean below)
var_means <- colMeans(x = ks_mat)
var_means
##   kid_score      mom_iq      mom_hs     mom_age 
##  86.7972350 100.0000000   0.7857143  22.7857143
# Sample covariance matrix of the columns of ks_mat (supplied to lavaan as
# sample.cov below)
var_Cov <- cov(x = ks_mat)
var_Cov
##            kid_score     mom_iq    mom_hs   mom_age
## kid_score 416.596205 137.244279 1.9864731 5.0719235
## mom_iq    137.244279 225.000000 1.7420527 3.7116099
## mom_hs      1.986473   1.742053 0.1687562 0.2380403
## mom_age     5.071923   3.711610 0.2380403 7.2957770
# lavaan fit from summary statistics: the covariance matrix, the mean vector
# and the number of observations stand in for the raw data.
cov_fit <- sem(
  lavaan_mod,
  sample.cov = var_Cov,
  sample.mean = var_means,
  # derive N from the matrix the statistics were computed on instead of
  # hard-coding 434, so it cannot drift out of sync with the data
  sample.nobs = nrow(ks_mat)
)
cov_fit
## lavaan 0.6.13 ended normally after 26 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           434
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
# Summary of the covariance-based fit with R-square and standardized
# estimates; `standardized` is spelled out in full rather than relying on
# partial argument matching of `standardize`.
summary(cov_fit, rsquare = TRUE, standardized = TRUE, header = FALSE)
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   kid_score ~                                                           
##     mom_iq            0.563    0.060    9.319    0.000    0.563    0.413
##     mom_hs            5.647    2.247    2.513    0.012    5.647    0.114
##     mom_age           0.225    0.329    0.683    0.495    0.225    0.030
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .kid_score        20.985    9.088    2.309    0.021   20.985    1.029
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .kid_score       326.279   22.149   14.731    0.000  326.279    0.785
## 
## R-Square:
##                    Estimate
##     kid_score         0.215
# Results are identical to the earlier lavaan fit that used the raw data