library(haven)
## Warning: package 'haven' was built under R version 4.2.2
# Import the SPSS file; the path is relative to the working directory
kid_scoredata <- read_sav(file = "kid_scoredata.sav")
# Preview the first six rows of the data set
head(kid_scoredata, n = 6L)
## # A tibble: 6 × 5
## kid_score mom_hs mom_iq mom_work mom_age
## <dbl> <dbl+lbl> <dbl> <dbl+lbl> <dbl>
## 1 65 1 [Graduated high school] 121. 4 [Mother worked fu… 27
## 2 98 1 [Graduated high school] 89.4 4 [Mother worked fu… 25
## 3 85 1 [Graduated high school] 115. 4 [Mother worked fu… 27
## 4 83 1 [Graduated high school] 99.4 3 [Mother worked pa… 25
## 5 115 1 [Graduated high school] 92.7 4 [Mother worked fu… 27
## 6 98 0 [Did not graduate high school] 108. 1 [Mother did not w… 18
# Sample means of the four analysis variables (printed one at a time)
df <- kid_scoredata
mean(df[["mom_hs"]])
## [1] 0.7857143
mean(df[["mom_iq"]])
## [1] 100
mean(df[["mom_age"]])
## [1] 22.78571
mean(df[["kid_score"]])
## [1] 86.79724
# Correlation matrix, then covariance matrix, of
# mom_hs, mom_iq, mom_age and kid_score
cor(
  kid_scoredata[c("mom_hs", "mom_iq", "mom_age", "kid_score")]
)
## mom_hs mom_iq mom_age kid_score
## mom_hs 1.0000000 0.2827094 0.21452839 0.23691643
## mom_iq 0.2827094 1.0000000 0.09160840 0.44827584
## mom_age 0.2145284 0.0916084 1.00000000 0.09199819
## kid_score 0.2369164 0.4482758 0.09199819 1.00000000
cov(
  kid_scoredata[c("mom_hs", "mom_iq", "mom_age", "kid_score")]
)
## mom_hs mom_iq mom_age kid_score
## mom_hs 0.1687562 1.742053 0.2380403 1.986473
## mom_iq 1.7420527 225.000000 3.7116099 137.244279
## mom_age 0.2380403 3.711610 7.2957770 5.071923
## kid_score 1.9864731 137.244279 5.0719235 416.596205
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## Warning: package 'ggplot2' was built under R version 4.2.2
## Warning: package 'tibble' was built under R version 4.2.2
## Warning: package 'tidyr' was built under R version 4.2.2
## Warning: package 'readr' was built under R version 4.2.2
## Warning: package 'purrr' was built under R version 4.2.2
## Warning: package 'dplyr' was built under R version 4.2.2
## Warning: package 'stringr' was built under R version 4.2.2
## Warning: package 'forcats' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Add Product, the row-wise product of mom_iq and mom_hs, as a new column
kid_scoredata <- kid_scoredata %>%
  mutate(Product = mom_iq * mom_hs)
head(kid_scoredata, n = 6L)
## # A tibble: 6 × 6
## kid_score mom_hs mom_iq mom_work mom_age Product
## <dbl> <dbl+lbl> <dbl> <dbl+lbl> <dbl> <dbl>
## 1 65 1 [Graduated high school] 121. 4 [Mother w… 27 121.
## 2 98 1 [Graduated high school] 89.4 4 [Mother w… 25 89.4
## 3 85 1 [Graduated high school] 115. 4 [Mother w… 27 115.
## 4 83 1 [Graduated high school] 99.4 3 [Mother w… 25 99.4
## 5 115 1 [Graduated high school] 92.7 4 [Mother w… 27 92.7
## 6 98 0 [Did not graduate high school] 108. 1 [Mother d… 18 0
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.2.2
## This is lavaan 0.6-13
## lavaan is FREE software! Please report any bugs.
# Ordinary least-squares fit of kid_score on mom_iq, mom_hs and mom_age
lmout <- lm(
  formula = kid_score ~ mom_iq + mom_hs + mom_age,
  data = kid_scoredata
)
summary(lmout)
##
## Call:
## lm(formula = kid_score ~ mom_iq + mom_hs + mom_age, data = kid_scoredata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.289 -12.421 2.399 11.223 50.169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.98466 9.13013 2.298 0.0220 *
## mom_iq 0.56254 0.06065 9.276 <2e-16 ***
## mom_hs 5.64715 2.25766 2.501 0.0127 *
## mom_age 0.22475 0.33075 0.680 0.4972
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.15 on 430 degrees of freedom
## Multiple R-squared: 0.215, Adjusted R-squared: 0.2095
## F-statistic: 39.25 on 3 and 430 DF, p-value: < 2.2e-16
# lavaan model syntax for the same regression as the lm() fit above; the
# "1" term asks for the intercept of kid_score to be estimated.
# Note: the syntax is only a character string, so summary() on it would
# report nothing but its length/class/mode. Summaries are taken from the
# fitted object instead.
lavaan_mod <- "kid_score ~ 1 +mom_iq+mom_hs+mom_age"
# Fit the model to the raw data and print the short fit overview
reg_fit <- sem(lavaan_mod, data = kid_scoredata)
reg_fit
## lavaan 0.6.13 ended normally after 26 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 434
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
# Parameter table for the raw-data fit:
#   rsquare = TRUE      -> also report R-square for kid_score
#   standardized = TRUE -> add the standardized columns (Std.lv, Std.all)
#   header = FALSE      -> omit the header block already shown by `reg_fit`
# The argument name is spelled out in full instead of relying on partial
# matching of `standardize`.
summary(reg_fit, rsquare = TRUE, standardized = TRUE, header = FALSE)
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## kid_score ~
## mom_iq 0.563 0.060 9.319 0.000 0.563 0.413
## mom_hs 5.647 2.247 2.513 0.012 5.647 0.114
## mom_age 0.225 0.329 0.683 0.495 0.225 0.030
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .kid_score 20.985 9.088 2.309 0.021 20.985 1.029
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .kid_score 326.279 22.149 14.731 0.000 326.279 0.785
##
## R-Square:
## Estimate
## kid_score 0.215
# Repeat the regression from summary statistics (covariances and means).
# First gather the model variables into one matrix, outcome column first.
ks_mat <- with(
  kid_scoredata,
  cbind(
    kid_score = kid_score,
    mom_iq = mom_iq,
    mom_hs = mom_hs,
    mom_age = mom_age
  )
)
head(ks_mat, n = 6L)
## kid_score mom_iq mom_hs mom_age
## [1,] 65 121.11753 1 27
## [2,] 98 89.36188 1 25
## [3,] 85 115.44316 1 27
## [4,] 83 99.44964 1 25
## [5,] 115 92.74571 1 27
## [6,] 98 107.90184 0 18
# Column means of the model variables (assigned and printed in one step)
(var_means <- colMeans(ks_mat))
## kid_score mom_iq mom_hs mom_age
## 86.7972350 100.0000000 0.7857143 22.7857143
# Sample covariance matrix of the same columns (assigned and printed)
(var_Cov <- cov(ks_mat))
## kid_score mom_iq mom_hs mom_age
## kid_score 416.596205 137.244279 1.9864731 5.0719235
## mom_iq 137.244279 225.000000 1.7420527 3.7116099
## mom_hs 1.986473 1.742053 0.1687562 0.2380403
## mom_age 5.071923 3.711610 0.2380403 7.2957770
# lavaan fit from summary statistics only: the covariance matrix, the
# variable means and the number of observations.
# The sample size is read from the data matrix instead of being typed in
# by hand, so it cannot drift out of sync with the data.
cov_fit <- sem(
  lavaan_mod,
  sample.cov = var_Cov,
  sample.mean = var_means,
  sample.nobs = nrow(ks_mat)
)
cov_fit
## lavaan 0.6.13 ended normally after 26 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 434
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
# Parameter table for the covariance-based fit, with R-square and the
# standardized columns (Std.lv, Std.all); header = FALSE omits the header
# block already shown by `cov_fit`. `standardized` is spelled out in full
# instead of relying on partial matching of `standardize`.
summary(cov_fit, rsquare = TRUE, standardized = TRUE, header = FALSE)
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## kid_score ~
## mom_iq 0.563 0.060 9.319 0.000 0.563 0.413
## mom_hs 5.647 2.247 2.513 0.012 5.647 0.114
## mom_age 0.225 0.329 0.683 0.495 0.225 0.030
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .kid_score 20.985 9.088 2.309 0.021 20.985 1.029
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .kid_score 326.279 22.149 14.731 0.000 326.279 0.785
##
## R-Square:
## Estimate
## kid_score 0.215
#Results are identical to earlier lavaan fit