Opening dataset and loading packages

setting education as a factor

framingham <- as.data.frame(
  framingham %>%
    mutate(education.f = ifelse(education == 1, "Some HS",
                         ifelse(education == 2, "HS Grad/GED",
                         ifelse(education == 3, "Some College",
                         ifelse(education == 4, "College Grad", NA))))
    )
)

# factor levels
framingham$education.f <- factor(
  framingham$education.f,
  levels = c("Some HS", "HS Grad/GED", "Some College", "College Grad")
)
plot(y = framingham$sysBP, x = factor(framingham$education.f),
     ylab = "sysBP", xlab = "Education Level")

psych::describeBy(framingham$sysBP, group = framingham$education.f)
## 
##  Descriptive statistics by group 
## group: Some HS
##    vars    n   mean    sd median trimmed   mad  min max range skew kurtosis
## X1    1 1720 135.85 23.37    131  133.55 20.76 83.5 295 211.5 1.09     2.03
##      se
## X1 0.56
## ------------------------------------------------------------ 
## group: HS Grad/GED
##    vars    n   mean    sd median trimmed   mad  min max range skew kurtosis  se
## X1    1 1253 130.83 21.24  127.5  128.78 18.53 83.5 248 164.5 1.19     2.52 0.6
## ------------------------------------------------------------ 
## group: Some College
##    vars   n   mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 687 129.38 20.69    125  127.18 17.79  85 215   130 1.19     1.89 0.79
## ------------------------------------------------------------ 
## group: College Grad
##    vars   n   mean    sd median trimmed   mad  min max range skew kurtosis   se
## X1    1 473 128.12 19.44    125     126 17.05 93.5 207 113.5 1.04     1.15 0.89
model1 <- lm(sysBP ~ education.f,
             data = framingham,
             na.action = na.exclude)


anova(model1)
## Analysis of Variance Table
## 
## Response: sysBP
##               Df  Sum Sq Mean Sq F value    Pr(>F)    
## education.f    3   38493 12830.9  26.811 < 2.2e-16 ***
## Residuals   4129 1976027   478.6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

r-squared

summary(model1)$r.squared
## [1] 0.01910762

Model with coefficients and confidence intervals

summary(model1)
## 
## Call:
## lm(formula = sysBP ~ education.f, data = framingham, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.349 -15.828  -3.849  11.151 159.151 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             135.8494     0.5275 257.542  < 2e-16 ***
## education.fHS Grad/GED   -5.0214     0.8125  -6.180 7.03e-10 ***
## education.fSome College  -6.4681     0.9873  -6.551 6.42e-11 ***
## education.fCollege Grad  -7.7321     1.1358  -6.808 1.13e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.88 on 4129 degrees of freedom
##   (105 observations deleted due to missingness)
## Multiple R-squared:  0.01911,    Adjusted R-squared:  0.01839 
## F-statistic: 26.81 on 3 and 4129 DF,  p-value: < 2.2e-16
coef_table <- cbind(
  summary(model1)$coefficients,
  confint(model1)
)


coef_table
##                           Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)             135.849419  0.5274846 257.541969 0.000000e+00
## education.fHS Grad/GED   -5.021406  0.8125156  -6.180073 7.026962e-10
## education.fSome College  -6.468050  0.9873462  -6.550945 6.419748e-11
## education.fCollege Grad  -7.732082  1.1357912  -6.807662 1.134910e-11
##                              2.5 %     97.5 %
## (Intercept)             134.815265 136.883573
## education.fHS Grad/GED   -6.614374  -3.428438
## education.fSome College  -8.403781  -4.532320
## education.fCollege Grad  -9.958845  -5.505320

Calculating cohen’s d

(MSR <- anova(model1)[2, 3])        
## [1] 478.5728
sqrt_MSR <- sqrt(MSR)               


d_tab <- cbind(
  d = coef(model1)[-1],             
  confint(model1)[-1, ]             
) / sqrt_MSR

d_tab
##                                  d      2.5 %     97.5 %
## education.fHS Grad/GED  -0.2295363 -0.3023533 -0.1567192
## education.fSome College -0.2956646 -0.3841499 -0.2071794
## education.fCollege Grad -0.3534455 -0.4552343 -0.2516567

tukey’s hsd test

tukey_results <- TukeyHSD(aov(model1))


tukey_results
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = model1)
## 
## $education.f
##                                diff        lwr        upr     p adj
## HS Grad/GED-Some HS       -5.021406  -7.109635 -2.9331772 0.0000000
## Some College-Some HS      -6.468050  -9.005607 -3.9304933 0.0000000
## College Grad-Some HS      -7.732082 -10.651155 -4.8130099 0.0000000
## Some College-HS Grad/GED  -1.446645  -4.115760  1.2224707 0.5038012
## College Grad-HS Grad/GED  -2.710677  -5.744810  0.3234566 0.0990635
## College Grad-Some College -1.264032  -4.623267  2.0952031 0.7682029

cohen’s d from post hoc differences and CIs

tukey_d <- TukeyHSD(aov(model1))[[1]][, -4] / sqrt(MSR)
tukey_d
##                                  diff        lwr         upr
## HS Grad/GED-Some HS       -0.22953627 -0.3249925 -0.13408009
## Some College-Some HS      -0.29566464 -0.4116603 -0.17966896
## College Grad-Some HS      -0.35344552 -0.4868809 -0.22001017
## Some College-HS Grad/GED  -0.06612837 -0.1881378  0.05588104
## College Grad-HS Grad/GED  -0.12390925 -0.2626042  0.01478571
## College Grad-Some College -0.05778087 -0.2113367  0.09577499