Opening dataset and loading packages
setting education as a factor
framingham <- as.data.frame(
framingham %>%
mutate(education.f = ifelse(education == 1, "Some HS",
ifelse(education == 2, "HS Grad/GED",
ifelse(education == 3, "Some College",
ifelse(education == 4, "College Grad", NA))))
)
)
# factor levels
framingham$education.f <- factor(
framingham$education.f,
levels = c("Some HS", "HS Grad/GED", "Some College", "College Grad")
)
plot(y = framingham$sysBP, x = factor(framingham$education.f),
ylab = "sysBP", xlab = "Education Level")

psych::describeBy(framingham$sysBP, group = framingham$education.f)
##
## Descriptive statistics by group
## group: Some HS
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 1720 135.85 23.37 131 133.55 20.76 83.5 295 211.5 1.09 2.03
## se
## X1 0.56
## ------------------------------------------------------------
## group: HS Grad/GED
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1253 130.83 21.24 127.5 128.78 18.53 83.5 248 164.5 1.19 2.52 0.6
## ------------------------------------------------------------
## group: Some College
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 687 129.38 20.69 125 127.18 17.79 85 215 130 1.19 1.89 0.79
## ------------------------------------------------------------
## group: College Grad
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 473 128.12 19.44 125 126 17.05 93.5 207 113.5 1.04 1.15 0.89
model1 <- lm(sysBP ~ education.f,
data = framingham,
na.action = na.exclude)
anova(model1)
## Analysis of Variance Table
##
## Response: sysBP
## Df Sum Sq Mean Sq F value Pr(>F)
## education.f 3 38493 12830.9 26.811 < 2.2e-16 ***
## Residuals 4129 1976027 478.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
r-squared
summary(model1)$r.squared
## [1] 0.01910762
Model with coefficients and confidence intervals
summary(model1)
##
## Call:
## lm(formula = sysBP ~ education.f, data = framingham, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.349 -15.828 -3.849 11.151 159.151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 135.8494 0.5275 257.542 < 2e-16 ***
## education.fHS Grad/GED -5.0214 0.8125 -6.180 7.03e-10 ***
## education.fSome College -6.4681 0.9873 -6.551 6.42e-11 ***
## education.fCollege Grad -7.7321 1.1358 -6.808 1.13e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.88 on 4129 degrees of freedom
## (105 observations deleted due to missingness)
## Multiple R-squared: 0.01911, Adjusted R-squared: 0.01839
## F-statistic: 26.81 on 3 and 4129 DF, p-value: < 2.2e-16
coef_table <- cbind(
summary(model1)$coefficients,
confint(model1)
)
coef_table
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 135.849419 0.5274846 257.541969 0.000000e+00
## education.fHS Grad/GED -5.021406 0.8125156 -6.180073 7.026962e-10
## education.fSome College -6.468050 0.9873462 -6.550945 6.419748e-11
## education.fCollege Grad -7.732082 1.1357912 -6.807662 1.134910e-11
## 2.5 % 97.5 %
## (Intercept) 134.815265 136.883573
## education.fHS Grad/GED -6.614374 -3.428438
## education.fSome College -8.403781 -4.532320
## education.fCollege Grad -9.958845 -5.505320
Calculating cohen’s d
(MSR <- anova(model1)[2, 3])
## [1] 478.5728
sqrt_MSR <- sqrt(MSR)
d_tab <- cbind(
d = coef(model1)[-1],
confint(model1)[-1, ]
) / sqrt_MSR
d_tab
## d 2.5 % 97.5 %
## education.fHS Grad/GED -0.2295363 -0.3023533 -0.1567192
## education.fSome College -0.2956646 -0.3841499 -0.2071794
## education.fCollege Grad -0.3534455 -0.4552343 -0.2516567
tukey’s hsd test
tukey_results <- TukeyHSD(aov(model1))
tukey_results
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = model1)
##
## $education.f
## diff lwr upr p adj
## HS Grad/GED-Some HS -5.021406 -7.109635 -2.9331772 0.0000000
## Some College-Some HS -6.468050 -9.005607 -3.9304933 0.0000000
## College Grad-Some HS -7.732082 -10.651155 -4.8130099 0.0000000
## Some College-HS Grad/GED -1.446645 -4.115760 1.2224707 0.5038012
## College Grad-HS Grad/GED -2.710677 -5.744810 0.3234566 0.0990635
## College Grad-Some College -1.264032 -4.623267 2.0952031 0.7682029
cohen’s d from post hoc differences and CIs
tukey_d <- TukeyHSD(aov(model1))[[1]][, -4] / sqrt(MSR)
tukey_d
## diff lwr upr
## HS Grad/GED-Some HS -0.22953627 -0.3249925 -0.13408009
## Some College-Some HS -0.29566464 -0.4116603 -0.17966896
## College Grad-Some HS -0.35344552 -0.4868809 -0.22001017
## Some College-HS Grad/GED -0.06612837 -0.1881378 0.05588104
## College Grad-HS Grad/GED -0.12390925 -0.2626042 0.01478571
## College Grad-Some College -0.05778087 -0.2113367 0.09577499