library(haven)
## Warning: package 'haven' was built under R version 4.5.2
# Load the addhealth_wave1_wave5 Stata (.dta) file with haven.
addhealth_wave1_wave5 <- read_dta("C:/Users/minur/Downloads/addhealth_wave1_wave5.dta")
# View() opens the spreadsheet-style data viewer, which only makes sense in an
# interactive session; skip it when the document is knitted or run via Rscript.
if (interactive()) {
  View(addhealth_wave1_wave5)
}
further: haven, ggplot2, dplyr
library(haven)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
# Re-read the Stata file, then inspect its shape (rows, columns) and the
# per-column summaries.
addhealth_wave1_wave5 <- read_dta(
  file = "C:/Users/minur/Downloads/addhealth_wave1_wave5.dta"
)
dim(x = addhealth_wave1_wave5)
## [1] 1839 79
summary(object = addhealth_wave1_wave5)
## id intmonth intyear sch_yr
## Length:1839 Min. : 1.000 Min. :94 Min. :0.0000
## Class :character 1st Qu.: 5.000 1st Qu.:95 1st Qu.:0.0000
## Mode :character Median : 6.000 Median :95 Median :0.0000
## Mean : 6.467 Mean :95 Mean :0.3589
## 3rd Qu.: 7.000 3rd Qu.:95 3rd Qu.:1.0000
## Max. :12.000 Max. :95 Max. :1.0000
##
## sex birthmonth birthyear grade
## Min. :1.000 Min. : 1.000 Min. :74.00 Min. : 7.000
## 1st Qu.:1.000 1st Qu.: 4.000 1st Qu.:78.00 1st Qu.: 8.000
## Median :2.000 Median : 7.000 Median :79.00 Median : 9.000
## Mean :1.619 Mean : 6.582 Mean :79.11 Mean : 9.472
## 3rd Qu.:2.000 3rd Qu.: 9.000 3rd Qu.:80.00 3rd Qu.:11.000
## Max. :2.000 Max. :12.000 Max. :83.00 Max. :12.000
## NA's :30
## chores tvhrs sleep sch_skip
## Min. :0.000 Min. : 0.00 Min. : 3.000 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 5.00 1st Qu.: 7.000 1st Qu.: 0.000
## Median :2.000 Median :10.00 Median : 8.000 Median : 0.000
## Mean :2.065 Mean :15.83 Mean : 7.793 Mean : 1.625
## 3rd Qu.:3.000 3rd Qu.:21.00 3rd Qu.: 8.000 3rd Qu.: 0.000
## Max. :3.000 Max. :99.00 Max. :13.000 Max. :99.000
## NA's :4 NA's :5 NA's :26
## suspended sch_focus sr_intel H1TO30
## Min. :0.0000 Min. :0.000 Min. :1.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:3.000 1st Qu.: 0.00
## Median :0.0000 Median :1.000 Median :4.000 Median : 0.00
## Mean :0.2198 Mean :1.255 Mean :4.054 Mean : 3.48
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:5.000 3rd Qu.: 0.00
## Max. :1.0000 Max. :4.000 Max. :6.000 Max. :18.00
## NA's :1 NA's :26 NA's :3 NA's :13
## H1TO53 H1PA7 nghbrs coll_desire
## Min. :0.0000 Min. :0.000 Min. :1.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:4.000
## Median :0.0000 Median :0.000 Median :1.000 Median :5.000
## Mean :0.2505 Mean :0.207 Mean :1.263 Mean :4.507
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:2.000 3rd Qu.:5.000
## Max. :1.0000 Max. :1.000 Max. :2.000 Max. :5.000
## NA's :11 NA's :18 NA's :40 NA's :3
## coll_likely S1 S3 S10A
## Min. :1.000 Min. :11.00 Min. : 7.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:13.00 1st Qu.: 8.000 1st Qu.:1.000
## Median :5.000 Median :15.00 Median : 9.000 Median :2.000
## Mean :4.274 Mean :14.75 Mean : 9.427 Mean :2.035
## 3rd Qu.:5.000 3rd Qu.:16.00 3rd Qu.:11.000 3rd Qu.:3.000
## Max. :5.000 Max. :19.00 Max. :12.000 Max. :5.000
## NA's :3 NA's :444 NA's :446 NA's :613
## S10B S44 S45A S45B
## Min. :1.000 Min. :0.0000 Min. :0.000 Min. :0.00
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:6.000 1st Qu.:2.00
## Median :2.000 Median :0.0000 Median :8.000 Median :4.00
## Mean :2.215 Mean :0.1221 Mean :6.797 Mean :4.42
## 3rd Qu.:3.000 3rd Qu.:0.0000 3rd Qu.:8.000 3rd Qu.:6.00
## Max. :5.000 Max. :1.0000 Max. :8.000 Max. :8.00
## NA's :600 NA's :439 NA's :497 NA's :502
## S45C S45D S45E S45F S47
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.00
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:6.000 1st Qu.:4.000 1st Qu.:2.00
## Median :1.000 Median :0.000 Median :8.000 Median :6.000 Median :2.00
## Mean :1.414 Mean :0.996 Mean :6.585 Mean :5.302 Mean :2.28
## 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:8.000 3rd Qu.:7.000 3rd Qu.:3.00
## Max. :8.000 Max. :8.000 Max. :8.000 Max. :8.000 Max. :4.00
## NA's :514 NA's :603 NA's :649 NA's :516 NA's :472
## S49 S50 p1_edu p1_work
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:4.000 1st Qu.:1.0000
## Median :1.0000 Median :2.000 Median :7.000 Median :1.0000
## Mean :0.5147 Mean :2.057 Mean :6.031 Mean :0.7545
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:8.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :5.000 Max. :9.000 Max. :1.0000
## NA's :479 NA's :469 NA's :222 NA's :214
## p1_happy p1_benes PA55 AH_PVT
## Min. :0.0000 Min. :0.00000 Min. : 0.00 Min. : 14.0
## 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.: 26.00 1st Qu.: 95.0
## Median :1.0000 Median :0.00000 Median : 44.00 Median :105.0
## Mean :0.9654 Mean :0.06843 Mean : 55.23 Mean :104.5
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.: 65.00 3rd Qu.:115.0
## Max. :1.0000 Max. :1.00000 Max. :999.00 Max. :139.0
## NA's :220 NA's :217 NA's :384 NA's :74
## AH_RAW hisp yob white
## Min. : 1.00 Min. :0.00000 Min. :1974 Min. :0.0000
## 1st Qu.:60.00 1st Qu.:0.00000 1st Qu.:1978 1st Qu.:0.0000
## Median :68.00 Median :0.00000 Median :1979 Median :1.0000
## Mean :67.41 Mean :0.08897 Mean :1979 Mean :0.7233
## 3rd Qu.:75.00 3rd Qu.:0.00000 3rd Qu.:1980 3rd Qu.:1.0000
## Max. :87.00 Max. :1.00000 Max. :1983 Max. :1.0000
## NA's :74 NA's :7 NA's :3
## black natam asian race
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:3.000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :5.000
## Mean :0.2081 Mean :0.03268 Mean :0.0305 Mean :4.118
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:5.000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :5.000
## NA's :3 NA's :3 NA's :3 NA's :5
## milk health famst8 w1_inc
## Min. :0.0000 Min. :1.000 Min. :1.00 Min. : 0
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:1.00 1st Qu.: 26000
## Median :1.0000 Median :2.000 Median :1.00 Median : 44000
## Mean :0.5789 Mean :2.076 Mean :2.71 Mean : 55227
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:6.00 3rd Qu.: 65000
## Max. :1.0000 Max. :5.000 Max. :8.00 Max. :999000
## NA's :1 NA's :384
## ed_p1 w1_inc7 w1loginc bottom25
## Min. :1.00 Min. :1.000 Min. : 7.601 Min. :0.0000
## 1st Qu.:2.00 1st Qu.:3.000 1st Qu.:10.194 1st Qu.:0.0000
## Median :3.00 Median :4.000 Median :10.714 Median :0.0000
## Mean :2.98 Mean :4.195 Mean :10.592 Mean :0.2557
## 3rd Qu.:4.00 3rd Qu.:5.000 3rd Qu.:11.082 3rd Qu.:1.0000
## Max. :5.00 Max. :7.000 Max. :13.815 Max. :1.0000
## NA's :222 NA's :384 NA's :395 NA's :384
## top25 H5Q015 bpm_w5 H5PP
## Min. :0.0000 Min. :0.0000 Min. : 42.00 Min. : 18.0
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 66.50 1st Qu.: 37.5
## Median :0.0000 Median :0.0000 Median : 74.00 Median : 43.0
## Mean :0.2509 Mean :0.2316 Mean : 74.94 Mean : 459.8
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 82.38 3rd Qu.: 49.0
## Max. :1.0000 Max. :1.0000 Max. :123.00 Max. :9999.0
## NA's :384 NA's :77
## H5MAP H5Q045A H5Q045B H5Q045C
## Min. : 65.3 Min. : 0.0000 Min. : 0.0000 Min. : 0.00000
## 1st Qu.: 86.2 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00000
## Median : 93.8 Median : 0.0000 Median : 0.0000 Median : 0.00000
## Mean : 508.6 Mean : 0.2382 Mean : 0.1169 Mean : 0.05982
## 3rd Qu.: 102.2 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.00000
## Max. :9999.0 Max. :95.0000 Max. :95.0000 Max. :95.00000
##
## H5Q045D H5Q045E H5Q045F H5HR1
## Min. : 0.00000 Min. : 0.0000 Min. : 0.0000 Min. :1.000
## 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:1.000
## Median : 0.00000 Median : 0.0000 Median : 0.0000 Median :1.000
## Mean : 0.06362 Mean : 0.0609 Mean : 0.1985 Mean :2.374
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:5.000
## Max. :95.00000 Max. :95.0000 Max. :95.0000 Max. :5.000
## NA's :2
## H5EC1 H5EC2 H5EC8 H5EC9
## Min. : 1.000 Min. : 1.000 Min. :0.0000 Min. : 1.000
## 1st Qu.: 5.000 1st Qu.: 8.000 1st Qu.:0.0000 1st Qu.: 4.000
## Median : 8.000 Median :10.000 Median :0.0000 Median : 6.000
## Mean : 7.117 Mean : 9.238 Mean :0.1735 Mean : 5.554
## 3rd Qu.: 9.000 3rd Qu.:11.000 3rd Qu.:0.0000 3rd Qu.: 7.000
## Max. :13.000 Max. :13.000 Max. :1.0000 Max. :10.000
## NA's :16 NA's :298 NA's :6 NA's :6
## H5ID1 H5ID6G H5ID21 H5CJ3
## Min. :1.000 Min. :0.0000 Min. : 0.00 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.: 0.00 1st Qu.:0.0000
## Median :2.000 Median :0.0000 Median : 1.00 Median :0.0000
## Mean :2.439 Mean :0.2934 Mean : 1.86 Mean :0.3007
## 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.: 3.00 3rd Qu.:1.0000
## Max. :5.000 Max. :1.0000 Max. :15.00 Max. :1.0000
## NA's :2 NA's :2 NA's :22 NA's :13
## H5CJ5 edu_w5
## Min. : 0.00 Min. :1.000
## 1st Qu.: 1.00 1st Qu.:3.000
## Median :97.00 Median :3.000
## Mean :68.21 Mean :3.464
## 3rd Qu.:97.00 3rd Qu.:4.000
## Max. :97.00 Max. :5.000
## NA's :1 NA's :2
data <- data %>% mutate(across(where(is.labelled), as_factor))
# Turn every haven-labelled column into a factor built from its value labels.
addhealth_wave1_wave5 <- mutate(
  addhealth_wave1_wave5,
  across(where(is.labelled), as_factor)
)
# Print numbers in fixed rather than scientific notation.
options(scipen = 999)
# Frequency of each parent-education category.
table(addhealth_wave1_wave5[["p1_edu"]])
##
## (1) 8th grade or less
## 46
## (2) >8th grade/didn't graduate high school
## 112
## (3) Business/trade/voc. school instead high school
## 8
## (4) High school graduate
## 395
## (5) Completed a GED
## 52
## (6) Business/trade/voc. school after high school
## 156
## (7) College/didn't graduate
## 329
## (8) Graduated from college/university
## 292
## (9) Prof training beyond 4-year college/univ
## 227
## (10) Never went to school
## 0
## (96) Respondent refused to answer
## 0
is.factor(addhealth_wave1_wave5$p1_edu)
## [1] TRUE
table(addhealth_wave1_wave5$edu_w5)
##
## 1 Less than HS 2 HS/GED 3 Some Coll 4 BA Deg 5 PostBac
## 73 224 678 502 360
is.factor(addhealth_wave1_wave5$edu_w5)
## [1] TRUE
table(addhealth_wave1_wave5$H5EC2)
##
## 1 Less than 5K 2 5K to 9,999 3 10K to 14,999 4 15K to 19,999
## 42 28 34 23
## 5 20K to 24,999 6 25K to 29,999 7 30K to 39,999 8 40K to 49,999
## 41 48 89 110
## 9 50K to 74,999 10 75K to 99,999 11 100K to 149K 12 150K to 199K
## 280 275 303 140
## 13 200K or more
## 128
is.factor(addhealth_wave1_wave5$H5EC2)
## [1] TRUE
## Bar Graph for Parent's Education
# Percentage bar chart of parent's education (ed_p1), one bar per level.
barplot(
  height = 100 * prop.table(table(addhealth_wave1_wave5[["ed_p1"]])),
  names.arg = c("Less than HS", "HS Diploma", "Some College", "BA Degree", "PostBac"),
  main = "Parent's Education",
  ylab = "Percentage",
  ylim = c(0, 35)
)
## Bar Graph for Educational Attainment
# Percentage bar chart of the respondent's own education (edu_w5).
barplot(
  height = 100 * prop.table(table(addhealth_wave1_wave5[["edu_w5"]])),
  names.arg = c("Less than HS", "HS Diploma", "Some College", "BA Degree", "PostBac"),
  main = "Educational Attainment",
  ylab = "Percentage",
  ylim = c(0, 40)
)
table(addhealth_wave1_wave5$H5EC2)
##
## 1 Less than 5K 2 5K to 9,999 3 10K to 14,999 4 15K to 19,999
## 42 28 34 23
## 5 20K to 24,999 6 25K to 29,999 7 30K to 39,999 8 40K to 49,999
## 41 48 89 110
## 9 50K to 74,999 10 75K to 99,999 11 100K to 149K 12 150K to 199K
## 280 275 303 140
## 13 200K or more
## 128
## Bar Graph for Household Income
# Percentage bar chart of household income (H5EC2), 13 ordered brackets.
# No xlim is set: with the default bar width and spacing the 13 bars span
# roughly x = 0.2 to 15.6, so xlim = c(0, 30) left the right half of the plot
# empty and crowded the labels. The top bracket is "200K or more", hence "200+".
barplot(prop.table(table(addhealth_wave1_wave5$H5EC2)) * 100,
        main = "Household Income",
        names.arg = c("<5", "5 to 9.9", "10 to 14.9", "15 to 19.9", "20 to 24.9",
                      "25 to 29.9", "30 to 39.9", "40 to 49.9", "50 to 74.9",
                      "75 to 99.9", "100 to 149", "150 to 199", "200+"),
        ylim = c(0, 25),
        ylab = "Percentage",
        xlab = "Income per 1000 Dollars")
## 2(b) Provide a written description of the distribution of the
variable you find most interesting.
I found the educational attainment variable to be the most interesting, as it appears to have an asymmetric, negatively skewed distribution. The graph shows that the variable is not normally distributed. The frequency of those with less than a high school diploma seems to be dragging the mean down relative to the median, leading to the negative skew. The median shows that the typical level of educational attainment is some college.
table(addhealth_wave1_wave5$ed_p1)
##
## 1 Less than HS 2 HS Diploma 3 Some College 4 BA Degree 5 PostBac
## 166 447 485 292 227
is.factor(addhealth_wave1_wave5$ed_p1)
## [1] TRUE
levels(addhealth_wave1_wave5$ed_p1)
## [1] "1 Less than HS" "2 HS Diploma" "3 Some College" "4 BA Degree"
## [5] "5 PostBac"
# Keep respondents whose parent has a high-school diploma, a BA, or a
# post-baccalaureate degree, and collapse the latter two into "College Degree".
subdata <- addhealth_wave1_wave5 %>%
  filter(ed_p1 %in% c("2 HS Diploma", "4 BA Degree", "5 PostBac")) %>%
  mutate(
    parent_ed2 = if_else(
      ed_p1 == "2 HS Diploma",
      "High School Diploma",
      "College Degree"
    )
  )
# Group sizes after the recode.
table(subdata[["parent_ed2"]])
##
## College Degree High School Diploma
## 519 447
table(addhealth_wave1_wave5$ed_p1)
##
## 1 Less than HS 2 HS Diploma 3 Some College 4 BA Degree 5 PostBac
## 166 447 485 292 227
# Bar chart of household income (H5EC2) for adults whose parent has a
# high-school diploma.
# ylim(0, 100) sets *scale* limits, so any bar taller than 100 is removed
# rather than drawn. coord_cartesian() only zooms the view. The upper limit
# keeps the original 0-100 window but widens it if any bar in either
# parent-education group (including the NA income bar) is taller, so both
# panels stay on one common axis.
income_count_max <- max(table(subdata$parent_ed2, subdata$H5EC2, useNA = "ifany"))
hsplot <- ggplot(subset(subdata, parent_ed2 == "High School Diploma"),
                 aes(x = H5EC2)) +
  geom_bar(fill = "pink") +
  labs(title = "Income for Adults with Parent's Education of High School Diploma") +
  theme_minimal() +
  coord_cartesian(ylim = c(0, max(100, income_count_max))) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
hsplot
# Bar chart of household income (H5EC2) for adults whose parent has a college
# degree (BA or PostBac). Stored under its own name so it does not overwrite
# the high-school plot object.
# ylim(0, 100) sets *scale* limits, so any bar taller than 100 is removed
# rather than drawn. coord_cartesian() only zooms the view. The upper limit
# keeps the original 0-100 window but widens it if any bar in either
# parent-education group (including the NA income bar) is taller.
income_count_max <- max(table(subdata$parent_ed2, subdata$H5EC2, useNA = "ifany"))
collegeplot <- ggplot(subset(subdata, parent_ed2 == "College Degree"),
                      aes(x = H5EC2)) +
  geom_bar(fill = "pink") +
  labs(title = "Income for Adults with Parent's Education of College Degree") +
  theme_minimal() +
  coord_cartesian(ylim = c(0, max(100, income_count_max))) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
collegeplot
Although neither distribution is normal and both are negatively skewed, the income distribution for adults whose parents have a college degree shows a more extreme negative skew, with both the median and the mean being larger than for adults whose parents have only a high school diploma. Comparatively, adults whose parents have a college degree seemingly earn more than adults whose parents have a high school diploma.
Step 1, Assumptions: * Independent random sampling, nominal level of measurement. IV: Parent’s educational attainment. DV: Adult educational attainment. Step 2, Null Hypothesis: * H0: fo = fe There is no relationship between childhood SES as measured by parent’s education and adult SES. * H1: fo =/= fe There is a relationship between childhood SES as measured by parent’s education and adult SES. Step 3, Critical Region: * x^2 distribution, alpha = 0.05. x^2 (critical) = 26.30 for df = (r-1)(c-1) = (5-1)(5-1) = 16, the same df that chisq.test() reports for this 5 x 5 table.
# df = (r-1)(c-1) #
# Cross-tabulate parent's education (rows) by adult education (columns) and
# derive the degrees of freedom from the table's own dimensions.
ses_table_dim <- dim(table(addhealth_wave1_wave5$ed_p1, addhealth_wave1_wave5$edu_w5))
ses_table_dim
## [1] 5 5
chisq_df <- prod(ses_table_dim - 1)
chisq_df
## [1] 16
# Chi square(critical) in R #
# A 5 x 5 table has (5 - 1) * (5 - 1) = 16 degrees of freedom, not 55; this is
# the df that chisq.test() reports for the same table.
qchisq(0.05, chisq_df, lower.tail = FALSE)
## [1] 26.29623
Step 4, Calculate Test Statistic:
chisq.test(table(addhealth_wave1_wave5$ed_p1, addhealth_wave1_wave5$edu_w5))
##
## Pearson's Chi-squared test
##
## data: table(addhealth_wave1_wave5$ed_p1, addhealth_wave1_wave5$edu_w5)
## X-squared = 394.28, df = 16, p-value < 0.00000000000000022
##
## Pearson's Chi-squared test
##
## data: table(addhealth_wave1_wave5$ed_p1, addhealth_wave1_wave5$edu_w5)
## X-squared = 394.28, df = 16, p-value < 0.00000000000000022
Step 5, Make a Decision and Interpret Results:
Our chi-square hypothesis test showed that there is a statistically significant association between adult SES and childhood SES; therefore, we reject the null hypothesis of independence. The test shows that there is a relationship between adult SES and childhood SES at the 0.05 level, because our p-value is much smaller than our alpha.
table(addhealth_wave1_wave5$bpm_w5)
##
## 42 44 45 46.5 47 48 48.5 49 49.5 50 50.5 51.5 52
## 1 1 1 1 2 2 1 1 3 4 3 2 5
## 52.5 53 53.5 54 54.5 55 55.5 56 56.5 57 57.5 58 58.5
## 1 12 2 7 4 6 3 6 8 9 5 12 12
## 59 59.5 60 60.5 61 61.5 62 62.5 63 63.5 64 64.5 65
## 10 17 19 19 21 13 24 19 20 28 20 12 22
## 65.5 66 66.5 67 67.5 68 68.5 69 69.5 70 70.5 71 71.5
## 35 27 27 30 25 32 22 28 34 37 25 30 34
## 72 72.5 73 73.5 74 74.5 75 75.5 76 76.5 77 77.5 78
## 30 29 35 30 38 28 37 27 28 23 23 28 26
## 78.5 79 79.5 80 80.5 81 81.5 82 82.5 83 83.5 84 84.5
## 22 34 24 30 24 24 24 13 28 14 13 19 14
## 85 85.5 86 86.5 87 87.5 88 88.5 89 89.5 90 90.5 91
## 22 20 14 17 14 9 14 11 22 11 8 12 13
## 91.5 92 92.5 93 93.5 94 94.5 95 95.5 96 96.5 97 97.5
## 13 12 10 7 10 15 5 6 3 8 2 7 1
## 98 98.5 99 99.5 100 100.5 101 101.5 102 102.5 103 103.5 104
## 3 3 4 1 5 4 5 3 1 2 3 4 4
## 104.5 105 106.5 107 107.5 108.5 110 111 111.5 112.5 113 114 116.5
## 4 2 2 1 1 3 2 1 3 1 1 1 1
## 117.5 123
## 1 1
is.factor(addhealth_wave1_wave5$bpm_w5)
## [1] FALSE
## [1] FALSE
# Store a factor copy of bpm_w5 as bpm_w2 (one level per distinct value) and
# confirm its type. bpm_w5 itself is left numeric.
addhealth_wave1_wave5$bpm_w2 <- factor(addhealth_wave1_wave5$bpm_w5)
is.factor(addhealth_wave1_wave5$bpm_w2)
## [1] TRUE
## [1] TRUE
levels(addhealth_wave1_wave5$bpm_w2)
## [1] "42" "44" "45" "46.5" "47" "48" "48.5" "49" "49.5"
## [10] "50" "50.5" "51.5" "52" "52.5" "53" "53.5" "54" "54.5"
## [19] "55" "55.5" "56" "56.5" "57" "57.5" "58" "58.5" "59"
## [28] "59.5" "60" "60.5" "61" "61.5" "62" "62.5" "63" "63.5"
## [37] "64" "64.5" "65" "65.5" "66" "66.5" "67" "67.5" "68"
## [46] "68.5" "69" "69.5" "70" "70.5" "71" "71.5" "72" "72.5"
## [55] "73" "73.5" "74" "74.5" "75" "75.5" "76" "76.5" "77"
## [64] "77.5" "78" "78.5" "79" "79.5" "80" "80.5" "81" "81.5"
## [73] "82" "82.5" "83" "83.5" "84" "84.5" "85" "85.5" "86"
## [82] "86.5" "87" "87.5" "88" "88.5" "89" "89.5" "90" "90.5"
## [91] "91" "91.5" "92" "92.5" "93" "93.5" "94" "94.5" "95"
## [100] "95.5" "96" "96.5" "97" "97.5" "98" "98.5" "99" "99.5"
## [109] "100" "100.5" "101" "101.5" "102" "102.5" "103" "103.5" "104"
## [118] "104.5" "105" "106.5" "107" "107.5" "108.5" "110" "111" "111.5"
## [127] "112.5" "113" "114" "116.5" "117.5" "123"
# Histogram of bpm_w5 to eyeball the shape of its distribution.
hist(
  x = addhealth_wave1_wave5[["bpm_w5"]],
  main = "BPM",
  xlab = "BPM",
  ylab = "Frequency",
  col = "lightblue"
)
The dependent variable is BPM, which is measured at the interval-ratio level because its values are inherently numeric with meaningful distances between them. The histogram shows an approximately symmetrical, bell-shaped curve, which is consistent with a normal distribution. Therefore, we can say that BPM is approximately normally distributed according to the histogram.
Step 1, Assumptions: * Independent random sampling, interval-ratio level of measurement, normal sampling distribution, population variances are equal.
Step 2, Null Hypothesis: * H0: u1 = u2 There is no statistically significant difference in mean heart rate between those whose families were in the lowest 25th percentile for income and those whose families were not. Childhood poverty is not associated with a higher risk of heart disease in adulthood. * H1: u1 =/= u2 There is a statistically significant difference in mean heart rate between those whose families were in the lowest 25th percentile for income and those whose families were not. Childhood poverty is associated with a higher risk of heart disease in adulthood.
Step 3, Critical Region: * T-distribution, alpha = 0.05, two-tailed t-test. * T(critical) is +/- 1.96 for 634 degrees of freedom.
t.test(bpm_w5 ~ bottom25, data = addhealth_wave1_wave5)
##
## Welch Two Sample t-test
##
## data: bpm_w5 by bottom25
## t = -3.2173, df = 634.09, p-value = 0.00136
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -3.6497048 -0.8830587
## sample estimates:
## mean in group 0 mean in group 1
## 74.34615 76.61254
## Welch Two Sample t-test
##
## data: bpm_w5 by bottom25
## t = -3.2173, df = 634.09, p-value = 0.00136
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -3.6497048 -0.8830587
## sample estimates:
## mean in group 0 mean in group 1
## 74.34615 76.61254
Step 4, Calculations:
t.test(addhealth_wave1_wave5$bpm_w5[addhealth_wave1_wave5$bottom25 == 0], addhealth_wave1_wave5$bpm_w5[addhealth_wave1_wave5$bottom25 == 1])
##
## Welch Two Sample t-test
##
## data: addhealth_wave1_wave5$bpm_w5[addhealth_wave1_wave5$bottom25 == 0] and addhealth_wave1_wave5$bpm_w5[addhealth_wave1_wave5$bottom25 == 1]
## t = -3.2173, df = 634.09, p-value = 0.00136
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.6497048 -0.8830587
## sample estimates:
## mean of x mean of y
## 74.34615 76.61254
## Welch Two Sample t-test
##
## data: addhealth_wave1_wave5$bpm_w5[addhealth_wave1_wave5$bottom25 == 0] and ## addhealth_wave1_wave5$bpm_w5[addhealth_wave1_wave5$bottom25 == 1]
## t = -3.2173, df = 634.09, p-value = 0.00136
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.6497048 -0.8830587
## sample estimates:
## mean of x mean of y
## 74.34615 76.61254
Step 5, Conclusion: * P-value is 0.00136, which is less than our alpha of 0.05. * t(obtained) is -3.217, which falls beyond our t(critical) of +/-1.96 (|t| = 3.217 > 1.96). * We reject the null hypothesis. * At an alpha level of 0.05, we can conclude that there is a statistically significant difference in the mean heart rate between those whose families were in the lowest 25th percentile for income and those whose families were not.
## 4(c)Write a few sentences interpreting the results of the hypothesis test.
Our two-tailed t-test showed that there is a statistically significant difference in mean heart rate between those whose families were in the lowest 25th percentile for income and those whose families were not. Therefore, we reject the null hypothesis. The test shows that there is a relationship between a higher risk of heart disease in adulthood and childhood poverty at the 0.05 level, because the p-value is smaller than alpha and our t(obtained) falls beyond our t(critical) in absolute value.
## 5. Investigate this question using another measure of childhood SES: parent’s education. Is parent’s education associated with the child’s heart health later in adulthood? Is this relationship significant at the 0.05 level?
## 5(a) Use a boxplot to compare bpm by parent’s education.
``` r
# Side-by-side boxplots of BPM (bpm_w5) for each level of parent's education.
# One fill colour per level: with only four colours for five boxes, R recycles
# the vector and the fifth box (PostBac) repeats the first colour.
boxplot(bpm_w5 ~ ed_p1, data = addhealth_wave1_wave5,
        main = "BPM by Parent's Education",
        xlab = "Parent's Education", ylab = "BPM",
        col = c("lightpink", "purple", "lightblue", "navyblue", "lightgreen"),
        names = c("Less than HS", "HS Diploma", "Some College", "BA Degree", "PostBac"))
# ed_p1 is already a factor, so its numeric form is the level code 1-5.
addhealth_wave1_wave5$p1_edu_num <- as.numeric(addhealth_wave1_wave5$ed_p1)
table(addhealth_wave1_wave5[["p1_edu_num"]])
##
## 1 2 3 4 5
## 166 447 485 292 227
# Collapse the five codes into four groups: BA (4) and PostBac (5) are merged
# into "4 Coll. degr +"; anything else (including NA) becomes NA.
addhealth_wave1_wave5 <- mutate(
  addhealth_wave1_wave5,
  p1_edu2 = case_when(
    p1_edu_num == 1 ~ "1 Less than HS",
    p1_edu_num == 2 ~ "2 HS Diplo/GED",
    p1_edu_num == 3 ~ "3 Some College",
    p1_edu_num %in% c(4, 5) ~ "4 Coll. degr +",
    TRUE ~ NA_character_
  )
)
table(addhealth_wave1_wave5[["p1_edu2"]])
##
## 1 Less than HS 2 HS Diplo/GED 3 Some College 4 Coll. degr +
## 166 447 485 519
table(addhealth_wave1_wave5$p1_edu2, addhealth_wave1_wave5$bpm_w5)
##
## 42 44 45 46.5 47 48 48.5 49 49.5 50 50.5 51.5 52 52.5 53 53.5
## 1 Less than HS 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 2 HS Diplo/GED 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0
## 3 Some College 0 0 0 1 0 1 0 1 1 2 0 1 2 0 0 0
## 4 Coll. degr + 1 1 1 0 2 1 0 0 1 2 1 1 2 1 8 2
##
## 54 54.5 55 55.5 56 56.5 57 57.5 58 58.5 59 59.5 60 60.5 61
## 1 Less than HS 1 0 0 0 1 0 0 0 1 0 0 2 0 0 4
## 2 HS Diplo/GED 0 0 1 1 1 1 1 1 3 3 2 3 3 2 2
## 3 Some College 2 0 3 2 1 5 2 2 4 1 4 3 5 5 4
## 4 Coll. degr + 1 1 2 0 3 1 5 2 2 4 3 8 7 8 10
##
## 61.5 62 62.5 63 63.5 64 64.5 65 65.5 66 66.5 67 67.5 68 68.5
## 1 Less than HS 0 1 1 2 1 4 2 1 1 2 3 0 0 3 3
## 2 HS Diplo/GED 2 7 6 5 4 6 5 8 9 10 6 7 12 2 4
## 3 Some College 2 6 2 7 10 4 0 6 7 5 13 10 4 12 9
## 4 Coll. degr + 7 8 8 6 10 4 5 4 12 7 4 10 6 13 5
##
## 69 69.5 70 70.5 71 71.5 72 72.5 73 73.5 74 74.5 75 75.5 76
## 1 Less than HS 2 3 4 4 4 4 2 3 6 1 6 4 2 3 3
## 2 HS Diplo/GED 2 8 4 5 10 7 6 4 9 8 10 5 15 6 7
## 3 Some College 7 8 16 3 7 8 9 9 8 12 8 8 10 7 7
## 4 Coll. degr + 11 13 9 8 8 13 12 10 10 8 9 8 6 7 9
##
## 76.5 77 77.5 78 78.5 79 79.5 80 80.5 81 81.5 82 82.5 83 83.5
## 1 Less than HS 3 5 3 2 3 3 2 2 1 2 4 2 3 0 0
## 2 HS Diplo/GED 8 6 6 10 9 7 8 10 7 5 8 3 10 4 4
## 3 Some College 4 6 8 6 5 12 6 9 4 7 4 3 4 4 2
## 4 Coll. degr + 5 3 7 5 2 8 5 5 10 8 6 3 9 3 3
##
## 84 84.5 85 85.5 86 86.5 87 87.5 88 88.5 89 89.5 90 90.5 91
## 1 Less than HS 1 1 3 2 3 1 1 0 3 2 3 0 2 1 1
## 2 HS Diplo/GED 4 5 6 3 3 6 4 3 2 2 4 2 1 2 5
## 3 Some College 6 1 7 8 4 3 5 4 2 2 6 3 1 3 4
## 4 Coll. degr + 4 5 4 3 4 3 2 1 5 2 5 3 2 3 2
##
## 91.5 92 92.5 93 93.5 94 94.5 95 95.5 96 96.5 97 97.5 98 98.5
## 1 Less than HS 2 1 1 1 1 1 0 1 0 0 0 0 1 1 0
## 2 HS Diplo/GED 3 3 4 2 4 5 1 2 0 1 0 3 0 1 1
## 3 Some College 3 3 0 1 1 2 2 1 2 2 2 0 0 0 0
## 4 Coll. degr + 3 4 2 1 2 5 1 0 0 4 0 3 0 0 0
##
## 99 99.5 100 100.5 101 101.5 102 102.5 103 103.5 104 104.5 105
## 1 Less than HS 1 0 0 1 0 0 0 0 1 0 0 0 1
## 2 HS Diplo/GED 0 0 2 1 1 1 0 1 1 0 0 0 0
## 3 Some College 1 0 0 1 4 2 0 1 0 1 4 0 1
## 4 Coll. degr + 2 1 3 1 0 0 1 0 1 3 0 3 0
##
## 106.5 107 107.5 108.5 110 111 111.5 112.5 113 114 116.5 117.5
## 1 Less than HS 0 0 0 0 0 0 0 0 0 0 1 0
## 2 HS Diplo/GED 1 0 0 0 0 0 0 1 0 0 0 0
## 3 Some College 0 0 0 0 1 1 2 0 0 1 0 1
## 4 Coll. degr + 0 0 1 1 1 0 1 0 0 0 0 0
##
## 123
## 1 Less than HS 0
## 2 HS Diplo/GED 0
## 3 Some College 1
## 4 Coll. degr + 0
# Mean BPM and number of non-missing BPM values per parent-education group,
# plus the overall number of cases across the groups.
edinc_summary <- addhealth_wave1_wave5 |>
  filter(!is.na(p1_edu2)) |>
  group_by(p1_edu2) |>
  summarise(
    mean_bpm = mean(bpm_w5, na.rm = TRUE),
    n_group = sum(!is.na(bpm_w5)),
    .groups = "drop"
  ) |>
  mutate(total_n = sum(n_group))
edinc_summary
## # A tibble: 4 × 4
## p1_edu2 mean_bpm n_group total_n
## <chr> <dbl> <int> <int>
## 1 1 Less than HS 76.6 159 1548
## 2 2 HS Diplo/GED 75.7 424 1548
## 3 3 Some College 74.9 460 1548
## 4 4 Coll. degr + 73.1 505 1548
## p1_edu2 mean_bpm n_group total_n
## 1 Less than HS 76.63522 159 1548
## 2 HS Diplo/GED 75.69811 424 1548
## 3 Some College 74.91522 460 1548
## 4 Coll. degr + 73.08713 505 1548
Step 1, Assumptions: *Independent random sampling, interval-ratio level of measurement, populations are normally distributed, population variances are equal.
Step 2, Null Hypothesis: * H0: u1 = u2 = u3 = u4 There is no difference in the mean bpm between the populations (parents education). Adult resting heart rate is not related to the educational attainment level of parents. * H1: At least one population mean is different. There is at least one difference in the mean BPM for adults with parents that have: less than a high school diploma, a high school diploma, some college, and a college degree or more. Adult resting heart rate is related to the educational attainment level of parents.
Step 3, Critical Region: * F distribution, alpha = 0.05, F(critical) is 2.61 for dfw of 1544 (n - k = 1548 - 4, counting only cases with both a BPM value and a parent-education group) and dfb of 3.
# dfw = n-k #
# n must count only the cases the ANOVA actually uses: rows with both a BPM
# value and a parent-education group. length() also counts rows with NA, which
# gives 1835 instead of the 1544 residual df reported by anova().
anova_n <- sum(!is.na(addhealth_wave1_wave5$bpm_w5) &
                 !is.na(addhealth_wave1_wave5$p1_edu2))
anova_k <- length(table(addhealth_wave1_wave5$p1_edu2))
dfw <- anova_n - anova_k
dfw
## [1] 1544
# dfb = k-1 #
dfb <- anova_k - 1
dfb
## [1] 3
# F(critical) in R #
# Rounded to two decimals, the precision at which the critical value is reported.
round(qf(0.05, dfb, dfw, lower.tail = FALSE), 2)
## [1] 2.61
Step 4, Calculate Test Statistic:
# One-way ANOVA: fit bpm_w5 on the four-level parent-education grouping with
# lm(), then extract the ANOVA table (between-group and residual df, sums of
# squares, F value, p-value). Rows with a missing value in either variable are
# dropped by lm()'s default NA handling.
anova1 <- anova(lm(bpm_w5 ~ factor(p1_edu2), data = addhealth_wave1_wave5))
anova1
## Analysis of Variance Table
##
## Response: bpm_w5
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(p1_edu2) 3 2353 784.24 5.7877 0.0006213 ***
## Residuals 1544 209212 135.50
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: bpm_w5
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(p1_edu2) 3 2353 784.24 5.7877 0.0006213 ***
## Residuals 1544 209212 135.50
## ---
## Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step 5, Make a Decision and Interpret Results: * F(obtained) = 5.7877, which is beyond our F(critical) of 2.61. * The p-value is 0.0006213, which is less than our alpha of 0.05. * Reject the null hypothesis. Adult resting heart rate is related to parent’s educational attainment level. * At least one mean BPM differs among adults with parents that have: less than a high school diploma, a high school diploma, some college, and a college degree or more.
## 5(c) Write a short paragraph interpreting the results of your analysis and the relationship between these variables.
* People with a higher resting heart rate are at a higher risk of developing heart disease, and we can see by looking at this boxplot that the median BPM is the highest for those with parents that have less than a High School diploma, whereas the median is the lowest for parents with a college degree as seen by both the low median BPM in BA and PostBac degrees.
* Additionally, through our ANOVA test, we have concluded that adult resting heart rate is related to the educational attainment level of parents due to finding at least one statistically significant difference in the mean BPM for adults with parents that have: less than a high school diploma, a high school diploma, some college, and a college degree or more.
* Therefore, we can conclude from both measures that there is a relationship between a child’s heart health (BPM) later in adulthood and their parent’s education. The relationship is significant at the alpha level of 0.05.