This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
## Load the family_college data set into the workspace.
data(family_college)
## Clean dataset: keep only the teen and parents columns and discard every
## row that has a missing value in either of them.
college_clean <- family_college %>%
  select(teen, parents) %>%
  filter(!(is.na(teen) | is.na(parents)))
## Summary of information and counting.
## Summarise the cleaned data (not the raw data set) so this overview
## describes exactly the rows used by the tables, plot and test that follow.
summary(college_clean)
## teen parents
## college:445 degree:280
## not :347 not :512
## Number of rows in each category of the teen variable.
table(college_clean[["teen"]])
##
## college not
## 445 347
## Number of rows in each category of the parents variable.
table(college_clean[["parents"]])
##
## degree not
## 280 512
## Group parent and teen info to see where each person falls, plus
## proportions: one row per (parents, teen) combination with its count and
## that count's share of the matching parents group.
## count() returns an ungrouped tibble and `.by` scopes the grouping to this
## single mutate() call, so college_table carries no leftover grouping and
## no regrouping message is emitted.
college_table <- college_clean %>%
  count(parents, teen, name = "count") %>%
  mutate(proportion = count / sum(count), .by = parents)
## Bar graph to visualize the information: one bar per parents category,
## filled by teen status; position = "fill" scales every bar to height 1 so
## the segments read as proportions rather than raw counts.
college_clean %>%
  ggplot(mapping = aes(x = parents, fill = teen)) +
  geom_bar(position = "fill") +
  labs(
    title = "Teens in College + Parent Education",
    x = "Parent Education Status",
    y = "Proportion"
  )
## Compare counts and test whether the two variables depend on each other:
## cross-tabulate parents (rows) against teen (columns), then run a
## chi-squared test on that contingency table and print the result.
table_college <- table(
  college_clean[["parents"]],
  college_clean[["teen"]]
)
chi_result <- chisq.test(table_college)
chi_result
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table_college
## X-squared = 120.17, df = 1, p-value < 2.2e-16
## Test statistic stored in the chi-squared result.
chi_result[["statistic"]]
## X-squared
## 120.1723
## Exact p-value of the test (the printed summary only shows "< 2.2e-16").
chi_result[["p.value"]]
## [1] 5.799686e-28
## Expected cell counts stored in the chi-squared result.
chi_result[["expected"]]
##
## college not
## degree 157.3232 122.6768
## not 287.6768 224.3232