R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
# Load the family_college dataset from the openintro package attached above.
data("family_college", package = "openintro")

## Clean the dataset: keep the teen and parents columns and drop rows with missing values

# Keep only the two variables under study and discard any row in which
# either of them is missing.
college_clean <- family_college %>%
  select(teen, parents) %>%
  drop_na(teen, parents)

## Summarize the data and count the observations in each category

# Per-column summary; for these two columns it prints the count of each level.
# NOTE(review): this summarises the raw family_college, not college_clean --
# confirm that is intended (the totals below match, so no rows were dropped).
summary(family_college)
##       teen       parents   
##  college:445   degree:280  
##  not    :347   not   :512
# Frequency of each level of teen in the cleaned data.
table(college_clean$teen)
## 
## college     not 
##     445     347
# Frequency of each level of parents in the cleaned data.
table(college_clean$parents)
## 
## degree    not 
##    280    512

## Group by parent and teen status to count each combination and compute proportions within each parent group

# Count the observations in every parents x teen combination, then express
# each count as a share of its parents group (proportions sum to 1 within
# each level of parents).
college_table <- college_clean %>%
  group_by(parents, teen) %>%
  # Drop only the innermost grouping (teen) explicitly: the result stays
  # grouped by parents for the proportion step, and dplyr no longer has to
  # emit its regrouping message.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count)) %>%
  # Return an ungrouped tibble so later verbs are not silently applied
  # per parents group.
  ungroup()

## Bar graph showing the proportion of teens who attend college within each parent education group

# Stacked bar per parent education level, rescaled to height 1
# (position = "fill") so the bars show proportions rather than raw counts.
ggplot(data = college_clean, mapping = aes(x = parents, fill = teen)) +
  geom_bar(position = "fill") +
  labs(
    title = "Teens in College + Parent Education",
    x = "Parent Education Status",
    y = "Proportion"
  )

## Chi-squared test of independence: compare observed and expected counts to see whether the two variables are associated

# Cross-tabulate parent education (rows) against teen college status (columns).
table_college <- table(college_clean[["parents"]], college_clean[["teen"]])
# Pearson chi-squared test on the 2 x 2 table; chisq.test() applies Yates'
# continuity correction to 2 x 2 tables by default.
chi_result <- chisq.test(x = table_college)
chi_result
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table_college
## X-squared = 120.17, df = 1, p-value < 2.2e-16
# Value of the chi-squared test statistic.
chi_result$statistic
## X-squared 
##  120.1723
# Unrounded p-value (the printed test output above only reports "< 2.2e-16").
chi_result$p.value
## [1] 5.799686e-28
# Expected cell counts under the null hypothesis of independence.
chi_result$expected
##         
##           college      not
##   degree 157.3232 122.6768
##   not    287.6768 224.3232