knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(infer)
table (yrbss$text_while_driving_30d)
## 
##             0           1-2         10-19         20-29           3-5 
##          4792           925           373           298           493 
##            30           6-9 did not drive 
##           827           311          4646
yrbss %>%filter(text_while_driving_30d =="30" & helmet_12m =="never") %>%nrow()/nrow(yrbss)
## [1] 0.03408673

E1&2

0= 4792, 1-2 = 925, 6-9 = 311, 10-19 = 373, 20-29 = 298, 30 = 827, DND = 4646

The proportion is 0.034

p <-yrbss %>%filter(text_while_driving_30d =="30" & yrbss$helmet_12m =="never") %>%nrow()/nrow(yrbss)

ME<-1.96*sqrt(p*(1-p)/nrow(yrbss))
ME
## [1] 0.003051546

Exercise 3

ME=0.003

table(yrbss$physically_active_7d)
## 
##    0    1    2    3    4    5    6    7 
## 2172  962 1270 1451 1265 1728  840 3622
n1 <-yrbss %>%filter( physically_active_7d > 6) %>%nrow(); n1
## [1] 3622
n = nrow(yrbss); n
## [1] 13583
p <- n1/n; p
## [1] 0.2666569
ME<-1.96*sqrt(p*(1-p)/nrow(yrbss))
ME
## [1] 0.007436836
c(p - ME, p + ME)
## [1] 0.2592200 0.2740937
table(yrbss$strength_training_7d)
## 
##    0    1    2    3    4    5    6    7 
## 3632 1012 1305 1468 1059 1333  513 2085
n1 <-yrbss %>%filter( strength_training_7d > 3) %>%nrow(); n1
## [1] 4990
n = nrow(yrbss); n
## [1] 13583
p <- n1/n; p
## [1] 0.367371
ME<-1.96*sqrt(p*(1-p)/nrow(yrbss))
ME
## [1] 0.008107467
c(p - ME, p + ME)
## [1] 0.3592635 0.3754784
n <- 1000
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) + 
  geom_line() +
  labs(x = "Population Proportion", y = "Margin of Error")

n <- 10
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) + 
  geom_line() +
  labs(x = "Population Proportion", y = "Margin of Error")

## Exercise 5

The ME seems to increase with the population increase. At 50% it begins to decrease

n <- 300
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) + 
  geom_line() +
  labs(x = "Population Proportion", y = "Margin of Error")

Exercise 6

The shape appears to be mostly normal

p<-0.1
n<-300

(p*(1-p)/n)^.5
## [1] 0.01732051
.1-(p*(1-p)/n)^.5
## [1] 0.08267949
.1+(p*(1-p)/n)^.5
## [1] 0.1173205
p<-0.1
n<-400

(p*(1-p)/n)^.5
## [1] 0.015
.1-(p*(1-p)/n)^.5
## [1] 0.085
.1+(p*(1-p)/n)^.5
## [1] 0.115

Exercise 8

As n increases, p becomes tighter

sleep <- yrbss  %>%
  filter(school_night_hours_sleep == "10+")

strengthTraining <- yrbss %>%
  mutate(text_ind = ifelse(strength_training_7d == "7", "yes", "no"))
strengthTraining %>%
  filter(text_ind != "") %>%
  specify(response = text_ind, success = "yes") %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "prop") %>%
  get_ci(level = 0.95)

Exercise 10

Type 1 is a false positive, so we can reject the null hypothesis.

Exercise 11

ME= 1.96SE = 1.96 (p(1-p)/n)^0.5