| title: “Lab 5: Statistical Inference” |
| author: “Evan McLaughlin” |
date: “10.4.2020” |
# Global knitr chunk defaults: evaluate every chunk, hide printed text
# results (results = FALSE), and suppress messages.
# NOTE(review): "show" is not one of the documented fig.show values
# ("asis", "hold", "animate", "hide") -- confirm it behaves as intended.
knitr::opts_chunk$set(
  eval = TRUE,
  results = FALSE,
  fig.show = "show",
  message = FALSE
)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.0.4
library(openintro)
library(ggplot2)
library(dplyr)
library(infer)
## Warning: package 'infer' was built under R version 4.0.3
library(trelliscopejs)
## Warning: package 'trelliscopejs' was built under R version 4.0.3
# Simulated population of 100,000 respondents: 80,000 "Benefits" followed by
# 20,000 "Doesn't benefit".
global_monitor <- tibble(
  scientist_work = rep(
    c("Benefits", "Doesn't benefit"),
    times = c(80000, 20000)
  )
)

# Horizontal bar chart of the population counts for each response.
ggplot(data = global_monitor, mapping = aes(x = scientist_work)) +
  geom_bar() +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Do you believe that the work scientists do benefit people like you?"
  )

# Population count and proportion (p) of each response.
global_monitor %>%
  count(scientist_work) %>%
  mutate(p = n / sum(n))
# Fix the RNG seed before the first random draw so that the samples taken
# from here on are identical on every run instead of changing each time.
set.seed(2020)

# First sample of 50 respondents (without replacement) and its proportions.
samp1 <- global_monitor %>%
  sample_n(50)
samp1 %>%
  count(scientist_work) %>%
  mutate(p_samp = n / sum(n))

# Second, independent sample of 50 for comparison with samp1.
samp2 <- global_monitor %>%
  sample_n(50)
samp2 %>%
  count(scientist_work) %>%
  mutate(p_samp2 = n / sum(n))

# Sample of 100 respondents.
samp_h <- global_monitor %>%
  sample_n(100)
samp_h %>%
  count(scientist_work) %>%
  mutate(p_samp_h = n / sum(n))

# Sample of 1000 respondents.
samp_m <- global_monitor %>%
  sample_n(1000)
samp_m %>%
  count(scientist_work) %>%
  mutate(p_samp_m = n / sum(n))
# Sampling distribution of p_hat (share answering "Doesn't benefit") from
# 15,000 samples of size 50 drawn with replacement from global_monitor.
# rep_sample_n() returns the draws grouped by `replicate`, so summarise()
# yields exactly one row per sample.
# Each replicate is summarised directly instead of count() + filter():
# count() emits no row for a response that never occurs in a replicate, so
# filtering afterwards silently dropped every sample whose p_hat is 0.
# The columns (replicate, scientist_work, n, p_hat) match the previous output.
sample_props50 <- global_monitor %>%
  rep_sample_n(size = 50, reps = 15000, replace = TRUE) %>%
  summarise(
    n = sum(scientist_work == "Doesn't benefit"),
    p_hat = mean(scientist_work == "Doesn't benefit")
  ) %>%
  mutate(scientist_work = "Doesn't benefit") %>%
  select(replicate, scientist_work, n, p_hat)

# Histogram of the 15,000 sample proportions.
ggplot(data = sample_props50, aes(x = p_hat)) +
  geom_histogram(binwidth = 0.02) +
  labs(
    x = "p_hat (Doesn't benefit)",
    title = "Sampling distribution of p_hat",
    subtitle = "Sample size = 50, Number of samples = 15000"
  )
# Standard error of p_hat for samples of size 50, sqrt(p * (1 - p) / 50),
# using the mean of the simulated p_hat values as p.
# The name `sd` also belongs to stats::sd(); calls such as sd(x) still reach
# the function, but the name is easy to misread.
sd <- sqrt(
  mean(sample_props50[["p_hat"]]) * (1 - mean(sample_props50[["p_hat"]])) / 50
)
# NOTE(review): the author flagged that this "didn't work" (Evan to check).
# The chunk default results = FALSE hides printed values, which may be why
# nothing appeared -- confirm.
sd

# Same sampling distribution drawn with a coarser bin width.
ggplot(sample_props50, aes(x = p_hat)) +
  geom_histogram(binwidth = 0.075) +
  xlab("sample props")
### Exercise A-4
# One sample of 50 drawn with replacement, reduced to the
# "Doesn't benefit" row together with its sample proportion p_hat.
sample_n(global_monitor, size = 50, replace = TRUE) %>%
  count(scientist_work) %>%
  mutate(p_hat = n / sum(n)) %>%
  filter(scientist_work == "Doesn't benefit")
# 25 samples of size 10 drawn with replacement, one row per sample with the
# count (n) and proportion (p_hat) answering "Doesn't benefit".
# rep_sample_n() returns the draws grouped by `replicate`, so summarise()
# yields exactly one row per sample.
# Each replicate is summarised directly instead of count() + filter():
# count() emits no row for a response that never occurs in a replicate, and
# with only 10 draws that happens often (0.8^10 is roughly 0.11), so the old
# pipeline returned fewer than 25 rows and never showed p_hat = 0.
# The columns (replicate, scientist_work, n, p_hat) match the previous output.
sample_props_small <- global_monitor %>%
  rep_sample_n(size = 10, reps = 25, replace = TRUE) %>%
  summarise(
    n = sum(scientist_work == "Doesn't benefit"),
    p_hat = mean(scientist_work == "Doesn't benefit")
  ) %>%
  mutate(scientist_work = "Doesn't benefit") %>%
  select(replicate, scientist_work, n, p_hat)
sample_props_small
# Histogram of the size-50 sampling distribution of p_hat.
sample_props50 %>%
  ggplot(mapping = aes(x = p_hat)) +
  geom_histogram(binwidth = 0.02)
### Exercise A-6
# Simulated population of 100,000 US adults: 62,000 "Yes" followed by
# 38,000 "No".
us_adults <- tibble(
  climate_change_affects = rep(c("Yes", "No"), times = c(62000, 38000))
)

# Horizontal bar chart of the population counts for each answer.
ggplot(data = us_adults, mapping = aes(x = climate_change_affects)) +
  geom_bar() +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Do you think climate change is affecting your local community?"
  )

# Population count and proportion (p) of each answer.
us_adults %>%
  count(climate_change_affects) %>%
  mutate(p = n / sum(n))