library(infer) #install_github("tidymodels/infer")
library(tidyverse)
The most exciting inference of them all!
First, calculate observed mean and save as obs_stat. This will be used in all settings.
# Observed statistic: the sample mean of `hours` in `gss`.
# Saved as `obs_stat` because the p-value and confidence-interval calls
# further down all take it as input.
obs_stat <- gss %>%
  specify(response = hours) %>%
  calculate(stat = "mean")
Provided as reference.
# Fix the RNG seed so the resampled results below are reproducible.
set.seed(1234)

# Null distribution of the mean: 1000 bootstrap replicates of `hours`
# under the point null hypothesis mu = 40.
null_dist <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

# Two-sided p-value of the observed mean against the null distribution.
null_dist %>%
  get_p_value(obs_stat = obs_stat, direction = "both")
## # A tibble: 1 x 1
## p_value
## <dbl>
## 1 0.056

# Plot the null distribution and shade the two-sided p-value region.
null_dist %>%
  visualize() +
  shade_p_value(obs_stat = obs_stat, direction = "both")
# Bootstrap distribution of the mean: 1000 bootstrap replicates of `hours`
# (no hypothesis is specified here).
boot_dist <- gss %>%
  specify(response = hours) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

# 95% confidence interval using the standard-error method, built around
# the observed mean.
ci <- boot_dist %>%
  get_confidence_interval(
    level = 0.95,
    type = "se",
    point_estimate = obs_stat
  )

# Plot the bootstrap distribution and shade the interval.
boot_dist %>%
  visualize() +
  shade_confidence_interval(endpoints = ci)
ASSUME() instead of generate()
Previous suggestions for the name of this verb include assume(), theorize(), etc.
# Proposed API: same pipeline as the simulation-based test, but the
# generate()/calculate() steps are replaced by a single ASSUME() call that
# names a t-distribution with 499 degrees of freedom as the null distribution.
null_dist <- gss %>%
  specify(response = hours) %>%
  hypothesize(null = "point", mu = 40) %>%
  ASSUME(distribution = "t", df = 499)

# Two-sided p-value, called exactly as in the simulation-based workflow.
get_p_value(
  null_dist,
  obs_stat = obs_stat,
  direction = "both"
)

# Plot the assumed null distribution and shade the two-sided p-value region.
visualize(null_dist) +
  shade_p_value(obs_stat = obs_stat, direction = "both")
Go straight to get_p_value() / get_confidence_interval()
# do nothing instead of generate
# Proposed API: no distribution object is built; the distribution name
# ("t") and its degrees of freedom are handed directly to get_p_value().
get_p_value(
  "t",
  df = 499,
  obs_stat = obs_stat,
  direction = "both"
)

# Plot `null_dist` and shade the two-sided p-value region.
# Note that this snippet does not itself create `null_dist`.
visualize(null_dist) +
  shade_p_value(obs_stat = obs_stat, direction = "both")
# Proposed API: ASSUME() called on its own (no data pipeline) to declare
# a t-distribution with 499 degrees of freedom as the sampling distribution.
null_dist <- ASSUME("t", df = 499)

# Two-sided p-value of the observed statistic under the assumed distribution.
get_p_value(
  null_dist,
  obs_stat = obs_stat,
  direction = "both"
)

# Plot the assumed distribution and shade the two-sided p-value region.
visualize(null_dist) +
  shade_p_value(obs_stat = obs_stat, direction = "both")
ASSUME() instead of generate()
Previous suggestions for the name of this verb include assume(), theorize(), etc.
# Proposed API: sampling distribution of the mean obtained by piping the
# specified response into ASSUME() (t-distribution, 499 degrees of freedom)
# in place of generate()/calculate().
sampling_dist <- gss %>%
  specify(response = hours) %>%
  ASSUME(distribution = "t", df = 499)

# 95% confidence interval around the observed mean.
# Note: unlike the bootstrap version, no `type` argument is passed.
ci <- get_confidence_interval(
  sampling_dist,
  level = 0.95,
  point_estimate = obs_stat
)

# Plot the sampling distribution and shade the interval.
# Note: the author has never seen a CLT-based CI visualized this way.
visualize(sampling_dist) +
  shade_confidence_interval(endpoints = ci)
Go straight to get_p_value() / get_confidence_interval()
# do nothing instead of generate
# Proposed API: no distribution object is built; the distribution name
# ("t") and its degrees of freedom go directly to get_confidence_interval().
ci <- get_confidence_interval(
  "t",
  df = 499,
  level = 0.95,
  point_estimate = obs_stat
)

# Plot the distribution and shade the interval.
# Note: a CLT-based CI is not something the author has seen visualized this
# way in any case, and it is unclear how visualize() could support it: the
# call below would not work, since the t-distribution is centered at 0.
visualize("t", df = 499) +
  shade_confidence_interval(endpoints = ci)
# Proposed API: ASSUME() called on its own (no data pipeline) to declare
# a t-distribution with 499 degrees of freedom as the sampling distribution.
sampling_dist <- ASSUME("t", df = 499)

# 95% confidence interval around the observed mean.
ci <- get_confidence_interval(
  sampling_dist,
  level = 0.95,
  point_estimate = obs_stat
)

# Plot the sampling distribution and shade the interval.
# Note: a CLT-based CI is not something the author has seen visualized this
# way in any case, and it is unclear how visualize() could support it: the
# call below would not work, since the t-distribution is centered at 0.
visualize(sampling_dist) +
  shade_confidence_interval(endpoints = ci)