Monte Carlo simulation taking a set of polls results.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dslabs)
d <- 0.039
Ns <- c(1298, 533, 1342, 897, 774, 254, 812, 324, 1291, 1056, 2172, 516)
p <- (d + 1) / 2
polls <- map_df(Ns, function(N) {
x <- sample(c(0,1), size=N, replace=TRUE, prob=c(1-p, p))
x_hat <- mean(x)
se_hat <- sqrt(x_hat * (1 - x_hat) / N)
list(estimate = 2 * x_hat - 1,
low = 2*(x_hat - 1.96*se_hat) - 1,
high = 2*(x_hat + 1.96*se_hat) - 1,
sample_size = N)
}) %>% mutate(poll = seq_along(Ns))
library(dslabs)
data(heights)
x <- heights %>% filter(sex == "Male") %>%
pull(height)
library(dslabs)
data(heights)
x <- heights %>% filter(sex == "Male") %>% pull(height)
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.00 67.00 69.00 69.31 72.00 82.68
sd(x)
## [1] 3.611024
mu <- mean(x)
mu
## [1] 69.31475
sig <- sd(x)
sig
## [1] 3.611024
N <- 50
X <- sample(x, N, replace = TRUE)
mean(X)
## [1] 68.74015
sd(X)
## [1] 2.901294
polls data have already been loaded for you. Use the head function to examine them.data(polls_us_election_2016)
polls <- polls_us_election_2016 %>%
filter(pollster %in% c("Rasmussen Reports/Pulse Opinion Research",
"The Times-Picayune/Lucid") &
enddate >= "2016-10-15" &
state == "U.S.") %>%
mutate(spread = rawpoll_clinton/100 - rawpoll_trump/100)
head(as.tibble(polls))
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## # A tibble: 6 x 16
## state startdate enddate pollster grade samplesize population
## <fct> <date> <date> <fct> <fct> <int> <chr>
## 1 U.S. 2016-11-05 2016-11-07 The Times-Picayune/Lu~ <NA> 2521 lv
## 2 U.S. 2016-11-02 2016-11-06 Rasmussen Reports/Pul~ C+ 1500 lv
## 3 U.S. 2016-11-01 2016-11-03 Rasmussen Reports/Pul~ C+ 1500 lv
## 4 U.S. 2016-11-04 2016-11-06 The Times-Picayune/Lu~ <NA> 2584 lv
## 5 U.S. 2016-10-31 2016-11-02 Rasmussen Reports/Pul~ C+ 1500 lv
## 6 U.S. 2016-11-03 2016-11-05 The Times-Picayune/Lu~ <NA> 2526 lv
## # ... with 9 more variables: rawpoll_clinton <dbl>, rawpoll_trump <dbl>,
## # rawpoll_johnson <dbl>, rawpoll_mcmullin <dbl>, adjpoll_clinton <dbl>,
## # adjpoll_trump <dbl>, adjpoll_johnson <dbl>, adjpoll_mcmullin <dbl>,
## # spread <dbl>
sigma that contains a column for pollster and a column for s, the standard deviation of the spreadsigma <- polls %>% group_by(pollster) %>%
summarise(s = sd(spread)) %>%
ungroup()
sigma
## # A tibble: 2 x 2
## pollster s
## <fct> <dbl>
## 1 Rasmussen Reports/Pulse Opinion Research 0.0177
## 2 The Times-Picayune/Lucid 0.0268
res that summarizes the average, standard deviation, and number of polls for the two pollsters.res <- polls %>% group_by(pollster) %>%
summarise(avg = mean(spread),
s = sd(spread),
N = n()) %>%
ungroup()
estimate. Print this value to the console.estimate <- abs(res$avg[1]-res$avg[2])
estimate
## [1] 0.05229167
se_hat. Print this value to the console.se_hat <- sqrt(res$s[2]^2/res$N[2] + res$s[1]^2/res$N[1])
se_hat
## [1] 0.007031433
ci.Q <- qnorm(0.975)
lower <- estimate - Q*se_hat
upper <- estimate + Q*se_hat
ci <- c(lower, upper)
ci
## [1] 0.03851031 0.06607302
res to summarize the average, standard deviation, and number of polls for the two pollsters.res <- polls %>% group_by(pollster) %>%
summarize(avg = mean(spread), s = sd(spread), N = n())
estimate and se_hat contain the spread estimates and standard error, respectively.estimate <- res$avg[2] - res$avg[1]
se_hat <- sqrt(res$s[2]^2/res$N[2] + res$s[1]^2/res$N[1])
2*(1-pnorm(estimate/se_hat))
## [1] 1.030287e-13
polls <- polls_us_election_2016 %>%
filter(enddate >= "2016-10-15" &
state == "U.S.") %>%
group_by(pollster) %>%
filter(n() >= 5) %>%
mutate(spread = rawpoll_clinton/100 - rawpoll_trump/100) %>%
ungroup()
var that contains columns for the pollster, mean spread, and standard deviation. Print the contents of this object to the console.var <- polls %>% group_by(pollster) %>%
summarize(avg = mean(spread), s = sd(spread))
head(var)
## # A tibble: 6 x 3
## pollster avg s
## <fct> <dbl> <dbl>
## 1 ABC News/Washington Post 0.0373 0.0339
## 2 CVOTER International 0.0279 0.0180
## 3 Google Consumer Surveys 0.0303 0.0185
## 4 Gravis Marketing 0.016 0.0152
## 5 IBD/TIPP 0.00105 0.0168
## 6 Ipsos 0.0553 0.0195
\[\begin{align*} \mathrm{P}(1st)& = probability\,of\, developing\, SIDS = 1/8500 \\ \mathrm{P}(2nd \mid 1st)& =\, probability\, of\, developing\, SIDS\, given\, another\, sibling\, has\, the\, condition\, = 1/100 \\ \mathrm{P}(2nd \mid 1st)& =\frac{\mathrm{P}(2nd \cap 1st)}{\mathrm{P}(1st)} \\ \Leftrightarrow\, \\ \mathrm{P}(2nd \cap 1st)& =\mathrm{P}(2nd \mid 1st)*\mathrm{P}(1st)=\mathrm{P}(2nd \cap 1st)=\frac{1}{8500}*\frac{1}{100}=\frac{1}{850000} \end{align*}\]
library(tidyverse)
library(dslabs)
data(polls_us_election_2016)
polls <- polls_us_election_2016 %>%
filter(state == "Florida" & enddate >= "2016-11-04" ) %>%
mutate(spread = rawpoll_clinton/100 - rawpoll_trump/100)
results <- polls %>%
summarize(avg = mean(spread), se = sd(spread)/sqrt(n()))
results
## avg se
## 1 0.004154545 0.007218692
# Load the libraries and poll data
library(dplyr)
library(dslabs)
data(polls_us_election_2016)
# Create an object `polls` that contains the spread of predictions for each candidate in Florida during the last polling days
polls <- polls_us_election_2016 %>%
filter(state == "Florida" & enddate >= "2016-11-04" ) %>%
mutate(spread = rawpoll_clinton/100 - rawpoll_trump/100)
# Examine the `polls` object using the `head` function
head(polls)
## state startdate enddate pollster grade
## 1 Florida 2016-11-03 2016-11-06 Quinnipiac University A-
## 2 Florida 2016-11-02 2016-11-04 YouGov B
## 3 Florida 2016-11-01 2016-11-07 SurveyMonkey C-
## 4 Florida 2016-11-06 2016-11-06 Trafalgar Group C
## 5 Florida 2016-11-05 2016-11-06 Opinion Savvy/InsiderAdvantage C-
## 6 Florida 2016-11-02 2016-11-06 Rasmussen Reports/Pulse Opinion Research C+
## samplesize population rawpoll_clinton rawpoll_trump rawpoll_johnson
## 1 884 lv 46.00 45.00 2.00
## 2 1188 rv 45.00 45.00 NA
## 3 4092 lv 47.00 45.00 4.00
## 4 1100 lv 46.13 49.72 2.43
## 5 843 lv 48.40 46.40 3.00
## 6 525 lv 47.00 45.00 2.00
## rawpoll_mcmullin adjpoll_clinton adjpoll_trump adjpoll_johnson
## 1 NA 46.44315 43.93999 2.098310
## 2 NA 47.07455 46.99468 NA
## 3 NA 45.59190 44.32744 1.692430
## 4 NA 45.75904 46.82230 3.495849
## 5 NA 47.37976 45.68637 2.721857
## 6 NA 47.55885 45.13673 2.418502
## adjpoll_mcmullin spread
## 1 NA 0.0100
## 2 NA 0.0000
## 3 NA 0.0200
## 4 NA -0.0359
## 5 NA 0.0200
## 6 NA 0.0200
# Create an object called `results` that has two columns containing the average spread (`avg`) and the standard error (`se`). Print the results to the console.
results <- polls %>%
summarize(avg = mean(spread), se = sd(spread)/sqrt(n()))
results
## avg se
## 1 0.004154545 0.007218692
# The results` object has already been loaded. Examine the values stored: `avg` and `se` of the spread
results
## avg se
## 1 0.004154545 0.007218692
# Define `mu` and `tau`
mu <- 0
tau <- 0.01
# Define a variable called `sigma` that contains the standard error in the object `results`
sigma <- results %>% pull(se[1])
# Define a variable called `Y` that contains the average in the object `results`
Y <- results %>% pull(avg[1])
# Define a variable `B` using `sigma` and `tau`. Print this value to the console.
B <- sigma^2/(sigma^2+tau^2)
# Calculate the expected value of the posterior distribution
mu+(1-B)*(Y-mu)
## [1] 0.002731286
# Here are the variables we have defined
mu <- 0
tau <- 0.01
sigma <- results$se
Y <- results$avg
B <- sigma^2 / (sigma^2 + tau^2)
# Compute the standard error of the posterior distribution. Print this value to the console.
sqrt(1/((1/sigma^2)+(1/tau^2)))
## [1] 0.005853024
# Here are the variables we have defined in previous exercises
mu <- 0
tau <- 0.01
sigma <- results$se
Y <- results$avg
B <- sigma^2 / (sigma^2 + tau^2)
se <- sqrt( 1/ (1/sigma^2 + 1/tau^2))
E <- mu+(1-B)*(Y-mu) # Expected value
# Construct the 95% credible interval. Save the lower and then the upper confidence interval to a variable called `ci`.
lower <- E-qnorm(.975)*se
upper <- E+qnorm(.975)*se
ci <- c(lower, upper)
ci
## [1] -0.008740432 0.014203003
# Assign the expected value of the posterior distribution to the variable `exp_value`
exp_value <- B*mu + (1-B)*Y
exp_value
## [1] 0.002731286
# Assign the standard error of the posterior distribution to the variable `se`
se <- sqrt( 1/ (1/sigma^2 + 1/tau^2))
# Using the `pnorm` function, calculate the probability that the actual spread was less than 0 (in Trump's favor). Print this value to the console.
pnorm(0,exp_value, se)
## [1] 0.3203769
# Define the variables from previous exercises
mu <- 0
sigma <- results$se
Y <- results$avg
# Define a variable `taus` as different values of tau
taus <- seq(0.005, 0.05, len = 100)
# Create a function called `p_calc` that generates `B` and calculates the probability of the spread being less than 0
p_calc <- function(tau){
B <- sigma^2/(sigma^2+tau^2)
E <- B*mu + (1-B)*Y
se <- sqrt( 1/ (1/sigma^2 + 1/tau^2))
pnorm(0, E, se)
}
# Create a vector called `ps` by applying the function `p_calc` across values in `taus`
ps <- p_calc(taus)
# Plot `taus` on the x-axis and `ps` on the y-axis
chances_df <- data.frame(ps, taus)
ggplot(data = chances_df, mapping = aes(x = taus, y = ps)) +
geom_point(colour = "blue") +
ylab("ps") +
xlab("tau") +
ggtitle("Chances of winning in FL - US Presidential Elections 2016",
subtitle = "estimative based on a vector of values of tau")
# Load the libraries and data
library(dplyr)
library(dslabs)
data("polls_us_election_2016")
# Create a table called `polls` that filters by state, date, and reports the spread
polls <- polls_us_election_2016 %>%
filter(state != "U.S." & enddate >= "2016-10-31") %>%
mutate(spread = rawpoll_clinton/100 - rawpoll_trump/100)
# Create an object called `cis` that has the columns indicated in the instructions
cis <- polls %>% mutate(X_hat = (spread+1)/2,
se = 2*sqrt(X_hat*(1-X_hat)/samplesize),
lower = spread - qnorm(0.975)*se,
upper = spread + qnorm(0.975)*se) %>%
select(state, startdate, enddate, pollster, grade, spread, lower, upper)
# Add the actual results to the `cis` data set
add <- results_us_election_2016 %>% mutate(actual_spread = clinton/100 - trump/100) %>% select(state, actual_spread)
ci_data <- cis %>% mutate(state = as.character(state)) %>% left_join(add, by = "state")
# Create an object called `p_hits` that summarizes the proportion of confidence intervals that contain the actual value. Print this object to the console.
p_hits <- ci_data %>%
mutate(hit = c(actual_spread >= lower & actual_spread <= upper)) %>%
summarize(mean(hit == TRUE))
# The `cis` data have already been loaded for you
add <- results_us_election_2016 %>% mutate(actual_spread = clinton/100 - trump/100) %>% select(state, actual_spread)
ci_data <- cis %>% mutate(state = as.character(state)) %>% left_join(add, by = "state")
# Create an object called `p_hits` that summarizes the proportion of hits for each pollster that has at least 5 polls.
p_hits <- ci_data %>%
mutate(hit = actual_spread >= lower & actual_spread <= upper) %>%
group_by(pollster) %>%
filter(n() >= 5) %>%
summarize(proportion_hits = mean(hit), n = n(), grade = grade[1]) %>%
arrange(desc(proportion_hits))
# The `cis` data have already been loaded for you
add <- results_us_election_2016 %>% mutate(actual_spread = clinton/100 - trump/100) %>% select(state, actual_spread)
ci_data <- cis %>% mutate(state = as.character(state)) %>% left_join(add, by = "state")
# Create an object called `p_hits` that summarizes the proportion of hits for each state that has more than 5 polls.
p_hits <- ci_data %>%
mutate(hit = actual_spread >= lower & actual_spread <= upper) %>%
group_by(state) %>%
filter(n() > 5) %>%
summarize(proportion_hits = mean(hit), n = n()) %>%
arrange(desc(proportion_hits))
library(ggplot2)
# The `p_hits` data have already been loaded for you. Use the `head` function to examine it.
head(p_hits)
## # A tibble: 6 x 3
## state proportion_hits n
## <chr> <dbl> <int>
## 1 Connecticut 1 13
## 2 Delaware 1 12
## 3 Rhode Island 1 10
## 4 New Mexico 0.941 17
## 5 Washington 0.933 15
## 6 Oregon 0.929 14
# Make a barplot of the proportion of hits for each state
p_hits %>%
ggplot(aes(x= reorder(state, +proportion_hits), y= proportion_hits)) +
geom_bar(stat = "identity", width = 0.80, position = position_dodge(0.9), fill= "lightblue") +
ylab("Proportion of polls within the CI") +
xlab("State") +
theme(axis.text=element_text(size=7), aspect.ratio=16/9) +
coord_flip()
# The `cis` data have already been loaded. Examine it using the `head` function.
cis <- cis %>% mutate(state = as.character(state)) %>%
left_join(add, by = "state")
head(cis)
## state startdate enddate pollster grade spread
## 1 New Mexico 2016-11-06 2016-11-06 Zia Poll <NA> 0.02
## 2 Virginia 2016-11-03 2016-11-04 Public Policy Polling B+ 0.05
## 3 Iowa 2016-11-01 2016-11-04 Selzer & Company A+ -0.07
## 4 Wisconsin 2016-10-26 2016-10-31 Marquette University A 0.06
## 5 North Carolina 2016-11-04 2016-11-06 Siena College A 0.00
## 6 Georgia 2016-11-06 2016-11-06 Landmark Communications B -0.03
## lower upper actual_spread
## 1 -0.001331221 0.0413312213 0.083
## 2 -0.005634504 0.1056345040 0.054
## 3 -0.139125210 -0.0008747905 -0.094
## 4 0.004774064 0.1152259363 -0.007
## 5 -0.069295191 0.0692951912 -0.036
## 6 -0.086553820 0.0265538203 -0.051
# Create an object called `errors` that calculates the difference between the predicted and actual spread and indicates if the correct winner was predicted
errors <- cis %>% mutate(error = spread - actual_spread,
hit = sign(spread) == sign(actual_spread))
# Examine the last 6 rows of `errors`
tail(errors)
## state startdate enddate pollster grade spread
## 807 Utah 2016-10-04 2016-11-06 YouGov B -0.0910
## 808 Utah 2016-10-25 2016-10-31 Google Consumer Surveys B -0.0121
## 809 South Dakota 2016-10-28 2016-11-02 Ipsos A- -0.1875
## 810 Washington 2016-10-21 2016-11-02 Ipsos A- 0.0838
## 811 Utah 2016-11-01 2016-11-07 Google Consumer Surveys B -0.1372
## 812 Oregon 2016-10-21 2016-11-02 Ipsos A- 0.0905
## lower upper actual_spread error hit
## 807 -0.1660704570 -0.01592954 -0.180 0.0890 TRUE
## 808 -0.1373083389 0.11310834 -0.180 0.1679 TRUE
## 809 -0.3351563485 -0.03984365 -0.298 0.1105 TRUE
## 810 -0.0004028265 0.16800283 0.162 -0.0782 TRUE
## 811 -0.2519991224 -0.02240088 -0.180 0.0428 TRUE
## 812 -0.0019261469 0.18292615 0.110 -0.0195 TRUE
# Create an object called `errors` that calculates the difference between the predicted and actual spread and indicates if the correct winner was predicted
errors <- cis %>% mutate(error = spread - actual_spread, hit = sign(spread) == sign(actual_spread))
# Create an object called `p_hits` that summarizes the proportion of hits for each state that has 5 or more polls
p_hits <- errors %>% group_by(state) %>%
filter(state >= 5) %>%
summarize(proportion_hits = mean(hit), n = n())
# Make a barplot of the proportion of hits for each state
p_hits %>%
ggplot(aes(x= reorder(state,+proportion_hits), y=proportion_hits))+
geom_bar(stat = "identity", width = 0.80,
position = position_dodge(0.9),fill= "lightblue")+
ylab("Proportion of polls within the CI") +
xlab("State") +
theme(axis.text=element_text(size=7),
aspect.ratio=16/9) +
coord_flip()
# The `errors` data have already been loaded. Examine them using the `head` function.
head(errors)
## state startdate enddate pollster grade spread
## 1 New Mexico 2016-11-06 2016-11-06 Zia Poll <NA> 0.02
## 2 Virginia 2016-11-03 2016-11-04 Public Policy Polling B+ 0.05
## 3 Iowa 2016-11-01 2016-11-04 Selzer & Company A+ -0.07
## 4 Wisconsin 2016-10-26 2016-10-31 Marquette University A 0.06
## 5 North Carolina 2016-11-04 2016-11-06 Siena College A 0.00
## 6 Georgia 2016-11-06 2016-11-06 Landmark Communications B -0.03
## lower upper actual_spread error hit
## 1 -0.001331221 0.0413312213 0.083 -0.063 TRUE
## 2 -0.005634504 0.1056345040 0.054 -0.004 TRUE
## 3 -0.139125210 -0.0008747905 -0.094 0.024 TRUE
## 4 0.004774064 0.1152259363 -0.007 0.067 FALSE
## 5 -0.069295191 0.0692951912 -0.036 0.036 FALSE
## 6 -0.086553820 0.0265538203 -0.051 0.021 TRUE
# Generate a histogram of the error
hist(errors$error)
# Calculate the median of the errors. Print this value to the console.
median(errors$error)
## [1] 0.037
# The `errors` data have already been loaded. Examine them using the `head` function.
head(errors)
## state startdate enddate pollster grade spread
## 1 New Mexico 2016-11-06 2016-11-06 Zia Poll <NA> 0.02
## 2 Virginia 2016-11-03 2016-11-04 Public Policy Polling B+ 0.05
## 3 Iowa 2016-11-01 2016-11-04 Selzer & Company A+ -0.07
## 4 Wisconsin 2016-10-26 2016-10-31 Marquette University A 0.06
## 5 North Carolina 2016-11-04 2016-11-06 Siena College A 0.00
## 6 Georgia 2016-11-06 2016-11-06 Landmark Communications B -0.03
## lower upper actual_spread error hit
## 1 -0.001331221 0.0413312213 0.083 -0.063 TRUE
## 2 -0.005634504 0.1056345040 0.054 -0.004 TRUE
## 3 -0.139125210 -0.0008747905 -0.094 0.024 TRUE
## 4 0.004774064 0.1152259363 -0.007 0.067 FALSE
## 5 -0.069295191 0.0692951912 -0.036 0.036 FALSE
## 6 -0.086553820 0.0265538203 -0.051 0.021 TRUE
# Create a boxplot showing the errors by state for polls with grades B+ or higher
errors %>% filter(grade %in% c("A+", "A", "A-", "B+") | is.na(grade)) %>%
mutate(state = reorder(state, error)) %>%
ggplot(aes(x = state, y = error, fill = state)) +
geom_boxplot()+
geom_point() +
theme(axis.text.x = element_text(size = 7, angle = 90, vjust=.5, hjust=1), legend.position = "none")
# The `errors` data have already been loaded. Examine them using the `head` function.
head(errors)
## state startdate enddate pollster grade spread
## 1 New Mexico 2016-11-06 2016-11-06 Zia Poll <NA> 0.02
## 2 Virginia 2016-11-03 2016-11-04 Public Policy Polling B+ 0.05
## 3 Iowa 2016-11-01 2016-11-04 Selzer & Company A+ -0.07
## 4 Wisconsin 2016-10-26 2016-10-31 Marquette University A 0.06
## 5 North Carolina 2016-11-04 2016-11-06 Siena College A 0.00
## 6 Georgia 2016-11-06 2016-11-06 Landmark Communications B -0.03
## lower upper actual_spread error hit
## 1 -0.001331221 0.0413312213 0.083 -0.063 TRUE
## 2 -0.005634504 0.1056345040 0.054 -0.004 TRUE
## 3 -0.139125210 -0.0008747905 -0.094 0.024 TRUE
## 4 0.004774064 0.1152259363 -0.007 0.067 FALSE
## 5 -0.069295191 0.0692951912 -0.036 0.036 FALSE
## 6 -0.086553820 0.0265538203 -0.051 0.021 TRUE
# Create a boxplot showing the errors by state for states with at least 5 polls with grades B+ or higher.
errors %>% filter(grade %in% c("A+", "A", "A-", "B+") | is.na(grade)) %>%
group_by(state) %>%
filter(state >= 5) %>%
ungroup %>%
mutate(state = reorder(state, error)) %>%
ggplot(aes(x = state, y = error, fill = state)) +
geom_boxplot()+
geom_point() +
theme(axis.text.x = element_text(size = 7, angle = 90, vjust=.5, hjust=1), legend.position = "none")
# Calculate the probability of seeing t-distributed random variables being more than 2 in absolute value when 'df = 3'.
1-pt(2,3)+pt(-2,3)
## [1] 0.139326
# Generate a vector 'df' that contains a sequence of numbers from 3 to 50
df <- c(3:50)
# Make a function called 'pt_func' that calculates the probability that a value is more than |2| for any degrees of freedom
pt_func <- function(n) {
1-pt(2,n)+pt(-2,n)
}
# Generate a vector 'probs' that uses the `pt_func` function to calculate the probabilities
probs <- sapply(df, pt_func)
# Plot 'df' on the x-axis and 'probs' on the y-axis
plot(df, probs)
# Load the necessary libraries and data
library(dslabs)
library(dplyr)
data(heights)
# Use the sample code to generate 'x', a vector of male heights
x <- heights %>% filter(sex == "Male") %>%
.$height
# Create variables for the mean height 'mu', the sample size 'N', and the number of times the simulation should run 'B'
mu <- mean(x)
N <- 15
B <- 10000
se <- sd(x)/sqrt(N)
# Use the `set.seed` function to make sure your answer matches the expected result after random sampling
set.seed(1)
# Generate a logical vector 'res' that contains the results of the simulations
res <- replicate(B, {
X <- sample(x, N, replace = TRUE)
interval <- mean(X) + c(-1,1)*qnorm(0.975)*sd(X)/sqrt(N)
between(mu, interval[1], interval[2])
})
# Calculate the proportion of times the simulation produced values within the 95% confidence interval. Print this value to the console.
mean(res)
## [1] 0.9331
# The vector of filtered heights 'x' has already been loaded for you. Calculate the mean.
mu <- mean(x)
# Use the same sampling parameters as in the previous exercise.
set.seed(1)
N <- 15
B <- 10000
# Generate a logical vector 'res' that contains the results of the simulations using the t-distribution
res <- replicate(B, {
t <- sample(x, N, replace = TRUE)
interval <- mean(t) + c(-1,1)*qt(0.975, N-1)*sd(t)/sqrt(N)
between(mu, interval[1], interval[2])
})
# Calculate the proportion of times the simulation produced values within the 95% confidence interval. Print this value to the console.
mean(res)
## [1] 0.9512