library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 3.0.0 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at /cloud/project
library(DT)
Os dados
set.seed(12345)
skoob = read_csv("skoob.csv")
## Parsed with column specification:
## cols(
## tempo = col_double()
## )
skoob = skoob %>%
sample_n(70) %>%
select(tempo)
glimpse(skoob)
## Rows: 70
## Columns: 1
## $ tempo <dbl> 0.432630, 0.446415, 0.449505, 0.492261, 0.462408, 0.480776, 0.4…
Informações
theta_c = skoob %>%
summarise(media = mean(tempo)) %>%
pull(media)
theta_c
## [1] 0.5129356
repeticoes = 4000
um_bootstrap <- function(x){
tempo = x %>% pull(tempo)
boot_x <- sample(tempo,
size = NROW(tempo),
replace = TRUE)
return(mean(boot_x))
}
set.seed(1212)
# A REAMOSTRAGEM
reamostragens = tibble(i = 1:repeticoes) %>%
mutate(theta_c_s = map_dbl(i, ~ um_bootstrap(skoob)))
sketch = htmltools::withTags(table(
tableHeader(skoob),
tableFooter(skoob)
))
datatable(head(reamostragens, 4000), colnames = c('ID' = 1, 'Tempo (ms)' = 2), options = list(pageLength = 10, dom = 'tip'), rownames = FALSE)
reamostragens %>%
ggplot(aes(x = theta_c_s)) +
geom_histogram(binwidth = 0.01,
colour = "darkorange",
fill = "white")

reamostragens %>%
ggplot(aes(x = theta_c_s - theta_c)) +
geom_histogram(binwidth = 0.01,
colour = "darkblue",
fill = "white")

confianca = .95
alpha = 1 - confianca
intervalo = reamostragens %>%
mutate(erro = theta_c_s - theta_c) %>%
summarise(erro_i = quantile(erro, alpha / 2),
erro_s = quantile(erro, 1 - alpha /2)) %>%
mutate(valor_i = theta_c + erro_i,
valor_s = theta_c + erro_s)
datatable(head(intervalo, 4000), colnames = c('erro_i' = 1, 'erro_s' = 2, 'valor_i' = 3, 'valor_s' = 4), options = list(pageLength = 10, dom = 'tip'), rownames = FALSE)
- Visualizando graficamente
ggplot() +
geom_rect(
data = intervalo,
aes(xmin = valor_i, xmax = valor_s),
ymin = -Inf,
ymax = Inf,
fill = "gold",
alpha = .25
) +
geom_histogram(
data = reamostragens,
aes(theta_c_s),
binwidth = .01,
fill = "white",
colour = "darkgrey"
) +
geom_vline(xintercept = theta_c,
color = "blue",
size = 1.2) +
geom_vline(xintercept = theta_c, color = "dark green") +
labs(title = expression("Intervalo estimado via bootstrap"))
