# Load the request-time measurements. The column spec "cd" reads the first
# column as character and the second as double.
portais <- read_csv(
  file = here::here("data/requests-portais.csv"),
  col_types = "cd"
)

# Quick structural check of the loaded table.
glimpse(portais)
Rows: 120
Columns: 2
$ site <chr> "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1", "g1"…
$ time <dbl> 1.600459, 1.471800, 1.398192, 1.380360, 1.386828, 1.529101, 1.393405, 1.357071, 1.402241, 1.421621, 1.5732…
# One box per site, comparing the distribution of response times.
boxplot(
  time ~ site,
  data   = portais,
  col    = "orange",
  border = "brown",
  main   = "Diferentes boxplot para cada tempo de acesso ao site",
  xlab   = "SITES",
  ylab   = "tempo de resposta"
)
# Ridgeline density of response time, one ridge per site. The fill follows the
# x position computed by the stat; after_stat(x) is the current spelling of the
# deprecated `..x..` notation.
portais %>%
  ggplot(aes(x = time, y = site, fill = after_stat(x))) +
  geom_density_ridges_gradient(scale = 1, rel_min_height = 0.01) +
  scale_fill_viridis(name = "Time", option = "C") +
  labs(
    title = "Gráfico de densidade dos tempos de requisição",
    y = "site",
    x = ""
  )
Picking joint bandwidth of 0.0143
Média dos tempos de resposta dos dados que obtivemos.
# Bootstrap statistic: mean response time of one resample.
#
# d: data frame with a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value (NaN when `i` selects no rows).
s <- function(d, i) {
  # Index the column directly: the statistic runs once per replicate, so
  # skipping the intermediate summary table keeps each call cheap.
  mean(d$time[i])
}
# Bootstrap the overall mean: 2000 resamples drawn from every row of `portais`.
# No row filter is applied, so all sites contribute to the statistic.
booted <- boot(
  data = portais,
  statistic = s,
  R = 2000
)

# 95% "basic" bootstrap confidence interval around the estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 0.7126767
$ bias <dbl> 0.0004759096
$ std.error <dbl> 0.04028966
$ conf.low <dbl> 0.6297315
$ conf.high <dbl> 0.7880092
# Number of rows with a positive response time.
# NOTE(review): `abusaram` is not used anywhere in the visible code — confirm
# whether it is still needed.
abusaram <- nrow(filter(portais, time > 0))

# Point estimate and bootstrap confidence interval of the overall mean, drawn
# as a horizontal range (coord_flip) with the interval bounds printed beside it.
# NOTE(review): the y label calls the value a proportion, while `estimado`
# holds a mean of `time` — confirm the intended wording.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "media do tempo"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    title = "ic p media de requisiçoes em
todos os sites",
    x = "", y = "proporção da media de tempo") +
  coord_flip()
# Bootstrap statistic: mean response time of the "uol" rows in one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value, visibly (NaN when the resample contains no
# "uol" row with a positive time).
s <- function(d, i) {
  times <- d$time[i]
  sites <- d$site[i]
  # which() discards rows whose comparison is NA instead of propagating them.
  keep <- which(times > 0 & sites == "uol")
  mean(times[keep])
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` keeps a single site after resampling all rows, so the
# number of rows entering the statistic varies between replicates — confirm
# whether a stratified or pre-filtered bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 0.3556272
$ bias <dbl> 0.0001132402
$ std.error <dbl> 0.003764978
$ conf.low <dbl> 0.3511333
$ conf.high <dbl> 0.3711073
# Point estimate and bootstrap confidence interval for "uol", drawn as a
# horizontal range (coord_flip) with the interval bounds printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "uol"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Fixed axis window chosen by hand around the interval.
  scale_y_continuous(limits = c(0.3, 0.4)) +
  labs(
    title = "bootstrap de uol",
    x = "", y = "") +
  coord_flip()
# Bootstrap statistic: mean response time of the "g1" rows in one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value, visibly (NaN when the resample contains no
# "g1" row with a positive time).
s <- function(d, i) {
  times <- d$time[i]
  sites <- d$site[i]
  # which() discards rows whose comparison is NA instead of propagating them.
  keep <- which(times > 0 & sites == "g1")
  mean(times[keep])
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` keeps a single site after resampling all rows, so the
# number of rows entering the statistic varies between replicates — confirm
# whether a stratified or pre-filtered bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 1.443589
$ bias <dbl> -0.0003648705
$ std.error <dbl> 0.01412669
$ conf.low <dbl> 1.41999
$ conf.high <dbl> 1.479095
# Point estimate and bootstrap confidence interval for "g1", drawn as a
# horizontal range (coord_flip) with the interval bounds printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "g1"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Fixed axis window chosen by hand around the interval.
  scale_y_continuous(limits = c(1.4,1.5)) +
  labs(
    title = "bootstrap de g1",
    x = "", y = "") +
  coord_flip()
# Bootstrap statistic: mean response time of the "folha" rows in one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value, visibly (NaN when the resample contains no
# "folha" row with a positive time).
s <- function(d, i) {
  times <- d$time[i]
  sites <- d$site[i]
  # which() discards rows whose comparison is NA instead of propagating them.
  keep <- which(times > 0 & sites == "folha")
  mean(times[keep])
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` keeps a single site after resampling all rows, so the
# number of rows entering the statistic varies between replicates — confirm
# whether a stratified or pre-filtered bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 0.3791591
$ bias <dbl> 0.0001237104
$ std.error <dbl> 0.006366766
$ conf.low <dbl> 0.3715093
$ conf.high <dbl> 0.4046983
# Point estimate and bootstrap confidence interval for "folha", drawn as a
# horizontal range (coord_flip) with the interval bounds printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "folha"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Fixed axis window chosen by hand around the interval.
  scale_y_continuous(limits = c(0.3, 0.5)) +
  labs(
    title = "bootstrap da folha",
    x = "", y = "") +
  coord_flip()
# Bootstrap statistic: mean response time of the "terra" rows in one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value, visibly (NaN when the resample contains no
# "terra" row with a positive time).
s <- function(d, i) {
  times <- d$time[i]
  sites <- d$site[i]
  # which() discards rows whose comparison is NA instead of propagating them.
  keep <- which(times > 0 & sites == "terra")
  mean(times[keep])
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` keeps a single site after resampling all rows, so the
# number of rows entering the statistic varies between replicates — confirm
# whether a stratified or pre-filtered bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 0.6723314
$ bias <dbl> 0.0002129121
$ std.error <dbl> 0.009258262
$ conf.low <dbl> 0.6594893
$ conf.high <dbl> 0.699436
# Point estimate and bootstrap confidence interval for "terra", drawn as a
# horizontal range (coord_flip) with the interval bounds printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "terra"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Fixed axis window chosen by hand around the interval.
  scale_y_continuous(limits = c(0.63, 0.71)) +
  labs(
    title = "bootstrap do terra",
    x = "", y = "") +
  coord_flip()
A partir daqui iremos ver a diferença entre os mais rápidos e os mais lentos.
# Bootstrap statistic: mean("folha") - mean("uol") within one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value (NaN when either site has no row with a
# positive time in the resample).
s <- function(d, i) {
  # Resample once and reuse it for both sites.
  times <- d$time[i]
  sites <- d$site[i]
  positive <- times > 0
  # which() discards rows whose comparison is NA instead of propagating them.
  mean_folha <- mean(times[which(positive & sites == "folha")])
  mean_uol <- mean(times[which(positive & sites == "uol")])
  mean_folha - mean_uol
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` splits the resample by site afterwards, so the number of
# rows per site varies between replicates — confirm whether a stratified
# bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 0.02353193
$ bias <dbl> -8.626706e-05
$ std.error <dbl> 0.007263431
$ conf.low <dbl> 0.01258833
$ conf.high <dbl> 0.0447813
# Point estimate and bootstrap confidence interval for the folha - uol
# difference, drawn as a horizontal range (coord_flip) with the interval bounds
# printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "folha -uol "
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Axis starts at 0 so the distance of the interval from zero is visible.
  scale_y_continuous(limits = c(0, .2)) +
  labs(
    title = "bootstrap de folha-uol ",
    x = "", y = "") +
  coord_flip()
# Bootstrap statistic: mean("g1") - mean("terra") within one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a single numeric value (NaN when either site has no row with a
# positive time in the resample).
s <- function(d, i) {
  # Resample once and reuse it for both sites.
  times <- d$time[i]
  sites <- d$site[i]
  positive <- times > 0
  # which() discards rows whose comparison is NA instead of propagating them.
  mean_g1 <- mean(times[which(positive & sites == "g1")])
  mean_terra <- mean(times[which(positive & sites == "terra")])
  mean_g1 - mean_terra
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` splits the resample by site afterwards, so the number of
# rows per site varies between replicates — confirm whether a stratified
# bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 5
$ statistic <dbl> 0.7712579
$ bias <dbl> -0.0008769099
$ std.error <dbl> 0.01678933
$ conf.low <dbl> 0.7391313
$ conf.high <dbl> 0.8057938
# Point estimate and bootstrap confidence interval for the g1 - terra
# difference, drawn as a horizontal range (coord_flip) with the interval bounds
# printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "g1- terra"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Axis starts at 0 so the distance of the interval from zero is visible.
  scale_y_continuous(limits = c(0,1)) +
  labs(
    title = "diferença entre g1- terra ",
    x = "", y = "") +
  coord_flip()
# Bootstrap statistic: difference between the 75th percentiles of "g1" and
# "terra" response times within one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a length-one numeric that keeps the "75%" name produced by
# quantile() (NA when either site has no row with a positive time).
s <- function(d, i) {
  # Resample once and reuse it for both sites.
  times <- d$time[i]
  sites <- d$site[i]
  positive <- times > 0
  # which() discards rows whose comparison is NA instead of propagating them.
  q75_g1 <- quantile(times[which(positive & sites == "g1")], 0.75)
  q75_terra <- quantile(times[which(positive & sites == "terra")], 0.75)
  q75_g1 - q75_terra
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` splits the resample by site afterwards, so the number of
# rows per site varies between replicates — confirm whether a stratified
# bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 6
$ term <chr> "75%"
$ statistic <dbl> 0.8024518
$ bias <dbl> -0.001828
$ std.error <dbl> 0.02717992
$ conf.low <dbl> 0.734869
$ conf.high <dbl> 0.8488607
# Point estimate and bootstrap confidence interval for the g1 - terra
# difference of 75th percentiles, drawn as a horizontal range (coord_flip) with
# the interval bounds printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "g1- terra"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Axis starts at 0 so the distance of the interval from zero is visible.
  scale_y_continuous(limits = c(0,1)) +
  labs(
    title = "diferença entre g1- terra usando 75
percentil ",
    x = "", y = "") +
  coord_flip()
# Bootstrap statistic: difference between the 75th percentiles of "folha" and
# "uol" response times within one resample.
#
# d: data frame with a character `site` column and a numeric `time` column.
# i: row indices of the resample, as supplied by boot().
# Returns a length-one numeric that keeps the "75%" name produced by
# quantile() (NA when either site has no row with a positive time).
s <- function(d, i) {
  # Resample once and reuse it for both sites.
  times <- d$time[i]
  sites <- d$site[i]
  positive <- times > 0
  # which() discards rows whose comparison is NA instead of propagating them.
  q75_folha <- quantile(times[which(positive & sites == "folha")], 0.75)
  q75_uol <- quantile(times[which(positive & sites == "uol")], 0.75)
  q75_folha - q75_uol
}
# Resample the rows of `portais` 2000 times, evaluating `s` on each resample.
# NOTE(review): `s` splits the resample by site afterwards, so the number of
# rows per site varies between replicates — confirm whether a stratified
# bootstrap was intended.
booted <- boot(portais, statistic = s, R = 2000)

# 95% BCa confidence interval around the bootstrap estimate.
estimado <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "bca"
)
glimpse(estimado)
Rows: 1
Columns: 6
$ term <chr> "75%"
$ statistic <dbl> 0.019703
$ bias <dbl> 0.0009260522
$ std.error <dbl> 0.003659925
$ conf.low <dbl> 0.01359475
$ conf.high <dbl> 0.02665727
# Point estimate and bootstrap confidence interval for the folha - uol
# difference of 75th percentiles, drawn as a horizontal range (coord_flip) with
# the interval bounds printed beside it.
estimado %>%
  ggplot(aes(
    ymin = conf.low,
    y = statistic,
    ymax = conf.high,
    x = "folha - uol"
  )) +
  geom_linerange() +
  geom_point(color = "steelblue", size = 2) +
  geom_text(
    aes(
      y = conf.high,
      label = str_glue("[{round(conf.low, 2)}, {round(conf.high, 2)}]")
    ),
    size = 3,
    nudge_x = -.05,
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE
  ) +
  # Axis starts at 0 so the distance of the interval from zero is visible.
  scale_y_continuous(limits = c(0, .2)) +
  labs(
    title = "diferença entre folha - uol usando 75
percentil ",
    x = "", y = "") +
  coord_flip()