library(pwr)
library(tidyverse)
library(effectsize)
library(readxl)
library(janitor)
library(WebPower)
library(plotly)
set.seed(2022)
pob1 <- rnorm(n = 1000, mean = 123.4, sd = 15.4)
pob2 <- rnorm(n = 1000, mean = 133.4, sd = 10.5)
set.seed(2022)
muestra1 <- sample(x = pob1, size = 50, replace = TRUE)
muestra2 <- sample(x = pob2, size = 50, replace = TRUE)
prueba_t <-
t.test(
x = muestra1,
y = muestra2,
alternative = "two.sided",
conf.level = 0.95,
var.equal = FALSE
)
prueba_t
##
## Welch Two Sample t-test
##
## data: muestra1 and muestra2
## t = -4.0277, df = 82.989, p-value = 0.0001241
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.687210 -4.976746
## sample estimates:
## mean of x mean of y
## 123.3881 133.2201
n
: nĆŗmero de observaciones por
muestrad
: tamaño del efecto (¿?)sig.level
: nivel de significancia
\(\alpha\)power
: potencia de la pruebatype
: tipo de prueba: una muestra, dos
muestras independientes o dos muestras pareadasalternative
: tipo de hipótesis
alternativa\[d = \frac{\mu_1 - \mu_2}{S_{agrupado}}\]
media_muestra1 <- mean(muestra1)
media_muestra2 <- mean(muestra2)
desviacion_agrupada <- sd(c(muestra1, muestra2))
tamano_efecto <- (media_muestra1 - media_muestra2) / desviacion_agrupada
tamano_efecto
## [1] -0.7499455
cohens_d(prueba_t)
pwr.t.test(
n = 50,
d = -0.81 ,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided"
)
##
## Two-sample t test power calculation
##
## n = 50
## d = 0.81
## sig.level = 0.05
## power = 0.9798198
## alternative = two.sided
##
## NOTE: n is number in *each* group
pwr.t.test(
d = -0.81 ,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided",
power = 0.8
)
##
## Two-sample t test power calculation
##
## n = 24.9236
## d = 0.81
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group
experimento <- read_excel("Simpson-Calidad-Leche.xls") %>%
clean_names() %>%
mutate(inv_simpson = as.numeric(inv_simpson))
prueba_t_experimento <-
t.test(
experimento$inv_simpson ~ experimento$alimentation
)
prueba_t_experimento
##
## Welch Two Sample t-test
##
## data: experimento$inv_simpson by experimento$alimentation
## t = -1.844, df = 58.138, p-value = 0.07029
## alternative hypothesis: true difference in means between group traditional and group unifeed is not equal to 0
## 95 percent confidence interval:
## -31.206216 1.279258
## sample estimates:
## mean in group traditional mean in group unifeed
## 53.69034 68.65382
cohens_d(prueba_t_experimento)
experimento %>%
count(alimentation)
pwr.t.test(
n = 29,
d = -0.47,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided"
)
##
## Two-sample t test power calculation
##
## n = 29
## d = 0.47
## sig.level = 0.05
## power = 0.4204649
## alternative = two.sided
##
## NOTE: n is number in *each* group
wp.t(
n1 = 29,
n2 = 34,
d = -0.47,
type = "two.sample.2n",
alpha = 0.05,
alternative = "two.sided"
)
## Unbalanced two-sample t-test
##
## n1 n2 d alpha power
## 29 34 0.47 0.05 0.4484033
##
## NOTE: n1 and n2 are number in *each* group
## URL: http://psychstat.org/ttest2n
resultado <-
wp.t(
n1 = as.integer(seq(5, 100, length.out = 50)),
n2 = as.integer(seq(10, 105, length.out = 50)),
d = -0.47,
type = "two.sample.2n",
alpha = 0.05,
alternative = "two.sided"
)
resultado
## Unbalanced two-sample t-test
##
## n1 n2 d alpha power
## 5 10 0.47 0.05 0.1251726
## 6 11 0.47 0.05 0.1397360
## 8 13 0.47 0.05 0.1685292
## 10 15 0.47 0.05 0.1969916
## 12 17 0.47 0.05 0.2251659
## 14 19 0.47 0.05 0.2530407
## 16 21 0.47 0.05 0.2805809
## 18 23 0.47 0.05 0.3077420
## 20 25 0.47 0.05 0.3344775
## 22 27 0.47 0.05 0.3607424
## 24 29 0.47 0.05 0.3864952
## 26 31 0.47 0.05 0.4116985
## 28 33 0.47 0.05 0.4363198
## 30 35 0.47 0.05 0.4603313
## 32 37 0.47 0.05 0.4837100
## 34 39 0.47 0.05 0.5064371
## 36 41 0.47 0.05 0.5284983
## 37 42 0.47 0.05 0.5392757
## 39 44 0.47 0.05 0.5603193
## 41 46 0.47 0.05 0.5806772
## 43 48 0.47 0.05 0.6003473
## 45 50 0.47 0.05 0.6193303
## 47 52 0.47 0.05 0.6376296
## 49 54 0.47 0.05 0.6552506
## 51 56 0.47 0.05 0.6722009
## 53 58 0.47 0.05 0.6884899
## 55 60 0.47 0.05 0.7041286
## 57 62 0.47 0.05 0.7191291
## 59 64 0.47 0.05 0.7335049
## 61 66 0.47 0.05 0.7472704
## 63 68 0.47 0.05 0.7604408
## 65 70 0.47 0.05 0.7730320
## 67 72 0.47 0.05 0.7850603
## 68 73 0.47 0.05 0.7908687
## 70 75 0.47 0.05 0.8020843
## 72 77 0.47 0.05 0.8127796
## 74 79 0.47 0.05 0.8229719
## 76 81 0.47 0.05 0.8326785
## 78 83 0.47 0.05 0.8419167
## 80 85 0.47 0.05 0.8507039
## 82 87 0.47 0.05 0.8590572
## 84 89 0.47 0.05 0.8669934
## 86 91 0.47 0.05 0.8745291
## 88 93 0.47 0.05 0.8816808
## 90 95 0.47 0.05 0.8884644
## 92 97 0.47 0.05 0.8948956
## 94 99 0.47 0.05 0.9009897
## 96 101 0.47 0.05 0.9067616
## 98 103 0.47 0.05 0.9122257
## 100 105 0.47 0.05 0.9173961
##
## NOTE: n1 and n2 are number in *each* group
## URL: http://psychstat.org/ttest2n
df_resultado <-
data.frame(n1 = resultado$n1,
n2 = resultado$n2,
potencia = resultado$power)
df_resultado %>%
ggplot(aes(x = n1, y = potencia)) +
geom_line() +
geom_hline(yintercept = 0.8, color = "red") +
geom_vline(xintercept = 70, color = "blue")
df_resultado %>%
ggplot(aes(x = n2, y = potencia)) +
geom_line() +
geom_hline(yintercept = 0.8, color = "red") +
geom_vline(xintercept = 75, color = "blue")
df_resultado %>%
plot_ly(
x = ~ n1,
y = ~ n2,
z = ~ potencia
) %>%
add_markers() %>%
layout(width = 800)
creditos <- read_rds("creditos_colombia_genero.Rds")
set.seed(2022)
muestra_1prop <- sample(creditos$genero, size = 50)
table(muestra_1prop)
## muestra_1prop
## H M
## 34 16
\[H_0: p = 0.5 \\ H_1: p \neq 0.5\]
prueba_1prop <-
prop.test(
x = 16,
n = 50,
p = 0.5,
conf.level = 0.95,
alternative = "two.sided"
)
prueba_1prop
##
## 1-sample proportions test with continuity correction
##
## data: 16 out of 50, null probability 0.5
## X-squared = 5.78, df = 1, p-value = 0.01621
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.1992781 0.4683118
## sample estimates:
## p
## 0.32
\[h = (2 \times \arcsin{\sqrt{p1}}) - (2 \times \arcsin{\sqrt{p2}})\]
(2 * asin(sqrt(16/50))) - (2 * asin(sqrt(0.5)))
## [1] -0.3682679
ES.h(p1 = 16/50, p2 = 0.5)
## [1] -0.3682679
pwr.p.test(
h = -0.3682679,
n = 50,
sig.level = 0.05,
alternative = "two.sided"
)
##
## proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.3682679
## n = 50
## sig.level = 0.05
## power = 0.7402418
## alternative = two.sided
pwr.p.test(
h = -0.3682679,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
)
##
## proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.3682679
## n = 57.87338
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
pwr.p.test(
h = -0.3682679,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
) %>%
plot()
\[H_0: p_h - p_m = 0 \\ H_1: p_h - p_m \neq 0 \]
creditos %>%
pull(genero) %>%
table() %>%
prop.table()
## .
## H M
## 0.618123 0.381877
0.618123 - 0.381877
## [1] 0.236246
set.seed(2022)
muestra_mujeres <- sample(creditos$genero, size = 50)
set.seed(2021)
muestra_hombres <- sample(creditos$genero, size = 50)
muestra_mujeres %>% table()
## .
## H M
## 34 16
muestra_hombres %>% table()
## .
## H M
## 20 30
prop.test(
x = c(34, 30),
n = c(50, 50),
alternative = "two.sided",
conf.level = 0.95
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(34, 30) out of c(50, 50)
## X-squared = 0.39062, df = 1, p-value = 0.532
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.1275021 0.2875021
## sample estimates:
## prop 1 prop 2
## 0.68 0.60
ES.h(p1 = 34/50, p2 = 30/50)
## [1] 0.16691
pwr.2p.test(
h = 0.16691,
n = 50,
sig.level = 0.05,
alternative = "two.sided"
)
##
## Difference of proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.16691
## n = 50
## sig.level = 0.05
## power = 0.1328057
## alternative = two.sided
##
## NOTE: same sample sizes
pwr.2p.test(
h = 0.16691,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
) %>%
plot()
set.seed(2022)
muestra_mujeres <- sample(creditos$genero, size = 28)
set.seed(2021)
muestra_hombres <- sample(creditos$genero, size = 47)
muestra_mujeres %>% table()
## .
## H M
## 22 6
muestra_hombres %>% table()
## .
## H M
## 18 29
prop.test(
x = c(22, 29),
n = c(28, 47),
alternative = "two.sided",
conf.level = 0.95
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(22, 29) out of c(28, 47)
## X-squared = 1.585, df = 1, p-value = 0.208
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.06574706 0.40313308
## sample estimates:
## prop 1 prop 2
## 0.7857143 0.6170213
ES.h(p1 = 22/28, p2 = 29/47)
## [1] 0.3720119
pwr.2p2n.test(
h = 0.3720119,
n1 = 28,
n2 = 47,
sig.level = 0.05,
alternative = "two.sided"
)
##
## difference of proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.3720119
## n1 = 28
## n2 = 47
## sig.level = 0.05
## power = 0.3441869
## alternative = two.sided
##
## NOTE: different sample sizes
pwr.2p2n.test(
h = 0.3720119,
n1 = 60,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
) %>%
plot()
\[H_0: \mu = 10.24264 \\ H_1: \mu \neq 10.24264\]
guinea <- read_rds("pasto_guinea.Rds")
guinea %>%
pull(proteina) %>%
mean()
## [1] 10.24264
set.seed(2022)
muestra_1media <- sample(guinea$proteina, size = 971)
prueba_1media <-
t.test(
x = muestra_1media,
mu = 10.24264,
alternative = "two.sided",
conf.level = 0.95
)
prueba_1media
##
## One Sample t-test
##
## data: muestra_1media
## t = -0.076248, df = 970, p-value = 0.9392
## alternative hypothesis: true mean is not equal to 10.24264
## 95 percent confidence interval:
## 10.09661 10.37775
## sample estimates:
## mean of x
## 10.23718
cohens_d(prueba_1media)
pwr.t.test(
n = 50,
d = 0.09,
sig.level = 0.05,
type = "one.sample",
alternative = "two.sided"
)
##
## One-sample t test power calculation
##
## n = 50
## d = 0.09
## sig.level = 0.05
## power = 0.09566464
## alternative = two.sided
pwr.t.test(
power = 0.8,
d = 0.09,
sig.level = 0.05,
type = "one.sample",
alternative = "two.sided"
) %>%
plot()
\[H_0: \mu_1=\mu_2=\mu_3 \\ H_1: \mu_i \neq \mu_j\]
cohen.ES(test = "anov", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = anov
## size = medium
## effect.size = 0.25
pwr.anova.test(
k = 3,
f = 0.25,
sig.level = 0.05,
power = 0.8
)
##
## Balanced one-way analysis of variance power calculation
##
## k = 3
## n = 52.3966
## f = 0.25
## sig.level = 0.05
## power = 0.8
##
## NOTE: n is number in each group
pwr.anova.test(
k = 3,
f = 0.25,
sig.level = 0.05,
power = 0.8
) %>%
plot()
\[H_0: \rho = 0 \\H_1: \rho \neq 0\]
cohen.ES(test = "r", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = r
## size = medium
## effect.size = 0.3
pwr.r.test(
r = 0.3,
sig.level = 0.05,
power = 0.9,
alternative = "two.sided"
)
##
## approximate correlation power calculation (arctangh transformation)
##
## n = 111.8068
## r = 0.3
## sig.level = 0.05
## power = 0.9
## alternative = two.sided
pwr.r.test(
r = 0.3,
sig.level = 0.05,
power = 0.9,
alternative = "two.sided"
) %>%
plot()
cohen.ES(test = "f2", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = f2
## size = medium
## effect.size = 0.15
pwr.f2.test(
u = 1, # Coeficientes - 1 (intercepto) = nĆŗmero de predictores
f2 = 0.15, # TamaƱo de efecto
sig.level = 0.05,
power = 0.8
)
##
## Multiple regression power calculation
##
## u = 1
## v = 52.315
## f2 = 0.15
## sig.level = 0.05
## power = 0.8
\[n = v + u + 1\]
52 + 1 + 1
## [1] 54
cohen.ES(test = "f2", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = f2
## size = medium
## effect.size = 0.15
pwr.f2.test(
u = 3, # Coeficientes - 1 (intercepto) = nĆŗmero de predictores
f2 = 0.15, # TamaƱo de efecto
sig.level = 0.05,
power = 0.8
)
##
## Multiple regression power calculation
##
## u = 3
## v = 72.70583
## f2 = 0.15
## sig.level = 0.05
## power = 0.8
\[n = v + u + 1\]
73 + 3 + 1
## [1] 77
\[Y = RebaƱo + Grasa + Dieta\]
datos_modelos <- experimento %>%
mutate(
fat_g_100_ml = as.numeric(fat_g_100_ml)
)
modelo <-
lm(inv_simpson ~ herd_id + fat_g_100_ml + alimentation, data = datos_modelos)
summary(modelo)
##
## Call:
## lm(formula = inv_simpson ~ herd_id + fat_g_100_ml + alimentation,
## data = datos_modelos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.356 -14.818 1.012 21.298 41.918
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.430 11.525 0.558 0.579301
## herd_idTF2 5.273 22.003 0.240 0.811551
## herd_idTF3 63.769 18.906 3.373 0.001411 **
## herd_idTF4 25.022 18.115 1.381 0.173102
## herd_idTF5 49.840 13.731 3.630 0.000648 ***
## herd_idUF1 40.293 13.738 2.933 0.004984 **
## herd_idUF2 32.255 18.165 1.776 0.081642 .
## herd_idUF3 74.256 16.399 4.528 3.5e-05 ***
## herd_idUF4 36.692 18.660 1.966 0.054610 .
## herd_idUF5 27.509 18.502 1.487 0.143103
## fat_g_100_ml 3.963 2.238 1.770 0.082507 .
## alimentationunifeed NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.05 on 52 degrees of freedom
## Multiple R-squared: 0.4633, Adjusted R-squared: 0.3601
## F-statistic: 4.489 on 10 and 52 DF, p-value: 0.0001418
\[f2 =\sqrt{R^2_{ajustado}}\]
sqrt(0.3601)
## [1] 0.6000833
pwr.f2.test(
u = 13,
f2 = 0.6000833,
sig.level = 0.05,
v = 52 # grados de libertad del modelo
)
##
## Multiple regression power calculation
##
## u = 13
## v = 52
## f2 = 0.6000833
## sig.level = 0.05
## power = 0.9828605
encuestas <- read_csv("EncuestasColombia2022-Update.csv")
encuestas
encuestas %>%
ggplot(aes(x = gustavo_petro)) +
geom_density()
candidato <- encuestas %>%
pull(gustavo_petro) %>%
na.omit()
t.test(x = candidato, conf.level = 0.95)
##
## One Sample t-test
##
## data: candidato
## t = 23.011, df = 57, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 31.08799 37.01442
## sample estimates:
## mean of x
## 34.05121
library(infer)
set.seed(2022)
remuestreo_candidato <- encuestas %>%
specify(response = gustavo_petro) %>%
generate(reps = 1000, type = "bootstrap") %>%
calculate(stat = "mean")
remuestreo_candidato %>%
visualize()
remuestreo_candidato %>%
get_confidence_interval(level = 0.95, type = "percentile")
remuestreo_candidato %>%
get_confidence_interval(level = 0.95, type = "se",
point_estimate = mean(encuestas$gustavo_petro, na.rm = TRUE))