library(pwr)
library(tidyverse)
library(effectsize)
library(readxl)
library(janitor)
library(WebPower)
library(plotly)
set.seed(2022)
pob1 <- rnorm(n = 1000, mean = 123.4, sd = 15.4)
pob2 <- rnorm(n = 1000, mean = 133.4, sd = 10.5)
set.seed(2022)
muestra1 <- sample(x = pob1, size = 50, replace = TRUE)
muestra2 <- sample(x = pob2, size = 50, replace = TRUE)
prueba_t <-
t.test(
x = muestra1,
y = muestra2,
alternative = "two.sided",
conf.level = 0.95,
var.equal = FALSE
)
prueba_t
##
## Welch Two Sample t-test
##
## data: muestra1 and muestra2
## t = -4.0277, df = 82.989, p-value = 0.0001241
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.687210 -4.976746
## sample estimates:
## mean of x mean of y
## 123.3881 133.2201
n
: número de observaciones por
muestrad
: tamaño del efecto (¿?)sig.level
: nivel de significancia
\(\alpha\)power
: potencia de la pruebatype
: tipo de prueba: una muestra, dos
muestras independientes o dos muestras pareadasalternative
: tipo de hipótesis
alternativa\[d = \frac{\mu_1 - \mu_2}{S_{agrupado}} = \frac{\mu_1 - \mu_2}{\sqrt{(s_1^2 + s_2^2)/2}}\]
media_muestra1 <- mean(muestra1)
media_muestra2 <- mean(muestra2)
desviacion_agrupada <- sqrt( (var(muestra1) + var(muestra2)) / 2 )
tamano_efecto1 <- (media_muestra1 - media_muestra2) / desviacion_agrupada
tamano_efecto1
## [1] -0.8055412
tamano_efecto2 <- cohens_d(prueba_t)
tamano_efecto2$Cohens_d
## [1] -0.8055412
pwr.t.test(
n = 50,
d = tamano_efecto1,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided"
)
##
## Two-sample t test power calculation
##
## n = 50
## d = 0.8055412
## sig.level = 0.05
## power = 0.9787184
## alternative = two.sided
##
## NOTE: n is number in *each* group
pwr.t.test(
n = 50,
d = tamano_efecto1,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided"
) %>%
plot()
pwr.t.test(
d = tamano_efecto1,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided",
power = 0.8
)
##
## Two-sample t test power calculation
##
## n = 25.18879
## d = 0.8055412
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group
pwr.t.test(
d = tamano_efecto1,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided",
power = 0.8
) %>%
plot()
experimento <- read_excel("Simpson-Calidad-Leche.xls") %>%
clean_names() %>%
mutate(inv_simpson = as.numeric(inv_simpson))
prueba_t_experimento <-
t.test(
experimento$inv_simpson ~ experimento$alimentation
)
prueba_t_experimento
##
## Welch Two Sample t-test
##
## data: experimento$inv_simpson by experimento$alimentation
## t = -1.844, df = 58.138, p-value = 0.07029
## alternative hypothesis: true difference in means between group traditional and group unifeed is not equal to 0
## 95 percent confidence interval:
## -31.206216 1.279258
## sample estimates:
## mean in group traditional mean in group unifeed
## 53.69034 68.65382
cohens_d(prueba_t_experimento)
experimento %>%
count(alimentation)
pwr.t.test(
n = 29,
d = -0.47,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided"
)
##
## Two-sample t test power calculation
##
## n = 29
## d = 0.47
## sig.level = 0.05
## power = 0.4204649
## alternative = two.sided
##
## NOTE: n is number in *each* group
pwr.t.test(
n = 29,
d = -0.47,
sig.level = 0.05,
type = "two.sample",
alternative = "two.sided"
) %>%
plot()
wp.t(
n1 = 29,
n2 = 34,
d = -0.47,
type = "two.sample.2n",
alpha = 0.05,
alternative = "two.sided"
)
## Unbalanced two-sample t-test
##
## n1 n2 d alpha power
## 29 34 0.47 0.05 0.4484033
##
## NOTE: n1 and n2 are number in *each* group
## URL: http://psychstat.org/ttest2n
creditos <- read_rds("creditos_colombia_genero.Rds")
set.seed(2022)
muestra_1prop <- sample(creditos$genero, size = 50, replace = TRUE)
table(muestra_1prop)
## muestra_1prop
## H M
## 34 16
\[H_0: p = 0.5 \\ H_1: p \neq 0.5\]
prueba_1prop <-
prop.test(
x = 16,
n = 50,
p = 0.5,
conf.level = 0.95,
alternative = "two.sided"
)
prueba_1prop
##
## 1-sample proportions test with continuity correction
##
## data: 16 out of 50, null probability 0.5
## X-squared = 5.78, df = 1, p-value = 0.01621
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.1992781 0.4683118
## sample estimates:
## p
## 0.32
\[h = (2 \times \arcsin{\sqrt{p1}}) - (2 \times \arcsin{\sqrt{p2}})\]
(2 * asin(sqrt(16/50))) - (2 * asin(sqrt(0.5)))
## [1] -0.3682679
ES.h(p1 = 16/50, p2 = 0.5)
## [1] -0.3682679
pwr.p.test(
h = -0.3682679,
n = 50,
sig.level = 0.05,
alternative = "two.sided"
)
##
## proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.3682679
## n = 50
## sig.level = 0.05
## power = 0.7402418
## alternative = two.sided
pwr.p.test(
h = -0.3682679,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
)
##
## proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.3682679
## n = 57.87338
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
pwr.p.test(
h = -0.3682679,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
) %>%
plot()
\[H_0: p_h - p_m = 0 \\ H_1: p_h - p_m \neq 0 \]
creditos %>%
pull(genero) %>%
table() %>%
prop.table()
## .
## H M
## 0.618123 0.381877
0.618123 - 0.381877
## [1] 0.236246
set.seed(2023)
muestra_mujeres <- sample(creditos$genero, size = 50)
set.seed(2022)
muestra_hombres <- sample(creditos$genero, size = 50)
muestra_mujeres %>% table()
## .
## H M
## 30 20
muestra_hombres %>% table()
## .
## H M
## 34 16
prop.test(
x = c(20, 34),
n = c(50, 50),
alternative = "two.sided",
conf.level = 0.95
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(20, 34) out of c(50, 50)
## X-squared = 6.8035, df = 1, p-value = 0.009098
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.48750208 -0.07249792
## sample estimates:
## prop 1 prop 2
## 0.40 0.68
ES.h(p1 = 20/50, p2 = 34/50)
## [1] -0.5696258
pwr.2p.test(
h = 0.5696258,
n = 50,
sig.level = 0.05,
alternative = "two.sided"
)
##
## Difference of proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.5696258
## n = 50
## sig.level = 0.05
## power = 0.8127748
## alternative = two.sided
##
## NOTE: same sample sizes
pwr.2p.test(
h = 0.5696258,
power = 0.8,
sig.level = 0.05,
alternative = "two.sided"
) %>%
plot()
set.seed(2022)
muestra_mujeres <- sample(creditos$genero, size = 28)
set.seed(2021)
muestra_hombres <- sample(creditos$genero, size = 47)
muestra_mujeres %>% table()
## .
## H M
## 22 6
muestra_hombres %>% table()
## .
## H M
## 18 29
prop.test(
x = c(22, 29),
n = c(28, 47),
alternative = "two.sided",
conf.level = 0.95
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(22, 29) out of c(28, 47)
## X-squared = 1.585, df = 1, p-value = 0.208
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.06574706 0.40313308
## sample estimates:
## prop 1 prop 2
## 0.7857143 0.6170213
ES.h(p1 = 22/28, p2 = 29/47)
## [1] 0.3720119
pwr.2p2n.test(
h = 0.3720119,
n1 = 28,
n2 = 47,
sig.level = 0.05,
alternative = "two.sided"
)
##
## difference of proportion power calculation for binomial distribution (arcsine transformation)
##
## h = 0.3720119
## n1 = 28
## n2 = 47
## sig.level = 0.05
## power = 0.3441869
## alternative = two.sided
##
## NOTE: different sample sizes
\[H_0: \mu = 10.24264 \\ H_1: \mu \neq 10.24264\]
guinea <- read_rds("pasto_guinea.Rds")
guinea %>%
pull(proteina) %>%
mean()
## [1] 10.24264
set.seed(2022)
muestra_1media <- sample(guinea$proteina, size = 50)
prueba_1media <-
t.test(
x = muestra_1media,
mu = 10.24264,
alternative = "two.sided",
conf.level = 0.95
)
prueba_1media
##
## One Sample t-test
##
## data: muestra_1media
## t = 0.62648, df = 49, p-value = 0.5339
## alternative hypothesis: true mean is not equal to 10.24264
## 95 percent confidence interval:
## 9.803835 11.078965
## sample estimates:
## mean of x
## 10.4414
cohens_d(prueba_1media)
pwr.t.test(
n = 50,
d = 0.09,
sig.level = 0.05,
type = "one.sample",
alternative = "two.sided"
)
##
## One-sample t test power calculation
##
## n = 50
## d = 0.09
## sig.level = 0.05
## power = 0.09566464
## alternative = two.sided
pwr.t.test(
power = 0.8,
d = 0.09,
sig.level = 0.05,
type = "one.sample",
alternative = "two.sided"
) %>%
plot()
\[H_0: \rho = 0 \\H_1: \rho \neq 0\]
cohen.ES(test = "r", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = r
## size = medium
## effect.size = 0.3
pwr.r.test(
r = 0.3,
sig.level = 0.05,
power = 0.9,
alternative = "two.sided"
)
##
## approximate correlation power calculation (arctangh transformation)
##
## n = 111.8068
## r = 0.3
## sig.level = 0.05
## power = 0.9
## alternative = two.sided
pwr.r.test(
r = 0.3,
sig.level = 0.05,
power = 0.9,
alternative = "two.sided"
) %>%
plot()
cohen.ES(test = "f2", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = f2
## size = medium
## effect.size = 0.15
pwr.f2.test(
u = 1, # Coeficientes - 1 (intercepto) = número de predictores
f2 = 0.15, # Tamaño de efecto
sig.level = 0.05,
power = 0.8
)
##
## Multiple regression power calculation
##
## u = 1
## v = 52.315
## f2 = 0.15
## sig.level = 0.05
## power = 0.8
\[n = v + u + 1\]
52 + 1 + 1
## [1] 54
cohen.ES(test = "f2", size = "medium")
##
## Conventional effect size from Cohen (1982)
##
## test = f2
## size = medium
## effect.size = 0.15
pwr.f2.test(
u = 3, # Coeficientes - 1 (intercepto) = número de predictores
f2 = 0.15, # Tamaño de efecto
sig.level = 0.05,
power = 0.8
)
##
## Multiple regression power calculation
##
## u = 3
## v = 72.70583
## f2 = 0.15
## sig.level = 0.05
## power = 0.8
\[n = v + u + 1\]
73 + 3 + 1
## [1] 77
\[Y = Rebaño + Grasa + Dieta\]
datos_modelos <- experimento %>%
mutate(
fat_g_100_ml = as.numeric(fat_g_100_ml)
)
modelo <-
lm(inv_simpson ~ herd_id + fat_g_100_ml + alimentation, data = datos_modelos)
summary(modelo)
##
## Call:
## lm(formula = inv_simpson ~ herd_id + fat_g_100_ml + alimentation,
## data = datos_modelos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.356 -14.818 1.012 21.298 41.918
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.430 11.525 0.558 0.579301
## herd_idTF2 5.273 22.003 0.240 0.811551
## herd_idTF3 63.769 18.906 3.373 0.001411 **
## herd_idTF4 25.022 18.115 1.381 0.173102
## herd_idTF5 49.840 13.731 3.630 0.000648 ***
## herd_idUF1 40.293 13.738 2.933 0.004984 **
## herd_idUF2 32.255 18.165 1.776 0.081642 .
## herd_idUF3 74.256 16.399 4.528 3.5e-05 ***
## herd_idUF4 36.692 18.660 1.966 0.054610 .
## herd_idUF5 27.509 18.502 1.487 0.143103
## fat_g_100_ml 3.963 2.238 1.770 0.082507 .
## alimentationunifeed NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.05 on 52 degrees of freedom
## Multiple R-squared: 0.4633, Adjusted R-squared: 0.3601
## F-statistic: 4.489 on 10 and 52 DF, p-value: 0.0001418
\[f2 =\sqrt{R^2_{ajustado}}\]
sqrt(0.3601)
## [1] 0.6000833
pwr.f2.test(
u = 11,
f2 = 0.6000833,
sig.level = 0.05,
v = 52 # grados de libertad del modelo
)
##
## Multiple regression power calculation
##
## u = 11
## v = 52
## f2 = 0.6000833
## sig.level = 0.05
## power = 0.986946