library(tidyverse)
library(readxl)
# Read the survey workbook into `datos`, then print it for inspection.
datos <- read_excel("encuesta_bien_excel.xlsx")
datos
# Density curve of promedio_academico for the whole sample.
datos %>%
  ggplot(mapping = aes(x = promedio_academico)) +
  geom_density()
# Q-Q plot of promedio_academico with its reference line.
datos %>%
  ggplot(mapping = aes(sample = promedio_academico)) +
  geom_qq() +
  geom_qq_line()
# Same Q-Q check of promedio_academico, drawn with ggpubr's ggqqplot().
library(ggpubr)
ggqqplot(data = datos$promedio_academico)
# Same Q-Q check of promedio_academico, drawn with car's qqPlot().
# NOTE(review): the pair of integers printed after the call is presumably the
# row indices of the most extreme observations — confirm against car docs.
library(car)
qqPlot(x = datos$promedio_academico)
## [1] 4 22
\[\begin{aligned} H_0 &: \text{Sí hay normalidad } \left(X \sim N(\mu, \sigma)\right) \\ H_1 &: \text{No hay normalidad } \left(X \nsim N(\mu, \sigma)\right) \end{aligned}\]
# Shapiro-Wilk normality test on promedio_academico for the full sample.
shapiro.test(x = datos$promedio_academico)
##
## Shapiro-Wilk normality test
##
## data: datos$promedio_academico
## W = 0.94241, p-value = 0.1056
# Anderson-Darling normality test (nortest) on the same variable, as a second
# check alongside Shapiro-Wilk.
library(nortest)
ad.test(x = datos$promedio_academico)
##
## Anderson-Darling normality test
##
## data: datos$promedio_academico
## A = 0.60659, p-value = 0.1046
\[\begin{aligned} H_0 &: \mu = 3.5 \\ H_1 &: \mu \neq 3.5 \end{aligned}\]
- `x`: la muestra de datos (variable).
- `mu`: valor promedio de referencia.
- `alternative`: tipo de contraste de hipótesis; puede ser bilateral (`"two.sided"`) o unilateral (`"less"` o `"greater"`).
- `conf.level`: nivel de confianza. Es igual a \(1 - \alpha = 1 - 0.05 = 0.95\).

t.test(x = datos$promedio_academico,
mu = 3.5,
alternative = "two.sided",
conf.level = 0.95)
##
## One Sample t-test
##
## data: datos$promedio_academico
## t = 4.3798, df = 29, p-value = 0.0001415
## alternative hypothesis: true mean is not equal to 3.5
## 95 percent confidence interval:
## 3.606073 3.791927
## sample estimates:
## mean of x
## 3.699
\[\begin{aligned} H_0 &: \text{El promedio académico está localizado en } 3.5 \\ H_1 &: \text{El promedio académico no está localizado en } 3.5 \end{aligned}\]
# One-sample Wilcoxon signed rank test of promedio_academico against the
# reference location 3.5 (two-sided).
# NOTE(review): no confidence interval is printed for this call; conf.level
# appears to matter only when conf.int = TRUE — confirm whether an interval
# was intended.
wilcox.test(x = datos$promedio_academico,
mu = 3.5,
alternative = "two.sided",
conf.level = 0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: datos$promedio_academico
## V = 323, p-value = 0.0001826
## alternative hypothesis: true location is not equal to 3.5
# Q-Q plot of promedio_academico drawn separately for each value of trabajo.
# Each panel gets its own axis scales; the colour legend is hidden because the
# facet strips already label the groups.
datos %>%
  ggplot(mapping = aes(sample = promedio_academico, color = trabajo)) +
  facet_wrap(facets = ~trabajo, scales = "free") +
  geom_qq(show.legend = FALSE) +
  geom_qq_line(show.legend = FALSE)
# Split `datos` by the value of `trabajo` so each group can be tested on its
# own.
si_trabajan <- datos %>% filter(trabajo == "Sí")
no_trabajan <- datos %>% filter(trabajo == "No")
# Shapiro-Wilk normality test restricted to the trabajo == "Sí" subset.
shapiro.test(x = si_trabajan$promedio_academico)
##
## Shapiro-Wilk normality test
##
## data: si_trabajan$promedio_academico
## W = 0.84708, p-value = 0.03909
# Shapiro-Wilk normality test restricted to the trabajo == "No" subset.
shapiro.test(x = no_trabajan$promedio_academico)
##
## Shapiro-Wilk normality test
##
## data: no_trabajan$promedio_academico
## W = 0.93752, p-value = 0.2378
\[\begin{aligned} H_0 &: \mu_{si} = \mu_{no} \\ H_1 &: \mu_{si} \neq \mu_{no} \end{aligned}\]
El juego de hipótesis anterior también puede ser planteado de la siguiente manera:
\[\begin{aligned} H_0 &: \mu_{si} - \mu_{no} = 0 \\ H_1 &: \mu_{si} - \mu_{no} \neq 0 \end{aligned}\]
# Two-sided comparison of mean promedio_academico between the trabajo groups,
# with a 99% confidence interval for the difference. No var.equal argument is
# passed, and the printed result is a Welch two-sample t-test.
t.test(datos$promedio_academico ~ datos$trabajo,
alternative = "two.sided",
conf.level = 0.99)
##
## Welch Two Sample t-test
##
## data: datos$promedio_academico by datos$trabajo
## t = 1.5859, df = 20.113, p-value = 0.1284
## alternative hypothesis: true difference in means between group No and group Sí is not equal to 0
## 99 percent confidence interval:
## -0.1171464 0.4125531
## sample estimates:
## mean in group No mean in group Sí
## 3.753158 3.605455
# Two-sided Wilcoxon rank sum test comparing promedio_academico between the
# trabajo groups.
# NOTE(review): no confidence interval is printed for this call; conf.level
# appears to matter only when conf.int = TRUE — confirm whether an interval
# was intended.
wilcox.test(datos$promedio_academico ~ datos$trabajo,
alternative = "two.sided",
conf.level = 0.99)
##
## Wilcoxon rank sum test with continuity correction
##
## data: datos$promedio_academico by datos$trabajo
## W = 137, p-value = 0.166
## alternative hypothesis: true location shift is not equal to 0