Se va a trabajar con la base de datos titulada diabetes_risk_dataset.
#Paquetes
library(readr)
library(ggplot2)
library(BSDA)
Carga de base de datos
# Read the CSV and store it as a plain data.frame. unclass() strips the
# class returned by read_csv() so the columns are handed to as.data.frame()
# as a list, and stringsAsFactors = TRUE turns character columns into factors.
diabetes <- as.data.frame(
  unclass(read_csv("diabetes_risk_dataset.csv")),
  stringsAsFactors = TRUE
)
Transformación de datos codificados a variables tipo factor
\[ \left\{\begin{matrix} Ho: \mu = 50\\ Ha: \mu \neq 50 \end{matrix}\right. \]
# One-sample, two-sided z-test of Ho: mu = 50 on patient age at the 95%
# confidence level. The sample standard deviation is supplied as sigma.x.
# The alternative is spelled out in full ("two.sided") instead of relying on
# partial matching of the abbreviated value "two.side".
z.test(x = diabetes$age,
       sigma.x = sd(diabetes$age),
       mu = 50,
       alternative = "two.sided",
       conf.level = 0.95)
##
## One-sample z-Test
##
## data: diabetes$age
## z = -8.3075, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 50
## 95 percent confidence interval:
## 49.43794 49.65252
## sample estimates:
## mean of x
## 49.54523
Con un nivel de confianza del 95 %, existe evidencia suficiente para rechazar la hipótesis nula; es decir, la edad media de los pacientes no es igual a 50.
# Histogram of patient age (10 bins) with a dashed red line at the sample mean.
# The mean is a single constant, so it is passed to geom_vline() directly
# rather than through aes(): referring to `diabetes$age` inside aes() makes
# ggplot2 warn that `$` is discouraged there, and the length-one value is
# recycled into one overlapping line per data row.
ggplot(diabetes, aes(x = age)) +
  geom_histogram(bins = 10,
                 col = "white") +
  labs(title = "Histograma de edad de pacientes",
       x = "Clases de las edades",
       y = "Frecuencia Absoluta") +
  geom_vline(xintercept = mean(diabetes$age),
             color = "red",
             linetype = "dashed")
\[ \left\{\begin{matrix} Ho: \mu_{x1} =\mu _{x2}\\ Ha: \mu_{x1} \neq \mu _{x2} \end{matrix}\right. \]

### Diagnósticos gráficos
# BMI histogram (10 bins, white bar outlines), one panel per value of `smoker`.
ggplot(data = diabetes, mapping = aes(x = bmi)) +
  geom_histogram(colour = "white", bins = 10) +
  facet_wrap(~ smoker)
# Semi-transparent BMI histograms filled by `smoker`, with a dashed red line
# at the overall mean BMI. The mean is a constant, so it is given to
# geom_vline() as a fixed xintercept instead of being mapped through aes()
# with `diabetes$bmi`, which ggplot2 discourages and which repeats the line
# once per data row.
ggplot(diabetes, aes(x = bmi,
                     fill = smoker)) +
  geom_histogram(bins = 10,
                 color = "white",
                 alpha = 0.5) +
  geom_vline(xintercept = mean(diabetes$bmi),
             color = "red",
             linetype = "dashed")
# Split the patients by smoking status: the result is a list with one data
# frame per value of `smoker`, accessed below through the names `0` and `1`.
smokerIBM <- split(diabetes, diabetes$smoker)

# Two-sample, two-sided z-test comparing mean BMI between the two groups at
# the 95% confidence level, using each group's sample standard deviation.
# The hypothesised difference is stated explicitly as mu = 0; the original
# call left the argument empty (`mu=,`), which only worked because the
# missing value silently fell back to the function's default.
z.test(x = smokerIBM$`0`$bmi,
       y = smokerIBM$`1`$bmi,
       sigma.x = sd(smokerIBM$`0`$bmi),
       sigma.y = sd(smokerIBM$`1`$bmi),
       mu = 0,
       alternative = "two.sided",
       conf.level = 0.95)
##
## Two-sample z-Test
##
## data: smokerIBM$`0`$bmi and smokerIBM$`1`$bmi
## z = -0.006229, p-value = 0.995
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07664373 0.07615810
## sample estimates:
## mean of x mean of y
## 27.03396 27.03420
# Overlaid, semi-transparent BMI density curves filled according to `smoker`.
ggplot(data = diabetes, mapping = aes(x = bmi, fill = smoker)) +
  geom_density(alpha = 0.5) +
  labs(
    x = "BMI",
    y = "Densidad",
    title = "Diagrama de densidad de BMI de los pacientes"
  )
# Same BMI density plot as a single-panel version would show, but drawn in a
# separate panel for each value of `smoker`.
ggplot(data = diabetes, mapping = aes(x = bmi, fill = smoker)) +
  geom_density(alpha = 0.5) +
  labs(
    x = "BMI",
    y = "Densidad",
    title = "Diagrama de densidad de BMI de los pacientes"
  ) +
  facet_wrap(~ smoker)
# BMI boxplots filled by `smoker`, with a dashed red reference line at the
# overall mean BMI. The mean is passed to geom_hline() as a fixed yintercept:
# mapping `diabetes$bmi` inside aes() is what produced the
# "Use of `diabetes$bmi` is discouraged" warning.
ggplot(diabetes, aes(y = bmi,
                     fill = smoker)) +
  geom_boxplot() +
  labs(title = "Diagrama de boxplot BMI de los pacientes",
       y = "BMI") +
  geom_hline(yintercept = mean(diabetes$bmi),
             color = "red", linewidth = 1.2,
             linetype = "dashed")
## Warning: Use of `diabetes$bmi` is discouraged.
## ℹ Use `bmi` instead.
# BMI boxplots filled by `smoker`, one panel per value of `smoker`, each with
# a dashed red reference line at the overall (all-patient) mean BMI. The mean
# is passed to geom_hline() as a fixed yintercept so the same value is drawn
# in every panel; mapping `diabetes$bmi` inside aes() is what produced the
# "Use of `diabetes$bmi` is discouraged" warning.
ggplot(diabetes, aes(y = bmi,
                     fill = smoker)) +
  geom_boxplot() +
  labs(title = "Diagrama de boxplot BMI de los pacientes",
       y = "BMI") +
  geom_hline(yintercept = mean(diabetes$bmi),
             color = "red", linewidth = 1.2,
             linetype = "dashed") +
  facet_wrap(~ smoker)
## Warning: Use of `diabetes$bmi` is discouraged.
## ℹ Use `bmi` instead.
# Prueba de hipótesis para proporciones
\[ \left\{\begin{matrix} Ho: \pi_{x1} =\pi _{x2}\\ Ha: \pi_{x1} \neq \pi _{x2} \end{matrix}\right. \]
# Frequency table of the `smoker` column, kept for the proportion tests and
# printed so the counts appear in the report.
tablasmoker <- table(diabetes$smoker)
tablasmoker
##
## 0 1
## 79800 20200
# Two-sample test for equality of proportions (two-sided, 95% confidence,
# with continuity correction). The category counts and the sample size are
# taken from `tablasmoker` instead of being typed in by hand, so the call
# stays correct if the data change.
# NOTE(review): both counts are complementary shares of the same sample, so
# they are not two independent samples as this test assumes; a one-sample
# test of p = 0.5 looks like the appropriate comparison -- confirm intent.
prop.test(x = as.vector(tablasmoker),
          n = rep(sum(tablasmoker), length(tablasmoker)),
          alternative = "two.sided",
          conf.level = 0.95,
          correct = TRUE)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(79800, 20200) out of c(1e+05, 1e+05)
## X-squared = 71041, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.5924708 0.5995292
## sample estimates:
## prop 1 prop 2
## 0.798 0.202
# Bar chart of the number of patients in each `smoker` category.
ggplot(data = diabetes, mapping = aes(x = smoker)) +
  geom_bar()
# Ho: p = 0.5 vs Ha: p ≠ 0.5
# One-sample proportion test (two-sided, 95% confidence, with continuity
# correction) for the share of patients in smoker category `0`. The count and
# the sample size are read from `tablasmoker` rather than hard-coded.
prop.test(x = tablasmoker[["0"]],
          n = sum(tablasmoker),
          p = 0.5,
          alternative = "two.sided",
          conf.level = 0.95,
          correct = TRUE)
##
## 1-sample proportions test with continuity correction
##
## data: 79800 out of 1e+05, null probability 0.5
## X-squared = 35520, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.7954951 0.8004819
## sample estimates:
## p
## 0.798