Se realizó una investigación para conocer el Índice de Masa Corporal (IMC) de cuatro poblaciones distintas ubicadas en el Sur de Jalisco. Una vez creado el estudio y el diseño, el tamaño de muestra arrojó la cantidad de 30 personas. Los resultados se presentan en la Tabla 2.
library(readxl)
# Read the workbook "examen.xlsx" (path relative to the working directory)
# into `examen`.
examen <- read_excel("examen.xlsx")

# View() opens an interactive data viewer, which is only meaningful in an
# interactive session; skip it when the script is run or rendered
# non-interactively so that it cannot interrupt or fail the run.
if (interactive()) {
  View(examen)
}
Criterio: los datos se consideran normales si los puntos siguen la línea roja.
# Normal Q-Q plot for each population column of `examen`, each followed by
# its reference line drawn with colour index 2 (red in the default palette).
invisible(lapply(
  c("Tuxpan", "Tamazula", "Zapotlán", "Zapotiltic"),
  function(poblacion) {
    valores <- examen[[poblacion]]
    qqnorm(valores)
    qqline(valores, col = 2)
  }
))
# Draw a density histogram of `valores` and overlay the normal density curve
# with the given mean and standard deviation.
#   valores: numeric vector to plot.
#   media:   mean of the overlaid normal curve.
#   desv:    standard deviation of the overlaid normal curve.
#   color:   fill colour of the histogram bars.
# The x axis spans media +/- 4 * desv and the y axis is fixed to [0, 0.25],
# so the four figures share the same vertical scale.
# Returns NULL invisibly; called for its plotting side effect.
graficar_hist_normal <- function(valores, media, desv, color) {
  hist(
    valores,
    freq = FALSE, # density scale, so the dnorm() curve is comparable
    col = color,
    xlab = "IMC", # the plotted variable is the IMC, not a "Balance"
    main = "",
    xlim = c(media - 4 * desv, media + 4 * desv),
    ylim = c(0, 0.25)
  )
  curve(dnorm(x, mean = media, sd = desv), col = 2, lwd = 2, add = TRUE)
  invisible(NULL)
}

# Sample mean and standard deviation of each population. They are kept as
# top-level variables because code elsewhere in the file may rely on them.
xb_Tuxp <- mean(examen$Tuxpan)
s_Tuxp <- sd(examen$Tuxpan)
graficar_hist_normal(examen$Tuxpan, xb_Tuxp, s_Tuxp, color = "blue")

xb_Tam <- mean(examen$Tamazula)
s_Tam <- sd(examen$Tamazula)
graficar_hist_normal(examen$Tamazula, xb_Tam, s_Tam, color = "gray")

xb_Zap <- mean(examen$Zapotlán)
s_Zap <- sd(examen$Zapotlán)
graficar_hist_normal(examen$Zapotlán, xb_Zap, s_Zap, color = "pink")

xb_Ztil <- mean(examen$Zapotiltic)
s_Ztil <- sd(examen$Zapotiltic)
graficar_hist_normal(examen$Zapotiltic, xb_Ztil, s_Ztil, color = "yellow")
Se eligió la prueba de Shapiro-Wilk ya que son menos de 50 datos.
# Shapiro-Wilk normality test on each population column of `examen`.
# The `##` lines are the console output recorded for each call.
shapiro.test(examen$Tuxpan) # p-value > 0.05: normality is not rejected
##
## Shapiro-Wilk normality test
##
## data: examen$Tuxpan
## W = 0.96395, p-value = 0.3892
shapiro.test(examen$Tamazula) # p-value > 0.05: normality is not rejected
##
## Shapiro-Wilk normality test
##
## data: examen$Tamazula
## W = 0.95488, p-value = 0.228
shapiro.test(examen$Zapotlán) # p-value > 0.05: normality is not rejected
##
## Shapiro-Wilk normality test
##
## data: examen$Zapotlán
## W = 0.95285, p-value = 0.2014
shapiro.test(examen$Zapotiltic) # p-value > 0.05: normality is not rejected
##
## Shapiro-Wilk normality test
##
## data: examen$Zapotiltic
## W = 0.9644, p-value = 0.3992
Se utiliza la prueba de Bartlett ya que los datos son normales.
# Bartlett test of homogeneity of variances across the four population
# columns, passed as a list with one numeric vector per group.
bartlett.test(list(examen$Tuxpan, examen$Tamazula, examen$Zapotlán, examen$Zapotiltic))
##
## Bartlett test of homogeneity of variances
##
## data: list(examen$Tuxpan, examen$Tamazula, examen$Zapotlán, examen$Zapotiltic)
## Bartlett's K-squared = 1.5396, df = 3, p-value = 0.6732
Los resultados indican que sí hay homocedasticidad (p = 0.6732 > 0.05).
Cargamos paquetes
library(readxl)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.2
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reshape `examen` from wide to long format: every column is stacked, the
# former column name goes to "Población" and its value to "IMC".
datos_largos <- pivot_longer(
  data = examen,
  cols = everything(),
  values_to = "IMC",
  names_to = "Población"
)

# One-way analysis of variance of IMC by population, then its summary table.
anova_resultado <- aov(formula = IMC ~ Población, data = datos_largos)
summary(object = anova_resultado)
## Df Sum Sq Mean Sq F value Pr(>F)
## Población 3 30.1 10.031 1.081 0.36
## Residuals 116 1076.2 9.278
Resultado explicado: No existen diferencias estadísticamente significativas entre los IMC de las poblaciones.
Se usa el método de Pearson porque son datos normales
# Pairwise Pearson correlation matrix between the columns of `examen`,
# computed on complete observations only.
correlacion <- cor(
  x = examen,
  method = "pearson",
  use = "complete.obs"
)
print(x = correlacion)
## Tuxpan Tamazula Zapotlán Zapotiltic
## Tuxpan 1.00000000 -0.01810688 -0.5322067 0.1633216
## Tamazula -0.01810688 1.00000000 0.1334118 -0.0202623
## Zapotlán -0.53220670 0.13341175 1.0000000 -0.2962594
## Zapotiltic 0.16332161 -0.02026230 -0.2962594 1.0000000
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
##
## Adjuntando el paquete: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Melt the correlation matrix into long format (columns Var1, Var2, value)
# so each cell can be drawn as a tile.
correlacion_melt <- melt(data = correlacion)

# Heatmap of the correlation matrix on a diverging blue-white-red scale
# centred at 0.
ggplot(
  data = correlacion_melt,
  mapping = aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile() +
  scale_fill_gradient2(
    midpoint = 0,
    low = "blue",
    mid = "white",
    high = "red"
  ) +
  labs(
    x = "Población",
    y = "Población",
    title = "Matriz de Correlación"
  ) +
  theme_minimal()
Las únicas poblaciones que se correlacionan a un nivel moderado son Zapotlán y Tuxpan (r = -0.53).
# Simple linear regression with Zapotlán as the response and Tuxpan as the
# only predictor, followed by the model summary.
modelo <- lm(
  formula = Zapotlán ~ Tuxpan,
  data = examen
)
summary(object = modelo)
##
## Call:
## lm(formula = Zapotlán ~ Tuxpan, data = examen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.2484 -2.0664 0.3436 2.4346 4.3876
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.1483 4.8514 8.482 3.19e-09 ***
## Tuxpan -0.6360 0.1912 -3.326 0.00247 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.793 on 28 degrees of freedom
## Multiple R-squared: 0.2832, Adjusted R-squared: 0.2576
## F-statistic: 11.06 on 1 and 28 DF, p-value: 0.002468
# Scatter plot of Zapotlán against Tuxpan (solid blue points) with the fitted
# regression line from `modelo` drawn on top in red.
plot(
  x = examen[["Tuxpan"]],
  y = examen[["Zapotlán"]],
  main = "Regresión lineal: Zapotlán vs Tuxpan",
  xlab = "Tuxpan",
  ylab = "Zapotlán",
  col = "blue",
  pch = 19
)
abline(reg = modelo, lwd = 2, col = "red")
Ecuación del modelo ajustado: Zapotlán = 41.15 − 0.636 × Tuxpan
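Aunque el modelo es estadísticamente significativo (p = 0.0025), su capacidad explicativa es baja: el coeficiente de determinación es R² = 0.28, es decir, Tuxpan explica solo el 28 % de la variabilidad de Zapotlán (la correlación entre ambas es r = -0.53).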