# Data set: IMC readings for 30 individuals in each of four municipalities.
# The values are kept as one vector per municipality and also stacked into a
# long-format data frame (`datos`) with one row per reading.
Sayula <- c(
  25, 25, 29, 27, 25, 29, 29, 29, 25, 29,
  29, 29, 25, 25, 29, 29, 31, 31, 29, 27,
  25, 25, 27, 29, 31, 29, 31, 29, 25, 29
)
GomezFarias <- c(
  29, 25, 25, 29, 25, 29, 29, 29, 27, 29,
  31, 25, 25, 25, 29, 25, 29, 31, 29, 25,
  27, 25, 25, 25, 25, 29, 29, 27, 27, 29
)
Zacoalco <- c(
  29, 29, 29, 29, 29, 27, 27, 25, 29, 25,
  31, 29, 25, 29, 27, 29, 25, 25, 29, 27,
  27, 27, 25, 31, 25, 29, 29, 25, 27, 25
)
Techaluta <- c(
  27, 31, 27, 25, 27, 25, 29, 27, 27, 25,
  29, 29, 25, 25, 25, 25, 25, 29, 29, 25,
  29, 27, 25, 25, 31, 29, 25, 25, 31, 25
)

# Stack the readings in the same order as the municipality labels below.
IMC <- c(Sayula, GomezFarias, Zacoalco, Techaluta)
Municipio <- factor(
  rep(c("Sayula", "GomezFarias", "Zacoalco", "Techaluta"), each = 30)
)
datos <- data.frame(Municipio = Municipio, IMC = IMC)

# Preview the first rows of the long-format table.
head(datos, n = 6L)
## Municipio IMC
## 1 Sayula 25
## 2 Sayula 25
## 3 Sayula 29
## 4 Sayula 27
## 5 Sayula 25
## 6 Sayula 29
#Estadística descriptiva
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Descriptive statistics of IMC per municipality: mean, standard deviation,
# minimum and maximum. The result is auto-printed.
datos |>
  group_by(Municipio) |>
  summarise(
    Media = mean(IMC),
    DE = sd(IMC),
    Min = min(IMC),
    Max = max(IMC)
  )
## # A tibble: 4 × 5
## Municipio Media DE Min Max
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 GomezFarias 27.3 2.08 25 31
## 2 Sayula 27.9 2.15 25 31
## 3 Techaluta 26.9 2.13 25 31
## 4 Zacoalco 27.5 1.94 25 31
#Gráficas de normalidad
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Normality plots: a 2 x 2 page of histograms followed by a 2 x 2 page of
# normal Q-Q plots, one panel per municipality.
# par() returns the settings it replaces; they are restored at the end so the
# 2 x 2 layout does not carry over into plots drawn later in the script.
par_previo <- par(mfrow = c(2, 2))
hist(Sayula, main = "Sayula")
hist(GomezFarias, main = "Gomez Farías")
hist(Zacoalco, main = "Zacoalco")
hist(Techaluta, main = "Techaluta")

# Same 2 x 2 layout for the Q-Q plots; qqline() adds the reference line to the
# panel that qqnorm() has just drawn.
par(mfrow = c(2, 2))
qqnorm(Sayula)
qqline(Sayula)
qqnorm(GomezFarias)
qqline(GomezFarias)
qqnorm(Zacoalco)
qqline(Zacoalco)
qqnorm(Techaluta)
qqline(Techaluta)

# Put the graphics layout back to what it was before this section.
par(par_previo)
# Normality tests: Shapiro-Wilk on each municipality's IMC sample.
# Each call is written out separately because the "data:" label in the printed
# result is taken from the expression passed to shapiro.test().
# Lines starting with "##" are output recorded from a previous run; every
# recorded p-value is below 0.001.
shapiro.test(Sayula)
##
## Shapiro-Wilk normality test
##
## data: Sayula
## W = 0.81358, p-value = 0.0001171
shapiro.test(GomezFarias)
##
## Shapiro-Wilk normality test
##
## data: GomezFarias
## W = 0.79961, p-value = 6.449e-05
shapiro.test(Zacoalco)
##
## Shapiro-Wilk normality test
##
## data: Zacoalco
## W = 0.84349, p-value = 0.0004543
shapiro.test(Techaluta)
##
## Shapiro-Wilk normality test
##
## data: Techaluta
## W = 0.80207, p-value = 7.153e-05
# Box plot of IMC by municipality, for a visual comparison of spread between
# the groups.
boxplot(
  IMC ~ Municipio,
  data = datos,
  col = "lightblue",
  main = "Distribución del IMC"
)
#Prueba formal
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Levene's test for homogeneity of variance of IMC across municipalities
# (car package); the result is auto-printed.
leveneTest(y = IMC ~ Municipio, data = datos)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.1938 0.9005
## 116
# One-way ANOVA of IMC by municipality. The fitted model is kept in
# `modelo_anova` because the post-hoc step further down reuses it.
modelo_anova <- aov(IMC ~ Municipio, data = datos)
summary(modelo_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## Municipio 3 13.7 4.567 1.058 0.37
## Residuals 116 500.7 4.316

# The Shapiro-Wilk tests earlier in this script reject normality in every
# municipality (all recorded p-values < 0.001), so the ANOVA's normality
# assumption is doubtful. The rank-based Kruskal-Wallis test does not rely on
# it and is run on the same grouping as a check on the ANOVA conclusion.
kruskal.test(IMC ~ Municipio, data = datos)
# Post-hoc comparison: Tukey's HSD for every pair of municipalities, using the
# fitted one-way ANOVA. The arguments spelled out here are the function's
# defaults (unordered levels, 95% family-wise confidence).
TukeyHSD(x = modelo_anova, ordered = FALSE, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = IMC ~ Municipio, data = datos)
##
## $Municipio
## diff lwr upr p adj
## Sayula-GomezFarias 0.6000000 -0.7982503 1.9982503 0.6788785
## Techaluta-GomezFarias -0.3333333 -1.7315837 1.0649170 0.9250380
## Zacoalco-GomezFarias 0.2000000 -1.1982503 1.5982503 0.9822332
## Techaluta-Sayula -0.9333333 -2.3315837 0.4649170 0.3079191
## Zacoalco-Sayula -0.4000000 -1.7982503 0.9982503 0.8783260
## Zacoalco-Techaluta 0.5333333 -0.8649170 1.9315837 0.7529797
# Pearson correlation matrix between the four municipality samples, with one
# column per municipality.
# NOTE(review): this pairs the i-th reading of each vector. That is only
# meaningful if the readings are matched across municipalities - confirm.
matriz <- data.frame(
  Sayula = Sayula,
  GomezFarias = GomezFarias,
  Zacoalco = Zacoalco,
  Techaluta = Techaluta
)
cor(x = matriz, method = "pearson")
## Sayula GomezFarias Zacoalco Techaluta
## Sayula 1.00000000 0.44029194 -0.19965949 -0.07738834
## GomezFarias 0.44029194 1.00000000 -0.09995974 0.03518802
## Zacoalco -0.19965949 -0.09995974 1.00000000 0.14093991
## Techaluta -0.07738834 0.03518802 0.14093991 1.00000000
# Pearson correlation test for one specific pair: Sayula vs. GomezFarias.
# NOTE(review): as with the correlation matrix, this treats the two vectors as
# paired observations - confirm that the pairing is intended.
cor.test(x = Sayula, y = GomezFarias, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: Sayula and GomezFarias
## t = 2.5949, df = 28, p-value = 0.01489
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0951093 0.6909588
## sample estimates:
## cor
## 0.4402919
# Linear regression with GomezFarias as the response and Sayula as the only
# predictor. The fit is stored in `modelo_regresion` for the residual
# diagnostics that follow, and its summary is auto-printed.
modelo_regresion <- lm(formula = GomezFarias ~ Sayula)
summary(object = modelo_regresion)
##
## Call:
## lm(formula = GomezFarias ~ Sayula)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6064 -1.0410 0.6763 1.2488 3.2488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.3516 4.6049 3.334 0.00242 **
## Sayula 0.4276 0.1648 2.595 0.01489 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.904 on 28 degrees of freedom
## Multiple R-squared: 0.1939, Adjusted R-squared: 0.1651
## F-statistic: 6.733 on 1 and 28 DF, p-value: 0.01489
# Residual analysis: draw the diagnostic plots that plot() produces for the
# fitted regression, arranged on a 2 x 2 page.
# par() returns the settings it replaces; restoring them afterwards keeps the
# 2 x 2 layout from leaking into any plot drawn after this section.
par_previo <- par(mfrow = c(2, 2))
plot(modelo_regresion)
par(par_previo)