Examen final de estadística

##Base de datos

# IMC observations (presumably body-mass index -- confirm), one numeric
# vector per municipality. The vectors stay as top-level objects because the
# plots, normality tests, correlation and regression below use them directly.
Sayula <- c(
  25, 25, 29, 27, 25, 29, 29, 29, 25, 29, 29, 29, 25, 25, 29,
  29, 31, 31, 29, 27, 25, 25, 27, 29, 31, 29, 31, 29, 25, 29
)

GomezFarias <- c(
  29, 25, 25, 29, 25, 29, 29, 29, 27, 29, 31, 25, 25, 25, 29,
  25, 29, 31, 29, 25, 27, 25, 25, 25, 25, 29, 29, 27, 27, 29
)

Zacoalco <- c(
  29, 29, 29, 29, 29, 27, 27, 25, 29, 25, 31, 29, 25, 29, 27,
  29, 25, 25, 29, 27, 27, 27, 25, 31, 25, 29, 29, 25, 27, 25
)

Techaluta <- c(
  27, 31, 27, 25, 27, 25, 29, 27, 27, 25, 29, 29, 25, 25, 25,
  25, 25, 29, 29, 25, 29, 27, 25, 25, 31, 29, 25, 25, 31, 25
)

# Named list of the samples, in the order they are stacked into `datos`.
imc_por_municipio <- list(
  Sayula = Sayula,
  GomezFarias = GomezFarias,
  Zacoalco = Zacoalco,
  Techaluta = Techaluta
)

# All observations as one numeric vector (Sayula first, Techaluta last).
IMC <- unlist(imc_por_municipio, use.names = FALSE)

# One label per observation. The repeat counts are taken from the vectors
# themselves instead of a hard-coded 30, so labels cannot drift out of step
# with the data if a sample is edited.
Municipio <- factor(
  rep(names(imc_por_municipio), times = lengths(imc_por_municipio))
)

# Long-format table: one row per observation, columns Municipio and IMC.
datos <- data.frame(Municipio, IMC)

head(datos)

##   Municipio IMC
## 1    Sayula  25
## 2    Sayula  25
## 3    Sayula  29
## 4    Sayula  27
## 5    Sayula  25
## 6    Sayula  29

#Estadística descriptiva

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.4.3

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Descriptive statistics of IMC per municipality: mean (Media), standard
# deviation (DE), minimum and maximum. The result is not assigned, so it is
# auto-printed. `.names = "{.fn}"` names each output column after its entry
# in the function list.
datos %>%
  group_by(Municipio) %>%
  summarise(
    across(
      IMC,
      list(Media = mean, DE = sd, Min = min, Max = max),
      .names = "{.fn}"
    )
  )

## # A tibble: 4 × 5
##   Municipio   Media    DE   Min   Max
##   <fct>       <dbl> <dbl> <dbl> <dbl>
## 1 GomezFarias  27.3  2.08    25    31
## 2 Sayula       27.9  2.15    25    31
## 3 Techaluta    26.9  2.13    25    31
## 4 Zacoalco     27.5  1.94    25    31

#Gráficas de normalidad

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.4.3

# Visual normality checks: one histogram and one normal Q-Q plot per
# municipality, each set of four on its own 2 x 2 page.
# par() returns the settings it replaces; they are kept and restored at the
# end so that plots drawn afterwards on the same device are not squeezed
# into a single cell of the 2 x 2 grid when this file runs as a plain script.
par_previo <- par(mfrow = c(2, 2))

hist(Sayula, main = "Sayula")
hist(GomezFarias, main = "Gomez Farías")
hist(Zacoalco, main = "Zacoalco")
hist(Techaluta, main = "Techaluta")

# Setting mfrow again starts a fresh 2 x 2 page for the Q-Q plots.
par(mfrow = c(2, 2))

# Each panel gets its own title; without `main` all four would carry the
# same default title and could not be told apart.
qqnorm(Sayula, main = "Q-Q normal: Sayula")
qqline(Sayula)
qqnorm(GomezFarias, main = "Q-Q normal: Gomez Farías")
qqline(GomezFarias)
qqnorm(Zacoalco, main = "Q-Q normal: Zacoalco")
qqline(Zacoalco)
qqnorm(Techaluta, main = "Q-Q normal: Techaluta")
qqline(Techaluta)

# Back to the layout that was active before this section.
par(par_previo)

#Pruebas de normalidad

# Shapiro-Wilk normality test, run separately on each municipality's sample.
# The null hypothesis of the test is that the sample comes from a normal
# distribution. The recorded outputs below all show p-values under 0.05, so
# normality is rejected in every group at the 5% level.
# The calls are kept as four literal statements because the "data:" label in
# the printed result is taken from the expression passed to the function.
shapiro.test(Sayula)

## 
##  Shapiro-Wilk normality test
## 
## data:  Sayula
## W = 0.81358, p-value = 0.0001171

shapiro.test(GomezFarias)

## 
##  Shapiro-Wilk normality test
## 
## data:  GomezFarias
## W = 0.79961, p-value = 6.449e-05

shapiro.test(Zacoalco)

## 
##  Shapiro-Wilk normality test
## 
## data:  Zacoalco
## W = 0.84349, p-value = 0.0004543

shapiro.test(Techaluta)

## 
##  Shapiro-Wilk normality test
## 
## data:  Techaluta
## W = 0.80207, p-value = 7.153e-05

#Gráfica

# Side-by-side boxplots of IMC, one box per level of Municipio.
boxplot(
  IMC ~ Municipio,
  data = datos,
  main = "Distribución del IMC",
  col = "lightblue"
)

#Prueba formal de homogeneidad de varianzas (Levene)

library(car)

## Warning: package 'car' was built under R version 4.4.3

## Cargando paquete requerido: carData

## Warning: package 'carData' was built under R version 4.4.3

## 
## Adjuntando el paquete: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

# Levene's test for equal variances of IMC across municipalities.
# `center = median` is the function's default, written out here so the
# variant in use is visible in the call.
leveneTest(
  IMC ~ Municipio,
  data = datos,
  center = median
)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.1938 0.9005
##       116

#Anova de una vía

# One-way ANOVA of IMC with Municipio as the only factor. The fitted object
# is kept because the post-hoc comparison further down takes it as input.
modelo_anova <- aov(IMC ~ Municipio, data = datos)

summary(modelo_anova)

##              Df Sum Sq Mean Sq F value Pr(>F)
## Municipio     3   13.7   4.567   1.058   0.37
## Residuals   116  500.7   4.316

# The Shapiro-Wilk tests earlier in this file reject normality in every
# group, so the ANOVA's normality assumption is doubtful. The Kruskal-Wallis
# rank-sum test compares the same four groups without assuming normality and
# serves as a robustness check on the conclusion above.
kruskal.test(IMC ~ Municipio, data = datos)

#Prueba post-hoc

# Tukey honest-significant-difference comparisons for every pair of
# municipalities, based on the fitted ANOVA model. The term and confidence
# level are the defaults, written out explicitly.
# NOTE(review): the recorded ANOVA output shows p = 0.37, so these pairwise
# comparisons are descriptive only; none of the recorded adjusted p-values
# is below 0.05.
TukeyHSD(
  modelo_anova,
  which = "Municipio",
  conf.level = 0.95
)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = IMC ~ Municipio, data = datos)
## 
## $Municipio
##                             diff        lwr       upr     p adj
## Sayula-GomezFarias     0.6000000 -0.7982503 1.9982503 0.6788785
## Techaluta-GomezFarias -0.3333333 -1.7315837 1.0649170 0.9250380
## Zacoalco-GomezFarias   0.2000000 -1.1982503 1.5982503 0.9822332
## Techaluta-Sayula      -0.9333333 -2.3315837 0.4649170 0.3079191
## Zacoalco-Sayula       -0.4000000 -1.7982503 0.9982503 0.8783260
## Zacoalco-Techaluta     0.5333333 -0.8649170 1.9315837 0.7529797

#Correlación

# Wide table with one column per municipality; row i holds the i-th value of
# each vector.
# NOTE(review): this pairs observations across municipalities by position.
# Presumably the four samples are independent groups of people, in which
# case the pairing is arbitrary and the correlations carry no meaning --
# confirm how the data were collected.
matriz <- data.frame(
  Sayula = Sayula,
  GomezFarias = GomezFarias,
  Zacoalco = Zacoalco,
  Techaluta = Techaluta
)

# Pearson correlation matrix between the four columns (auto-printed).
cor(x = matriz, method = "pearson")

##                  Sayula GomezFarias    Zacoalco   Techaluta
## Sayula       1.00000000  0.44029194 -0.19965949 -0.07738834
## GomezFarias  0.44029194  1.00000000 -0.09995974  0.03518802
## Zacoalco    -0.19965949 -0.09995974  1.00000000  0.14093991
## Techaluta   -0.07738834  0.03518802  0.14093991  1.00000000

#Correlación específica

# Significance test and confidence interval for the Pearson correlation
# between the Sayula and GomezFarias vectors, paired by position.
cor.test(
  x = Sayula,
  y = GomezFarias,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  Sayula and GomezFarias
## t = 2.5949, df = 28, p-value = 0.01489
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.0951093 0.6909588
## sample estimates:
##       cor 
## 0.4402919

#Regresión lineal

# Simple linear regression with GomezFarias as the response and Sayula as
# the single predictor. Both are the top-level vectors, matched by position.
# The fitted model is stored for the residual diagnostics that follow.
modelo_regresion <- lm(formula = GomezFarias ~ Sayula)

# Coefficient table, residual summary, R-squared and overall F test.
summary(object = modelo_regresion)

## 
## Call:
## lm(formula = GomezFarias ~ Sayula)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6064 -1.0410  0.6763  1.2488  3.2488 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  15.3516     4.6049   3.334  0.00242 **
## Sayula        0.4276     0.1648   2.595  0.01489 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.904 on 28 degrees of freedom
## Multiple R-squared:  0.1939, Adjusted R-squared:  0.1651 
## F-statistic: 6.733 on 1 and 28 DF,  p-value: 0.01489

#Análisis de residuos

# Diagnostic plots for the fitted regression, arranged on one 2 x 2 page.
# par() returns the settings it replaces; restoring them afterwards keeps
# the 2 x 2 layout from leaking into any plot drawn later on this device.
par_previo_residuos <- par(mfrow = c(2, 2))
plot(modelo_regresion)
par(par_previo_residuos)

Examen final de estadística - problema 1

Emily Margarita Davalos Escobedo

2026-05-10