Estadistica Multivariable
############ ESTADISTICA Multivariable ############
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(x = 1, y = 1,
labels = "ESTADÍSTICA MULTIVARIABLE",
cex = 2,
col = "blue",
font =6)

#Carga de datos
datos <- read.csv("C:\\Users\\Usuario\\Downloads\\water_pollution_disease.csv"
, encoding = "UTF-8")
head(datos) # Muestra las primeras filas
## Country Region Year Water.Source.Type Contaminant.Level..ppm. pH.Level
## 1 Mexico North 2015 Lake 6.06 7.12
## 2 Brazil West 2017 Well 5.24 7.84
## 3 Indonesia Central 2022 Pond 0.24 6.43
## 4 Nigeria East 2016 Well 7.91 6.71
## 5 Mexico South 2005 Well 0.12 8.16
## 6 Ethiopia West 2013 Tap 2.93 8.21
## Turbidity..NTU. Dissolved.Oxygen..mg.L. Nitrate.Level..mg.L.
## 1 3.93 4.28 8.28
## 2 4.79 3.86 15.74
## 3 0.79 3.42 36.67
## 4 1.96 3.12 36.92
## 5 4.22 9.15 49.35
## 6 4.03 8.66 31.35
## Lead.Concentration..µg.L. Bacteria.Count..CFU.mL. Water.Treatment.Method
## 1 7.89 3344 Filtration
## 2 14.68 2122 Boiling
## 3 9.96 2330 None
## 4 6.77 3779 Boiling
## 5 12.51 4182 Filtration
## 6 16.74 880 None
## Access.to.Clean.Water....of.Population. Diarrheal.Cases.per.100.000.people
## 1 33.60 472
## 2 89.54 122
## 3 35.29 274
## 4 57.53 3
## 5 36.60 466
## 6 69.48 258
## Cholera.Cases.per.100.000.people Typhoid.Cases.per.100.000.people
## 1 33 44
## 2 27 8
## 3 39 50
## 4 33 13
## 5 31 68
## 6 22 55
## Infant.Mortality.Rate..per.1.000.live.births. GDP.per.Capita..USD.
## 1 76.16 57057
## 2 77.30 17220
## 3 48.45 86022
## 4 95.66 31166
## 5 58.78 25661
## 6 70.13 84334
## Healthcare.Access.Index..0.100. Urbanization.Rate....
## 1 96.92 84.61
## 2 84.73 73.37
## 3 58.37 72.86
## 4 39.07 71.07
## 5 23.03 55.55
## 6 53.45 86.11
## Sanitation.Coverage....of.Population. Rainfall..mm.per.year. Temperature...C.
## 1 63.23 2800 4.94
## 2 29.12 1572 16.93
## 3 93.56 2074 21.73
## 4 94.25 937 3.79
## 5 69.23 2295 31.44
## 6 51.11 2530 8.01
## Population.Density..people.per.km..
## 1 593
## 2 234
## 3 57
## 4 555
## 5 414
## 6 775
str(datos) # Muestra la estructura del data frame
## 'data.frame': 3000 obs. of 24 variables:
## $ Country : chr "Mexico" "Brazil" "Indonesia" "Nigeria" ...
## $ Region : chr "North" "West" "Central" "East" ...
## $ Year : int 2015 2017 2022 2016 2005 2013 2022 2024 2014 2013 ...
## $ Water.Source.Type : chr "Lake" "Well" "Pond" "Well" ...
## $ Contaminant.Level..ppm. : num 6.06 5.24 0.24 7.91 0.12 2.93 0.06 3.76 0.63 9.14 ...
## $ pH.Level : num 7.12 7.84 6.43 6.71 8.16 8.21 6.11 6.42 6.29 6.45 ...
## $ Turbidity..NTU. : num 3.93 4.79 0.79 1.96 4.22 4.03 3.12 1.35 1.42 0.62 ...
## $ Dissolved.Oxygen..mg.L. : num 4.28 3.86 3.42 3.12 9.15 8.66 6.97 9.99 9.67 7.59 ...
## $ Nitrate.Level..mg.L. : num 8.28 15.74 36.67 36.92 49.35 ...
## $ Lead.Concentration..µg.L. : num 7.89 14.68 9.96 6.77 12.51 ...
## $ Bacteria.Count..CFU.mL. : int 3344 2122 2330 3779 4182 880 2977 1172 159 2493 ...
## $ Water.Treatment.Method : chr "Filtration" "Boiling" "None" "Boiling" ...
## $ Access.to.Clean.Water....of.Population. : num 33.6 89.5 35.3 57.5 36.6 ...
## $ Diarrheal.Cases.per.100.000.people : int 472 122 274 3 466 258 208 397 265 261 ...
## $ Cholera.Cases.per.100.000.people : int 33 27 39 33 31 22 23 0 23 2 ...
## $ Typhoid.Cases.per.100.000.people : int 44 8 50 13 68 55 90 10 29 38 ...
## $ Infant.Mortality.Rate..per.1.000.live.births.: num 76.2 77.3 48.5 95.7 58.8 ...
## $ GDP.per.Capita..USD. : int 57057 17220 86022 31166 25661 84334 6726 76593 5470 72858 ...
## $ Healthcare.Access.Index..0.100. : num 96.9 84.7 58.4 39.1 23 ...
## $ Urbanization.Rate.... : num 84.6 73.4 72.9 71.1 55.5 ...
## $ Sanitation.Coverage....of.Population. : num 63.2 29.1 93.6 94.2 69.2 ...
## $ Rainfall..mm.per.year. : int 2800 1572 2074 937 2295 2530 1573 940 2150 2083 ...
## $ Temperature...C. : num 4.94 16.93 21.73 3.79 31.44 ...
## $ Population.Density..people.per.km.. : int 593 234 57 555 414 775 584 111 538 250 ...
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Limpiar y resumir los datos por año
datos_limpios <- datos %>%
group_by(Year) %>%
summarise(Nivel_promedio =
mean(as.numeric(as.character(Contaminant.Level..ppm.)), na.rm =
TRUE))
# Conjetura de modelo matemático
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(x = 1, y = 1,
labels = "Modelo Polinómico",
cex = 2, # Tamaño del texto
col = "blue",
font =6)

# Variables para el modelo
x <- datos_limpios$Year
y <- datos_limpios$Nivel_promedio
# Guardar los años originales (x_real)
x_real <- datos_limpios$Year
x_esc <- scale(x_real)
x_esc_vector <- as.vector(x_esc)
y <- datos_limpios$Nivel_promedio
#Escalado
x_esc <- scale(datos_limpios$Year)
x_esc_vector <- as.vector(x_esc)
y <- datos_limpios$Nivel_promedio
x <- scale(datos_limpios$Year)
# Crear términos polinómicos hasta grado 5
x2 <- x_esc_vector^2
x3 <- x_esc_vector^3
x4 <- x_esc_vector^4
x5 <- x_esc_vector^5
# Ajustar modelo de regresión polinómica
modelo_poli <- lm(y ~ x_esc_vector + x2 + x3 + x4 + x5)
modelo_poli
##
## Call:
## lm(formula = y ~ x_esc_vector + x2 + x3 + x4 + x5)
##
## Coefficients:
## (Intercept) x_esc_vector x2 x3 x4
## 5.02507 0.40061 0.03423 -0.48725 -0.06455
## x5
## 0.11528
# Obtener coeficientes
beta0 <- modelo_poli$coefficients[1]
beta1 <- modelo_poli$coefficients[2]
beta2 <- modelo_poli$coefficients[3]
beta3 <- modelo_poli$coefficients[4]
beta4 <- modelo_poli$coefficients[5]
beta5 <- modelo_poli$coefficients[6]
a<-beta0
b<-beta1
c<-beta2
d<-beta3
e<-beta4
f<-beta5
# Mostrar ecuación
cat("Ecuación del modelo polinómico (grado 5):\n")
## Ecuación del modelo polinómico (grado 5):
cat("Y =", round(a, 4), "+", round(b, 4), "*x +",
round(c, 4), "*x^2 +", round(d, 4), "*x^3 +", round(e, 4), "*x^4 +",
round(f, 4), "*x^5")
## Y = 5.0251 + 0.4006 *x + 0.0342 *x^2 + -0.4872 *x^3 + -0.0646 *x^4 + 0.1153 *x^5
#Formamos la ecuación
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(x = 1, y = 1,
labels = paste("Y =", round(a, 4), "+", round(b, 4), "*x +",
round(c, 4), "*x^2 +", round(d, 4), "*x^3 +",
round(e, 4),"*x^4 +",
round(f, 4), "*x^5"),
cex = 1, col = "blue", font = 6)

# Resumen del modelo
summary(modelo_poli)
##
## Call:
## lm(formula = y ~ x_esc_vector + x2 + x3 + x4 + x5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34211 -0.15720 -0.04067 0.17172 0.36241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.02507 0.09003 55.818 <2e-16 ***
## x_esc_vector 0.40061 0.21675 1.848 0.0802 .
## x2 0.03423 0.19784 0.173 0.8645
## x3 -0.48725 0.30014 -1.623 0.1210
## x4 -0.06455 0.07730 -0.835 0.4140
## x5 0.11528 0.09243 1.247 0.2275
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2392 on 19 degrees of freedom
## Multiple R-squared: 0.3694, Adjusted R-squared: 0.2034
## F-statistic: 2.226 on 5 and 19 DF, p-value: 0.09394
# Graficar puntos con eje real
plot(x_real, y, col = "red", pch = 19,
main = "Gráfica 1 .Regresión Polinómica: Nivel de contaminante (ppm)
en función de Año",
xlab = "Año", ylab = "Nivel de contaminante (ppm)")
# Mostrar el eje X forzando todos los años hasta 2024
axis(1, at = seq(min(x_real), 2024, by = 3))
# Graficar la curva
# Secuencia para curva ajustada
x_seq_real <- seq(min(x_real), max(x_real), length.out = 300)
x_seq_esc <- (x_seq_real - mean(x_real)) / sd(x_real)
x_seq2 <- x_seq_esc^2
x_seq3 <- x_seq_esc^3
x_seq4 <- x_seq_esc^4
x_seq5 <- x_seq_esc^5
datos_pred <- data.frame(
x_esc_vector = x_seq_esc,
x2 = x_seq2,
x3 = x_seq3,
x4 = x_seq4,
x5 = x_seq5
)
# Predecir
y_pred <- predict(modelo_poli, newdata = datos_pred)
lines(x_seq_real, y_pred, col = "blue", lwd = 2)

# Coeficiente de Pearson
cor(y, fitted(modelo_poli))
## [1] 0.6077691
# RESTRICCIONES Y > 0
y_rango <- a + b * x_seq_real + c * x_seq2^2 + d * x_seq3^3 + e * x_seq4^4
+ f * x_seq5^5
## [1] -2.342316e+04 -1.980510e+04 -1.672693e+04 -1.411095e+04 -1.189024e+04
## [6] -1.000718e+04 -8.412270e+03 -7.062969e+03 -5.922795e+03 -4.960481e+03
## [11] -4.149262e+03 -3.466251e+03 -2.891899e+03 -2.409528e+03 -2.004925e+03
## [16] -1.665992e+03 -1.382443e+03 -1.145546e+03 -9.478934e+02 -7.832105e+02
## [21] -6.461895e+02 -5.323459e+02 -4.378953e+02 -3.596490e+02 -2.949231e+02
## [26] -2.414624e+02 -1.973739e+02 -1.610712e+02 -1.312268e+02 -1.067313e+02
## [31] -8.665893e+01 -7.023838e+01 -5.682800e+01 -4.589484e+01 -3.699690e+01
## [36] -2.976822e+01 -2.390629e+01 -1.916147e+01 -1.532807e+01 -1.223693e+01
## [41] -9.749193e+00 -7.751025e+00 -6.149323e+00 -4.868060e+00 -3.845273e+00
## [46] -3.030561e+00 -2.383002e+00 -1.869443e+00 -1.463076e+00 -1.142266e+00
## [51] -8.895958e-01 -6.910691e-01 -5.354650e-01 -4.138077e-01 -3.189334e-01
## [56] -2.451382e-01 -1.878910e-01 -1.436015e-01 -1.094317e-01 -8.314386e-02
## [61] -6.297839e-02 -4.755514e-02 -3.579456e-02 -2.685458e-02 -2.008022e-02
## [66] -1.496351e-02 -1.111163e-02 -8.221753e-03 -6.061162e-03 -4.451565e-03
## [71] -3.256821e-03 -2.373324e-03 -1.722496e-03 -1.244949e-03 -8.959618e-04
## [76] -6.419815e-04 -4.579301e-04 -3.251366e-04 -2.297559e-04 -1.615636e-04
## [81] -1.130410e-04 -7.868283e-05 -5.447660e-05 -3.751084e-05 -2.568314e-05
## [86] -1.748270e-05 -1.182929e-05 -7.954560e-06 -5.314882e-06 -3.527768e-06
## [91] -2.325626e-06 -1.522342e-06 -9.892631e-07 -6.380091e-07 -4.082632e-07
## [96] -2.591363e-07 -1.631024e-07 -1.017650e-07 -6.292112e-08 -3.853897e-08
## [101] -2.337468e-08 -1.403328e-08 -8.335966e-09 -4.897106e-09 -2.843810e-09
## [106] -1.631609e-09 -9.243727e-10 -5.168196e-10 -2.849825e-10 -1.548786e-10
## [111] -8.289782e-11 -4.366505e-11 -2.261504e-11 -1.150634e-11 -5.745456e-12
## [116] -2.812504e-12 -1.348143e-12 -6.319747e-13 -2.893207e-13 -1.291551e-13
## [121] -5.612626e-14 -2.369939e-14 -9.703604e-15 -3.843816e-15 -1.469333e-15
## [126] -5.404653e-16 -1.906831e-16 -6.429522e-17 -2.063370e-17 -6.272811e-18
## [131] -1.796722e-18 -4.818467e-19 -1.201066e-19 -2.758762e-20 -5.779738e-21
## [136] -1.090977e-21 -1.827931e-22 -2.669107e-23 -3.319416e-24 -3.414666e-25
## [141] -2.797086e-26 -1.734159e-27 -7.588349e-29 -2.120555e-30 -3.255989e-32
## [146] -2.157389e-34 -4.030109e-37 -8.956038e-41 -2.546227e-46 -3.005148e-58
## [151] 3.005148e-58 2.546227e-46 8.956038e-41 4.030109e-37 2.157389e-34
## [156] 3.255989e-32 2.120555e-30 7.588349e-29 1.734159e-27 2.797086e-26
## [161] 3.414666e-25 3.319416e-24 2.669107e-23 1.827931e-22 1.090977e-21
## [166] 5.779738e-21 2.758762e-20 1.201066e-19 4.818467e-19 1.796722e-18
## [171] 6.272811e-18 2.063370e-17 6.429522e-17 1.906831e-16 5.404653e-16
## [176] 1.469333e-15 3.843816e-15 9.703604e-15 2.369939e-14 5.612626e-14
## [181] 1.291551e-13 2.893207e-13 6.319747e-13 1.348143e-12 2.812504e-12
## [186] 5.745456e-12 1.150634e-11 2.261504e-11 4.366505e-11 8.289782e-11
## [191] 1.548786e-10 2.849825e-10 5.168196e-10 9.243727e-10 1.631609e-09
## [196] 2.843810e-09 4.897106e-09 8.335966e-09 1.403328e-08 2.337468e-08
## [201] 3.853897e-08 6.292112e-08 1.017650e-07 1.631024e-07 2.591363e-07
## [206] 4.082632e-07 6.380091e-07 9.892631e-07 1.522342e-06 2.325626e-06
## [211] 3.527768e-06 5.314882e-06 7.954560e-06 1.182929e-05 1.748270e-05
## [216] 2.568314e-05 3.751084e-05 5.447660e-05 7.868283e-05 1.130410e-04
## [221] 1.615636e-04 2.297559e-04 3.251366e-04 4.579301e-04 6.419815e-04
## [226] 8.959618e-04 1.244949e-03 1.722496e-03 2.373324e-03 3.256821e-03
## [231] 4.451565e-03 6.061162e-03 8.221753e-03 1.111163e-02 1.496351e-02
## [236] 2.008022e-02 2.685458e-02 3.579456e-02 4.755514e-02 6.297839e-02
## [241] 8.314386e-02 1.094317e-01 1.436015e-01 1.878910e-01 2.451382e-01
## [246] 3.189334e-01 4.138077e-01 5.354650e-01 6.910691e-01 8.895958e-01
## [251] 1.142266e+00 1.463076e+00 1.869443e+00 2.383002e+00 3.030561e+00
## [256] 3.845273e+00 4.868060e+00 6.149323e+00 7.751025e+00 9.749193e+00
## [261] 1.223693e+01 1.532807e+01 1.916147e+01 2.390629e+01 2.976822e+01
## [266] 3.699690e+01 4.589484e+01 5.682800e+01 7.023838e+01 8.665893e+01
## [271] 1.067313e+02 1.312268e+02 1.610712e+02 1.973739e+02 2.414624e+02
## [276] 2.949231e+02 3.596490e+02 4.378953e+02 5.323459e+02 6.461895e+02
## [281] 7.832105e+02 9.478934e+02 1.145546e+03 1.382443e+03 1.665992e+03
## [286] 2.004925e+03 2.409528e+03 2.891899e+03 3.466251e+03 4.149262e+03
## [291] 4.960481e+03 5.922795e+03 7.062969e+03 8.412270e+03 1.000718e+04
## [296] 1.189024e+04 1.411095e+04 1.672693e+04 1.980510e+04 2.342316e+04
indices_validos <- which(y_rango > 0)
x_validos <- x_seq_real[indices_validos]
cat("Restricción: Y > 0 se cumple en el rango:\n")
## Restricción: Y > 0 se cumple en el rango:
cat(round(min(x_validos), 2), "< x <", round(max(x_validos), 2), "\n")
## 2000 < x < 2024
#Estimaciones
modelo_poli <- lm(y ~ x_esc_vector + x2 + x3 + x4 + x5)
# Calcular media y desviación estándar de los años originales
media_x <- mean(x_real)
sd_x <- sd(x_real)
# Año a estimar
año_nuevo <- 2021
x_esc_nuevo <- (año_nuevo - media_x) / sd_x
# Crear todos los términos necesarios con nombres correctos
nuevo_dato <- data.frame(
x_esc_vector = x_esc_nuevo,
x2 = x_esc_nuevo^2,
x3 = x_esc_nuevo^3,
x4 = x_esc_nuevo^4,
x5 = x_esc_nuevo^5
)
# Predecir
prediccion_2021 <- predict(modelo_poli, newdata = nuevo_dato)
# Mostrar resultado
cat("Predicción para el año 2021:", round(prediccion_2021, 4), "\n")
## Predicción para el año 2021: 4.846
#
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(x = 1, y = 1,
labels =
"¿Que cantidad de nivel de
contaminante (ppm) se espera para
el año 2021?
R= 4.846 (ppm)",
cex = 2,
col = "blue",
font = 6)
