#UNIVERSIDAD CENTRAL DE ECUADOR
#Facultad de Ingeniería en Geología,Minas, Petroleos y Ambiental
#INGENIERIA AMBIENTAL
#AUTHOR: SOFIA HEREDIA
#FECHA: 14-05-2025
# Carga del conjunto de datos
# Package setup and data import.
# Pin a CRAN mirror so package installation also works non-interactively.
options(repos = c(CRAN = "https://cran.rstudio.com"))

# Install only the packages that are not available yet. Calling
# install.packages() unconditionally re-downloads them on every run; the
# recorded log of this script showed "cannot remove prior installation" /
# "Permission denied" warnings for both readxl and readr when doing so.
for (paquete in c("readxl", "readr")) {
  if (!requireNamespace(paquete, quietly = TRUE)) {
    install.packages(paquete)
  }
}
library(readxl)
library(readr)

# Read the CSV data set into a tibble (read_csv comes from readr).
datos <- read_csv("C:/Users/Usuario/Downloads/water_pollution_disease (2).csv")
## Rows: 3000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Region, Water Source Type, Water Treatment Method
## dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the imported data.
head(datos)
## # A tibble: 6 × 24
## Country Region Year `Water Source Type` Contaminant Level (pp…¹ `pH Level`
## <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 Mexico North 2015 Lake 6.06 7.12
## 2 Brazil West 2017 Well 5.24 7.84
## 3 Indonesia Central 2022 Pond 0.24 6.43
## 4 Nigeria East 2016 Well 7.91 6.71
## 5 Mexico South 2005 Well 0.12 8.16
## 6 Ethiopia West 2013 Tap 2.93 8.21
## # ℹ abbreviated name: ¹`Contaminant Level (ppm)`
## # ℹ 18 more variables: `Turbidity (NTU)` <dbl>,
## # `Dissolved Oxygen (mg/L)` <dbl>, `Nitrate Level (mg/L)` <dbl>,
## # `Lead Concentration (µg/L)` <dbl>, `Bacteria Count (CFU/mL)` <dbl>,
## # `Water Treatment Method` <chr>,
## # `Access to Clean Water (% of Population)` <dbl>,
## # `Diarrheal Cases per 100,000 people` <dbl>, …
# Descriptive statistics ("ED") for a continuous quantitative variable.
# Print the structure of the imported data: one line per column with its type.
utils::str(object = datos)
## spc_tbl_ [3,000 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Country : chr [1:3000] "Mexico" "Brazil" "Indonesia" "Nigeria" ...
## $ Region : chr [1:3000] "North" "West" "Central" "East" ...
## $ Year : num [1:3000] 2015 2017 2022 2016 2005 ...
## $ Water Source Type : chr [1:3000] "Lake" "Well" "Pond" "Well" ...
## $ Contaminant Level (ppm) : num [1:3000] 6.06 5.24 0.24 7.91 0.12 2.93 0.06 3.76 0.63 9.14 ...
## $ pH Level : num [1:3000] 7.12 7.84 6.43 6.71 8.16 8.21 6.11 6.42 6.29 6.45 ...
## $ Turbidity (NTU) : num [1:3000] 3.93 4.79 0.79 1.96 4.22 4.03 3.12 1.35 1.42 0.62 ...
## $ Dissolved Oxygen (mg/L) : num [1:3000] 4.28 3.86 3.42 3.12 9.15 8.66 6.97 9.99 9.67 7.59 ...
## $ Nitrate Level (mg/L) : num [1:3000] 8.28 15.74 36.67 36.92 49.35 ...
## $ Lead Concentration (µg/L) : num [1:3000] 7.89 14.68 9.96 6.77 12.51 ...
## $ Bacteria Count (CFU/mL) : num [1:3000] 3344 2122 2330 3779 4182 ...
## $ Water Treatment Method : chr [1:3000] "Filtration" "Boiling" "None" "Boiling" ...
## $ Access to Clean Water (% of Population) : num [1:3000] 33.6 89.5 35.3 57.5 36.6 ...
## $ Diarrheal Cases per 100,000 people : num [1:3000] 472 122 274 3 466 258 208 397 265 261 ...
## $ Cholera Cases per 100,000 people : num [1:3000] 33 27 39 33 31 22 23 0 23 2 ...
## $ Typhoid Cases per 100,000 people : num [1:3000] 44 8 50 13 68 55 90 10 29 38 ...
## $ Infant Mortality Rate (per 1,000 live births): num [1:3000] 76.2 77.3 48.5 95.7 58.8 ...
## $ GDP per Capita (USD) : num [1:3000] 57057 17220 86022 31166 25661 ...
## $ Healthcare Access Index (0-100) : num [1:3000] 96.9 84.7 58.4 39.1 23 ...
## $ Urbanization Rate (%) : num [1:3000] 84.6 73.4 72.9 71.1 55.5 ...
## $ Sanitation Coverage (% of Population) : num [1:3000] 63.2 29.1 93.6 94.2 69.2 ...
## $ Rainfall (mm per year) : num [1:3000] 2800 1572 2074 937 2295 ...
## $ Temperature (°C) : num [1:3000] 4.94 16.93 21.73 3.79 31.44 ...
## $ Population Density (people per km²) : num [1:3000] 593 234 57 555 414 775 584 111 538 250 ...
## - attr(*, "spec")=
## .. cols(
## .. Country = col_character(),
## .. Region = col_character(),
## .. Year = col_double(),
## .. `Water Source Type` = col_character(),
## .. `Contaminant Level (ppm)` = col_double(),
## .. `pH Level` = col_double(),
## .. `Turbidity (NTU)` = col_double(),
## .. `Dissolved Oxygen (mg/L)` = col_double(),
## .. `Nitrate Level (mg/L)` = col_double(),
## .. `Lead Concentration (µg/L)` = col_double(),
## .. `Bacteria Count (CFU/mL)` = col_double(),
## .. `Water Treatment Method` = col_character(),
## .. `Access to Clean Water (% of Population)` = col_double(),
## .. `Diarrheal Cases per 100,000 people` = col_double(),
## .. `Cholera Cases per 100,000 people` = col_double(),
## .. `Typhoid Cases per 100,000 people` = col_double(),
## .. `Infant Mortality Rate (per 1,000 live births)` = col_double(),
## .. `GDP per Capita (USD)` = col_double(),
## .. `Healthcare Access Index (0-100)` = col_double(),
## .. `Urbanization Rate (%)` = col_double(),
## .. `Sanitation Coverage (% of Population)` = col_double(),
## .. `Rainfall (mm per year)` = col_double(),
## .. `Temperature (°C)` = col_double(),
## .. `Population Density (people per km²)` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Extract the infant mortality rate column and drop missing values.
mortalidad_infantil <- datos$`Infant Mortality Rate (per 1,000 live births)`
mortalidad_infantil <- na.omit(mortalidad_infantil)
# MANUAL PROCEDURE
# Frequency distribution table built by hand.
# The extremes are stored under their own names so that the base functions
# min() and max() are not shadowed by variables.
valor_min <- min(mortalidad_infantil)
valor_max <- max(mortalidad_infantil)
R <- valor_max - valor_min                                  # range
K <- floor(1 + 3.33 * log10(length(mortalidad_infantil)))   # number of classes
A <- R / K                                                  # class width
# Build all K + 1 class limits in a single sequence so that the first limit is
# exactly the minimum, the last one exactly the maximum, and there are always
# K classes regardless of floating-point accumulation.
limites_clase <- seq(from = valor_min, to = valor_max, length.out = K + 1)
# Count on the exact (unrounded) limits: every class is [Li, Ls) except the
# last one, which is closed on the right so the maximum is included.
ni <- as.vector(table(cut(mortalidad_infantil,
  breaks = limites_clase,
  right = FALSE,
  include.lowest = TRUE)))
# Limits are rounded only for display; class marks use the displayed limits.
Li <- round(limites_clase[-(K + 1)], 2)
Ls <- round(limites_clase[-1], 2)
Mc <- (Li + Ls) / 2
# Sanity check: the class counts must add up to the number of observations.
sum(ni)
## [1] 3000
hi <- ni / sum(ni) * 100            # relative frequency, in percent
Ni_asc <- cumsum(ni)                # ascending cumulative count
Hi_asc <- cumsum(hi)                # ascending cumulative percentage
Ni_desc <- rev(cumsum(rev(ni)))     # descending cumulative count
Hi_desc <- rev(cumsum(rev(hi)))     # descending cumulative percentage
TDF_mortalidad_inf <- data.frame(
  Li, Ls, Mc, ni, round(hi, 2), Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)
# The "(%)" suffix belongs to the cumulative percentages (Hi), not to the
# cumulative counts (Ni). "hi" keeps its plain name because later plots read
# TDF_mortalidad_inf$hi.
colnames(TDF_mortalidad_inf) <- c("Li", "Ls", "Mc", "ni", "hi",
  "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)")
# Totals row: only ni and hi are summed; every other column shows "-".
# Mixing "-" with numbers makes this a character vector, so the bound table
# below holds text in every column.
totales <- setNames(
  c(rep("-", 3), sum(ni), sum(hi), rep("-", 4)),
  c("Li", "Ls", "Mc", "ni", "hi", "Ni_asc", "Ni_desc", "Hi_asc", "Hi_desc")
)
# Append the totals row below the frequency table and open it in the viewer.
TDF_mortalidad_inf_total <- rbind(TDF_mortalidad_inf, totales)
View(TDF_mortalidad_inf_total)
# Frequency distribution table derived from hist().
# The histogram object is kept because its breaks, mids and counts are reused
# for the table below and for the axes of later plots.
Histograma_mortalidad_inf <- hist(mortalidad_infantil, main="Gráfica N:1 Distribución de Mortalidad Infantil",
  xlab = "Mortalidad Infantil",
  ylab = "cantidad",col = "orange")

limites <- Histograma_mortalidad_inf$breaks
# Lower limits are all breaks but the last, upper limits all but the first.
# Derived from the actual number of breaks instead of assuming 11 of them.
liminf <- limites[-length(limites)]
liminsup <- limites[-1]
MC <- Histograma_mortalidad_inf$mids      # class marks
ni <- Histograma_mortalidad_inf$counts    # absolute frequencies
hi <- ni / sum(ni) * 100                  # relative frequencies, in percent
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nides <- rev(cumsum(rev(ni)))
Hides <- rev(cumsum(rev(hi)))
TDF_mortalidad <- data.frame(liminf, liminsup, MC, ni, round(hi, 2),
  Niasc, Nides, round(Hiasc, 2),
  round(Hides, 2))
# Totals row: only ni and hi are summed. Because of the "-" placeholders this
# is a character vector, so the bound table holds text in every column.
totales <- c(
  liminf = "-",
  liminsup = "-",
  MC = "-",
  ni = sum(ni),
  hi = sum(hi),
  Niasc = "-",
  Nides = "-",
  Hiasc = "-",
  Hides = "-")
TDF_mortalidad_total <- rbind(TDF_mortalidad, totales)
# Labels must follow the column order of the data frame:
# Niasc, Nides, Hiasc, Hides. The previous order swapped the labels of the
# descending count and the ascending percentage.
colnames(TDF_mortalidad_total) <- c("Limininf", "Liminsup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)")
View(TDF_mortalidad_total)
# Table styling.
# Install the formatting packages only when they are missing, instead of
# re-downloading them on every run of the script.
for (paquete in c("kableExtra", "dplyr")) {
  if (!requireNamespace(paquete, quietly = TRUE)) {
    install.packages(paquete)
  }
}
library(kableExtra)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
##
## The following object is masked from 'package:kableExtra':
##
## group_rows
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Render the frequency table (including its totals row) as a styled table.
kable(TDF_mortalidad_total, align = "c",
  caption = "Tabla de Distribución de Frecuencias de Mortalidad Infantil de los países del estudio") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
## Tabla de Distribución de Frecuencias de Mortalidad Infantil de los países del estudio
##
## | Limininf | Liminsup | MC | ni   | hi(%) | Ni asc | Hi asc(%) | Ni desc | Hi desc(%) |
## |:--------:|:--------:|:--:|:----:|:-----:|:------:|:---------:|:-------:|:----------:|
## |    0     |    10    | 5  | 256  | 8.53  |  256   |   3000    |  8.53   |    100     |
## |    10    |    20    | 15 | 297  |  9.9  |  553   |   2744    |  18.43  |   91.47    |
## |    20    |    30    | 25 | 320  | 10.67 |  873   |   2447    |  29.1   |   81.57    |
## |    30    |    40    | 35 | 314  | 10.47 |  1187  |   2127    |  39.57  |    70.9    |
## |    40    |    50    | 45 | 306  | 10.2  |  1493  |   1813    |  49.77  |   60.43    |
## |    50    |    60    | 55 | 296  | 9.87  |  1789  |   1507    |  59.63  |   50.23    |
## |    60    |    70    | 65 | 301  | 10.03 |  2090  |   1211    |  69.67  |   40.37    |
## |    70    |    80    | 75 | 288  |  9.6  |  2378  |    910    |  79.27  |   30.33    |
## |    80    |    90    | 85 | 315  | 10.5  |  2693  |    622    |  89.77  |   20.73    |
## |    90    |   100    | 95 | 307  | 10.23 |  3000  |    307    |   100   |   10.23    |
## |    -     |    -     | -  | 3000 |  100  |   -    |     -     |    -    |     -      |
# Open the frequency table (with its totals row) in the data viewer.
View(TDF_mortalidad_total)
# PLOTS
# Histogram of absolute frequencies; the y axis stops at max(ni).
hist(
  x = mortalidad_infantil,
  breaks = 10,
  col = "yellow",
  main = "Gráfica N°1: Distribución de Mortalidad Infantil",
  xlab = "Mortalidad Infantil",
  ylab = "Cantidad",
  ylim = c(0, max(ni)),
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n"  # default x axis suppressed; it is drawn explicitly below
)
# Draw the x axis with one tick per break of the stored histogram object.
axis(
  side = 1,
  at = Histograma_mortalidad_inf$breaks,
  labels = Histograma_mortalidad_inf$breaks,
  las = 1,
  cex.axis = 0.9
)

# Global scale: the y axis spans the whole sample (counts) or 0-100 (percent).
# Histogram of absolute frequencies with the y axis up to the sample size.
hist(mortalidad_infantil, breaks = 10,
  main = "Gráfica N°2: Distribución de Mortalidad Infantil Global",
  xlab = "Mortalidad Infantil",
  ylab = "Cantidad",
  ylim = c(0, length(mortalidad_infantil)),
  col = "yellow",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n")
# x axis drawn by hand, one tick per break of the stored histogram object.
axis(1, at = Histograma_mortalidad_inf$breaks,
  labels = Histograma_mortalidad_inf$breaks, las = 1,
  cex.axis = 0.9)

# Bar plot of relative frequencies (percent) from the manual table, labeled
# with the class marks. The x axis holds the variable itself, which the data
# set reports per 1,000 live births, not as a percentage.
barplot(TDF_mortalidad_inf$hi,
  space = 0,
  col = "orange",
  main ="Gráfica N°3: Distribución Porcentual de Mortalidad Infantil ",
  xlab = "Mortalidad Infantil (por 1.000 nacidos vivos)",
  ylab = "Porcentaje (%)",
  names.arg = TDF_mortalidad_inf$Mc,
  ylim = c(0, 100))

# Local scale: the y axis is fitted to the tallest class.
# Histogram of absolute frequencies; y axis up to max(ni).
hist(mortalidad_infantil, breaks = 10,
  main = "Gráfica N°4: Distribución de Mortalidad Infantil",
  # The variable is a rate per 1,000 live births, not a percentage.
  xlab = "Mortalidad Infantil (por 1.000 nacidos vivos)",
  ylab = "Cantidad",
  ylim = c(0, max(ni)),
  col = "yellow",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n")
# x axis drawn by hand, one tick per break of the stored histogram object.
axis(1, at = Histograma_mortalidad_inf$breaks,
  labels = Histograma_mortalidad_inf$breaks, las = 1,
  cex.axis = 0.9)

# Bar plot of relative frequencies (percent) from the manual table.
# The upper y limit is taken from the data (rounded up to a whole percent)
# rather than fixed at 10, so no bar can exceed the axis.
barplot(TDF_mortalidad_inf$hi, space = 0,
  col = "lightblue",
  main ="Gráfica N°5: Distribución Porcentual de Mortalidad Infantil",
  xlab = "Mortalidad Infantil (por 1.000 nacidos vivos)",
  ylab = "Porcentaje (%)",
  ylim = c(0, ceiling(max(TDF_mortalidad_inf$hi))),
  names.arg = TDF_mortalidad_inf$Mc)

# Ascending and descending ogive (cumulative counts).
# The descending curve is plotted at the lower class limits and the ascending
# curve at the upper limits. The x range covers every class limit so the end
# points of both curves are not clipped, and the y range starts at 0 and
# reaches the largest cumulative count of either curve.
plot(liminf, Nides,
  main = "Gráfica N°6:Distribución de frecuencias Ascendentes y Descendentes de Mortalidad Infantil",
  # The variable is a rate per 1,000 live births, not a percentage.
  xlab = "Mortalidad Infantil (por 1.000 nacidos vivos)",
  ylab = "Cantidad",
  xlim = range(liminf, liminsup),
  ylim = c(0, max(Niasc, Nides)),
  col = "skyblue",
  cex.axis = 0.8,
  type = "o",
  lwd = 3,
  las = 1,
  xaxt = "n")
lines(liminsup, Niasc,
  col = "pink",
  type = "o",
  lwd = 3)
# One tick per class limit.
axis(1, at = unique(c(liminf, liminsup)))

# Ascending and descending ogive (cumulative percentages).
# Hides and Hiasc are cumulative sums of hi, which is already expressed in
# percent (ni / sum(ni) * 100), so they are plotted as they are; multiplying
# by 100 again would put values up to 10000 on an axis labeled "%".
plot(liminf, Hides,
  main = "Gráfica N°7: Distribución de Frecuencias Ascendentes y Descendentes de Mortalidad Infantil",
  # The variable is a rate per 1,000 live births, not a percentage.
  xlab = "Mortalidad Infantil (por 1.000 nacidos vivos)",
  ylab = "Porcentaje (%)",
  # Cover every class limit so the end points of both curves are visible.
  xlim = range(liminf, liminsup),
  ylim = c(0, 100),
  col = "red",
  type = "o",
  lwd = 2,
  xaxt = "n")
lines(liminsup, Hiasc,
  col = "blue",
  type = "o",
  lwd = 3)
# One tick per class limit.
axis(1, at = unique(c(liminf, liminsup)))

# Box plot of the infant mortality rate.
# The plot is horizontal, so the values run along the x axis and the label
# goes in xlab. It names the plotted variable (the previous label referred to
# the healthcare access index) and the title carries the next graph number.
boxplot(mortalidad_infantil,
  main = "Gráfica N°8: Distribución de frecuencias de Mortalidad Infantil",
  xlab = "Mortalidad Infantil (por 1.000 nacidos vivos)",
  col = "green",
  horizontal = TRUE)

# STATISTICAL INDICATORS
# Measures of central tendency
# Arithmetic mean
media <- round(mean(mortalidad_infantil), 2)
media
## [1] 50.81
# Mode (class mark of the most frequent class)
# Taken from the numeric histogram counts. TDF_mortalidad_total stores ni as
# text and also contains the totals row, so max() on that column compares
# strings and only returns the right class by coincidence.
max_frecuencia <- max(Histograma_mortalidad_inf$counts)
moda <- Histograma_mortalidad_inf$mids[Histograma_mortalidad_inf$counts == max_frecuencia]
moda
## [1] 25
# Median
mediana <- median(mortalidad_infantil)
mediana
## [1] 50.23
# MEASURES OF DISPERSION #
# Sample variance
varianza <- stats::var(x = mortalidad_infantil)
print(varianza)
## [1] 810.2746
# Standard deviation. The result is stored under the name `sd`, which the
# summary table further down reads.
sd <- stats::sd(x = mortalidad_infantil)
print(sd)
## [1] 28.46532
# Coefficient of variation in percent, based on the rounded mean `media`.
cv <- round(x = (sd / media) * 100, digits = 2)
print(cv)
## [1] 56.02
# SHAPE INDICATORS #
# Skewness
# Install e1071 only when it is missing, instead of re-downloading it on
# every run of the script.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)
asimetria <- skewness(mortalidad_infantil, type = 2)
asimetria
## [1] 0.02118438
# Kurtosis
# NOTE(review): skewness is requested with type = 2 while kurtosis uses the
# package default type - confirm that mixing estimator types is intended.
curtosis <- kurtosis(mortalidad_infantil)
curtosis
## [1] -1.208777
# Summary table of the indicators for the infant mortality rate.
# The range and the outlier statement are computed from the data instead of
# being typed in: the previous fixed range "[10.03 ;89.98]" did not match the
# sample, which has observations in the 0-10 and 90-100 classes.
rango_mortalidad <- range(mortalidad_infantil)
# Points beyond the box plot whiskers (1.5 * IQR rule of boxplot.stats()).
valores_atipicos <- boxplot.stats(mortalidad_infantil)$out
texto_atipicos <- if (length(valores_atipicos) == 0) {
  "No hay presencia de valores atipicos"
} else {
  paste("Hay", length(valores_atipicos), "valores atipicos")
}
tabla_indicadores <- data.frame(
  # The variable is a rate per 1,000 live births, not a percentage.
  "Variable" = c("Tasa de Mortalidad Infantil (por 1.000 nacidos vivos)"),
  "Rango" = sprintf("[%.2f ;%.2f]", rango_mortalidad[1], rango_mortalidad[2]),
  "X" = c(media),
  "Me" = c(round(mediana, 2)),
  # NOTE(review): kept as the stated conclusion; the script also computes a
  # modal class in `moda` - confirm which one should be reported.
  "Mo" = c("No hay moda"),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(sd, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 4)),
  "K" = c(round(curtosis, 2)),
  "Valores Atipicos" = texto_atipicos)
library(knitr)
# The caption names the variable actually summarized (it previously referred
# to population density).
kable(tabla_indicadores, align = 'c',
  caption = "Conclusiones de la variable tasa de mortalidad infantil (por 1.000 nacidos vivos)")
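## Conclusiones de la variable densidad de poblacion en personas por km²
##
## | Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores.Atipicos |
## |:--------:|:-----:|:-:|:--:|:--:|:-:|:--:|:--:|:--:|:-:|:----------------:|
## | Tasa de Mortalidad Infantil (%) | [10.03 ;89.98] | 50.81 | 50.23 | No hay moda | 810.27 | 28.47 | 56.02 | 0.0212 | -1.21 | No hay presencia de valores atipicos |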