#UNIVERSIDAD CENTRAL DE ECUADOR
#Facultad de Ingeniería en Geología,Minas, Petroleos y Ambiental
#INGENIERIA AMBIENTAL
#AUTHOR: SOFIA HEREDIA
#FECHA: 14-05-2025
# Carga del conjunto de datos
options(repos = c(CRAN = "https://cran.rstudio.com"))
install.packages("readxl")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readxl'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readxl\libs\x64\readxl.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readxl\libs\x64\readxl.dll:
## Permission denied
## Warning: restored 'readxl'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpcBXb9o\downloaded_packages
install.packages("readr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readr\libs\x64\readr.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readr\libs\x64\readr.dll:
## Permission denied
## Warning: restored 'readr'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpcBXb9o\downloaded_packages
library(readxl)
library(readr)
datos <- read_csv("C:/Users/Usuario/Downloads/water_pollution_disease (2).csv")
## Rows: 3000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Region, Water Source Type, Water Treatment Method
## dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 24
## Country Region Year `Water Source Type` Contaminant Level (pp…¹ `pH Level`
## <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 Mexico North 2015 Lake 6.06 7.12
## 2 Brazil West 2017 Well 5.24 7.84
## 3 Indonesia Central 2022 Pond 0.24 6.43
## 4 Nigeria East 2016 Well 7.91 6.71
## 5 Mexico South 2005 Well 0.12 8.16
## 6 Ethiopia West 2013 Tap 2.93 8.21
## # ℹ abbreviated name: ¹`Contaminant Level (ppm)`
## # ℹ 18 more variables: `Turbidity (NTU)` <dbl>,
## # `Dissolved Oxygen (mg/L)` <dbl>, `Nitrate Level (mg/L)` <dbl>,
## # `Lead Concentration (µg/L)` <dbl>, `Bacteria Count (CFU/mL)` <dbl>,
## # `Water Treatment Method` <chr>,
## # `Access to Clean Water (% of Population)` <dbl>,
## # `Diarrheal Cases per 100,000 people` <dbl>, …
###---------------------------------------------ESTADÍSTICA_DESCRIPTIVA---------------------------------------------------------------------------------###
# ED VARIABLE CUANTITATIVA CONTINUA
str(datos)
## spc_tbl_ [3,000 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Country : chr [1:3000] "Mexico" "Brazil" "Indonesia" "Nigeria" ...
## $ Region : chr [1:3000] "North" "West" "Central" "East" ...
## $ Year : num [1:3000] 2015 2017 2022 2016 2005 ...
## $ Water Source Type : chr [1:3000] "Lake" "Well" "Pond" "Well" ...
## $ Contaminant Level (ppm) : num [1:3000] 6.06 5.24 0.24 7.91 0.12 2.93 0.06 3.76 0.63 9.14 ...
## $ pH Level : num [1:3000] 7.12 7.84 6.43 6.71 8.16 8.21 6.11 6.42 6.29 6.45 ...
## $ Turbidity (NTU) : num [1:3000] 3.93 4.79 0.79 1.96 4.22 4.03 3.12 1.35 1.42 0.62 ...
## $ Dissolved Oxygen (mg/L) : num [1:3000] 4.28 3.86 3.42 3.12 9.15 8.66 6.97 9.99 9.67 7.59 ...
## $ Nitrate Level (mg/L) : num [1:3000] 8.28 15.74 36.67 36.92 49.35 ...
## $ Lead Concentration (µg/L) : num [1:3000] 7.89 14.68 9.96 6.77 12.51 ...
## $ Bacteria Count (CFU/mL) : num [1:3000] 3344 2122 2330 3779 4182 ...
## $ Water Treatment Method : chr [1:3000] "Filtration" "Boiling" "None" "Boiling" ...
## $ Access to Clean Water (% of Population) : num [1:3000] 33.6 89.5 35.3 57.5 36.6 ...
## $ Diarrheal Cases per 100,000 people : num [1:3000] 472 122 274 3 466 258 208 397 265 261 ...
## $ Cholera Cases per 100,000 people : num [1:3000] 33 27 39 33 31 22 23 0 23 2 ...
## $ Typhoid Cases per 100,000 people : num [1:3000] 44 8 50 13 68 55 90 10 29 38 ...
## $ Infant Mortality Rate (per 1,000 live births): num [1:3000] 76.2 77.3 48.5 95.7 58.8 ...
## $ GDP per Capita (USD) : num [1:3000] 57057 17220 86022 31166 25661 ...
## $ Healthcare Access Index (0-100) : num [1:3000] 96.9 84.7 58.4 39.1 23 ...
## $ Urbanization Rate (%) : num [1:3000] 84.6 73.4 72.9 71.1 55.5 ...
## $ Sanitation Coverage (% of Population) : num [1:3000] 63.2 29.1 93.6 94.2 69.2 ...
## $ Rainfall (mm per year) : num [1:3000] 2800 1572 2074 937 2295 ...
## $ Temperature (°C) : num [1:3000] 4.94 16.93 21.73 3.79 31.44 ...
## $ Population Density (people per km²) : num [1:3000] 593 234 57 555 414 775 584 111 538 250 ...
## - attr(*, "spec")=
## .. cols(
## .. Country = col_character(),
## .. Region = col_character(),
## .. Year = col_double(),
## .. `Water Source Type` = col_character(),
## .. `Contaminant Level (ppm)` = col_double(),
## .. `pH Level` = col_double(),
## .. `Turbidity (NTU)` = col_double(),
## .. `Dissolved Oxygen (mg/L)` = col_double(),
## .. `Nitrate Level (mg/L)` = col_double(),
## .. `Lead Concentration (µg/L)` = col_double(),
## .. `Bacteria Count (CFU/mL)` = col_double(),
## .. `Water Treatment Method` = col_character(),
## .. `Access to Clean Water (% of Population)` = col_double(),
## .. `Diarrheal Cases per 100,000 people` = col_double(),
## .. `Cholera Cases per 100,000 people` = col_double(),
## .. `Typhoid Cases per 100,000 people` = col_double(),
## .. `Infant Mortality Rate (per 1,000 live births)` = col_double(),
## .. `GDP per Capita (USD)` = col_double(),
## .. `Healthcare Access Index (0-100)` = col_double(),
## .. `Urbanization Rate (%)` = col_double(),
## .. `Sanitation Coverage (% of Population)` = col_double(),
## .. `Rainfall (mm per year)` = col_double(),
## .. `Temperature (°C)` = col_double(),
## .. `Population Density (people per km²)` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
acceso_salud <- datos$`Healthcare Access Index (0-100)`
acceso_salud <- na.omit(acceso_salud)
# PROCEDIMIENTO MANUAL
min <-min(acceso_salud)
max <-max(acceso_salud)
R <-max-min
K <- floor(1+3.33*log10(length(acceso_salud)))
A <-R/K
Li <-round(seq(from=min,to=max-A,by=A),2)
Ls <-round(seq(from=min+A,to=max,by=A),2)
Mc <-(Li+Ls)/2
ni<-c()
for (i in 1:K) {
if (i < K) {
ni[i] <- length(subset(acceso_salud, acceso_salud >= Li[i] & acceso_salud < Ls[i]))
} else {
ni[i] <- length(subset(acceso_salud, acceso_salud >= Li[i] & acceso_salud <= Ls[i]))
}
}
sum(ni)
## [1] 3000
hi <-ni/sum(ni)*100
Ni_asc<-cumsum(ni)
Hi_asc<-cumsum(hi)
Ni_desc<-rev(cumsum(rev(ni)))
Hi_desc<-rev(cumsum(rev(hi)))
TDF_acceso <- data.frame(
Li, Ls, Mc, ni, round(hi, 2), Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)
colnames(TDF_acceso) <- c("Li","Ls","Mc","ni","hi","Ni_asc(%)","Ni_desc(%)","Hi_asc","Hi_desc")
#Crear fila de totales
totales <-c(
Li="-",
Ls="-",
Mc="-",
ni=sum(ni),
hi=sum(hi),
Ni_asc="-",
Ni_desc="-",
Hi_asc="-",
Hi_desc="-")
TDF_acceso_total <-rbind(TDF_acceso,totales)
View(TDF_acceso_total)
# Tabla de Distribución de frecuencia
HistogramaAcceso_salud <- hist(acceso_salud, main="Gráfica N: Distribución de Acceso a la Salud",
xlab = "acceso a la salud",
ylab = "cantidad",col = "purple")

limites <- HistogramaAcceso_salud$breaks
liminf <- limites[1:10]
liminsup <- limites[2:11]
MC <- HistogramaAcceso_salud$mids
ni <- HistogramaAcceso_salud$counts
hi <- ni/sum(ni)*100
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nides <- rev(cumsum(rev(ni)))
Hides <- rev(cumsum(rev(hi)))
TDF_fuente <- data.frame(liminf,liminsup,MC,ni,round(hi,2),
Niasc,Nides,round(Hiasc,2),
round(Hides,2))
# crear de fila de totales
totales <- c(
liminf= "-",
liminsup= "-",
MC= "-",
ni= sum(ni),
hi= sum(hi),
Niasc= "-",
Nides= "-",
Hiasc= "-",
Hides= "-")
TDF_fuente <- rbind(TDF_fuente,totales)
colnames(TDF_fuente) <- c("Limininf","Liminsup","MC","ni","hi(%)",
"Ni asc","Hi asc(%)","Ni desc","Hi desc(%)")
View(TDF_fuente)
# Estetíca de la tabla
install.packages("kableExtra")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'kableExtra' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpcBXb9o\downloaded_packages
install.packages("dplyr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\dplyr\libs\x64\dplyr.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\dplyr\libs\x64\dplyr.dll:
## Permission denied
## Warning: restored 'dplyr'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpcBXb9o\downloaded_packages
library(kableExtra)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
##
## The following object is masked from 'package:kableExtra':
##
## group_rows
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
kable(TDF_acceso_total, align = "c",
caption = "Tabla de Distribución de Frecuencias de Acceso a la Salud de los países del estudio") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Tabla de Distribución de Frecuencias de Acceso a la Salud de los países
del estudio
|
Li
|
Ls
|
Mc
|
ni
|
hi
|
Ni_asc(%)
|
Ni_desc(%)
|
Hi_asc
|
Hi_desc
|
|
0.19
|
8.51
|
4.35
|
261
|
8.7
|
261
|
3000
|
8.7
|
100
|
|
8.51
|
16.82
|
12.665
|
251
|
8.37
|
512
|
2739
|
17.07
|
91.3
|
|
16.82
|
25.14
|
20.98
|
247
|
8.23
|
759
|
2488
|
25.3
|
82.93
|
|
25.14
|
33.45
|
29.295
|
236
|
7.87
|
995
|
2241
|
33.17
|
74.7
|
|
33.45
|
41.77
|
37.61
|
249
|
8.3
|
1244
|
2005
|
41.47
|
66.83
|
|
41.77
|
50.08
|
45.925
|
250
|
8.33
|
1494
|
1756
|
49.8
|
58.53
|
|
50.08
|
58.4
|
54.24
|
264
|
8.8
|
1758
|
1506
|
58.6
|
50.2
|
|
58.4
|
66.72
|
62.56
|
239
|
7.97
|
1997
|
1242
|
66.57
|
41.4
|
|
66.72
|
75.03
|
70.875
|
259
|
8.63
|
2256
|
1003
|
75.2
|
33.43
|
|
75.03
|
83.35
|
79.19
|
251
|
8.37
|
2507
|
744
|
83.57
|
24.8
|
|
83.35
|
91.66
|
87.505
|
239
|
7.97
|
2746
|
493
|
91.53
|
16.43
|
|
91.66
|
99.98
|
95.82
|
254
|
8.47
|
3000
|
254
|
100
|
8.47
|
|
|
|
|
3000
|
100
|
|
|
|
|
View(TDF_fuente)
# GRAFICAS
# Histograma
hist(acceso_salud, breaks = 10,
main = "Gráfica N°1: Distribución de Acceso a la Salud",
xlab = "Acceso a la Salud",
ylab = "Cantidad",
ylim = c(0, max(ni)),
col = "purple",
cex.main = 0.9,
cex.lab = 1,
cex.axis = 0.9,
xaxt = "n")
axis(1, at = HistogramaAcceso_salud$breaks,
labels = HistogramaAcceso_salud$breaks, las = 1,
cex.axis = 0.9)

# Global
hist(acceso_salud, breaks = 10,
main = "Gráfica N°2: Distribución de Acceso a la Salud Global",
xlab = "Acceso a la Salud",
ylab = "Cantidad",
ylim = c(0, length(acceso_salud)),
col = "purple",
cex.main = 0.9,
cex.lab = 1,
cex.axis = 0.9,
xaxt = "n")
axis(1, at = HistogramaAcceso_salud$breaks,
labels = HistogramaAcceso_salud$breaks, las = 1,
cex.axis = 0.9)

barplot(TDF_acceso$hi,
space=0,
col = "skyblue",
main ="Gráfica N°3: Distribución Porcentual del Acceso a la Salud ",
xlab="Acceso a la Salud (%)",
ylab="Porcentaje (%)",
names.arg= TDF_acceso$Mc,
ylim = c(0,100))

# Local
hist(acceso_salud, breaks = 10,
main = "Gráfica N°4: Distribución de Acceso a la Salud",
xlab = "Acceso a la Salud(%)",
ylab = "Cantidad",
ylim = c(0,max(ni)),
col = "purple",
cex.main = 0.9,
cex.lab = 1,
cex.axis = 0.9,
xaxt = "n")
axis(1, at = HistogramaAcceso_salud$breaks,
labels = HistogramaAcceso_salud$breaks, las = 1,
cex.axis = 0.9)

barplot(TDF_acceso$hi,space=0,
col = "lightblue",
main ="Gráfica N°5: Distribución Porcentual acceso a la salud",
xlab="Acceso a la Salud(%)",
ylab="Porcentaje (%)",
ylim = c(0,14),
names.arg = TDF_acceso$Mc)

# Diagrama de Ojiva Ascendente y Descendente
plot(liminf ,Nides,
main = "Gráfica N°6:Distribución de frecuencias Ascendentes y Descendentes del Acceso a la Salud",
xlab = "Acceso a la Salud(%)",
ylab = "Cantidad",
xlim = c(10,90),
col = "skyblue",
cex.axis=0.8,
type = "o",
lwd = 3,
las=1,
xaxt="n")
lines(liminsup,Niasc,
col = "pink",
type = "o",
lwd = 3)
axis(1, at = seq(0, 100, by = 10))

# Diagrama de Ojiva Ascendente y Descendente Porcentual
plot(liminf, Hides * 100,
main = "Gráfica N°7: Distribución de Frecuencias Ascendentes y Descendentes de Acceso a la Salud",
xlab = " Tasa de urbanización(%)",
ylab = "Porcentaje (%)",
xlim = c(10,90),
col = "red",
type = "o",
lwd = 2,
xaxt="n")
lines(liminsup, Hiasc * 100,
col = "blue",
type = "o",
lwd = 3)
axis(1, at = seq(0,100,by=10))

# Diagrama de caja
boxplot(acceso_salud,
main = "Gráfica N°: Distribución de frecuencias de acceso a la Salud",
ylab = "Índice de Acceso a la Salud (0-100)",
col = "lightblue",
horizontal = TRUE)

# INDICADORES ESTADISTICOS
# Indicadores de Tendencia Central
# Media aritmética
media <- round(mean(acceso_salud), 2)
media
## [1] 50.03
# Moda
max_frecuencia <- max(TDF_acceso_total$ni)
moda <- TDF_acceso_total$MC[TDF_acceso_total$ni == max_frecuencia]
moda
## NULL
# Mediana
mediana <- median(acceso_salud)
mediana
## [1] 50.39
# INDICADORES DE DISPERSIÓN #
# Varianza
varianza <- var(acceso_salud)
varianza
## [1] 835.0179
# Desviación Estándar
sd <- sd(acceso_salud)
sd
## [1] 28.89668
# Coeficiente de Variación
cv <- round((sd / media) * 100, 2)
cv
## [1] 57.76
# INDICADORES DE FORMA #
# Asimetría
install.packages("e1071")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'e1071' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'e1071'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\e1071\libs\x64\e1071.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\e1071\libs\x64\e1071.dll:
## Permission denied
## Warning: restored 'e1071'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpcBXb9o\downloaded_packages
library(e1071)
asimetria <- skewness(acceso_salud, type = 2)
asimetria
## [1] -0.01223584
#Curtosis
curtosis <- kurtosis(acceso_salud)
curtosis
## [1] -1.19759
tabla_indicadores <- data.frame("Variable" =c("Tasa de Acceso a la Salud (%)"),
"Rango" = c("[10.03 ;89.98]"),
"X" = c(media),
"Me" = c(round(mediana,2)),
"Mo" = c("No hay moda"),
"V" = c(round(varianza,2)),
"Sd" = c(round(sd,2)),
"Cv" = c(cv),
"As" = c(round(asimetria,4)),
"K" = c(round(curtosis,2)),
"Valores Atipicos" = "No hay presencia de valores atipicos")
library(knitr)
kable(tabla_indicadores, align = 'c', caption = "Conclusiones de la variable
densidad de poblacion en personas por km² ")
Conclusiones de la variable densidad de poblacion en personas
por km²
| Tasa de Acceso a la Salud (%) |
[10.03 ;89.98] |
50.03 |
50.39 |
No hay moda |
835.02 |
28.9 |
57.76 |
-0.0122 |
-1.2 |
No hay presencia de valores atipicos |