#UNIVERSIDAD CENTRAL DEL ECUADOR
#Facultad de Ingeniería en Geología, Minas, Petróleos y Ambiental
#INGENIERIA AMBIENTAL
#AUTHOR: SOFIA HEREDIA
#FECHA: 14-05-2025
# Data loading ----
# Install each required package only when it is missing. Calling
# install.packages() unconditionally on every run re-downloads the package and,
# when the package is already loaded on Windows, ends in "Permission denied" /
# "restored '<pkg>'" warnings because the DLL in use cannot be replaced.
options(repos = c(CRAN = "https://cran.rstudio.com"))
paquetes_carga <- c("readxl", "readr")
paquetes_faltantes <- paquetes_carga[
  !vapply(paquetes_carga, requireNamespace, logical(1), quietly = TRUE)
]
if (length(paquetes_faltantes) > 0) {
  install.packages(paquetes_faltantes)
}
library(readxl)
library(readr)
# NOTE(review): absolute, machine-specific path; the script only runs where the
# CSV exists at exactly this location.
datos <- read_csv("C:/Users/Usuario/Downloads/water_pollution_disease (2).csv")
## Rows: 3000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Region, Water Source Type, Water Treatment Method
## dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the imported table.
head(datos)
## # A tibble: 6 × 24
## Country Region Year `Water Source Type` Contaminant Level (pp…¹ `pH Level`
## <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 Mexico North 2015 Lake 6.06 7.12
## 2 Brazil West 2017 Well 5.24 7.84
## 3 Indonesia Central 2022 Pond 0.24 6.43
## 4 Nigeria East 2016 Well 7.91 6.71
## 5 Mexico South 2005 Well 0.12 8.16
## 6 Ethiopia West 2013 Tap 2.93 8.21
## # ℹ abbreviated name: ¹`Contaminant Level (ppm)`
## # ℹ 18 more variables: `Turbidity (NTU)` <dbl>,
## # `Dissolved Oxygen (mg/L)` <dbl>, `Nitrate Level (mg/L)` <dbl>,
## # `Lead Concentration (µg/L)` <dbl>, `Bacteria Count (CFU/mL)` <dbl>,
## # `Water Treatment Method` <chr>,
## # `Access to Clean Water (% of Population)` <dbl>,
## # `Diarrheal Cases per 100,000 people` <dbl>, …
# ED (presumably "Estadística Descriptiva" — confirm): continuous quantitative
# variable.
# Show the structure of the imported table (column names and types).
str(datos)
## spc_tbl_ [3,000 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Country : chr [1:3000] "Mexico" "Brazil" "Indonesia" "Nigeria" ...
## $ Region : chr [1:3000] "North" "West" "Central" "East" ...
## $ Year : num [1:3000] 2015 2017 2022 2016 2005 ...
## $ Water Source Type : chr [1:3000] "Lake" "Well" "Pond" "Well" ...
## $ Contaminant Level (ppm) : num [1:3000] 6.06 5.24 0.24 7.91 0.12 2.93 0.06 3.76 0.63 9.14 ...
## $ pH Level : num [1:3000] 7.12 7.84 6.43 6.71 8.16 8.21 6.11 6.42 6.29 6.45 ...
## $ Turbidity (NTU) : num [1:3000] 3.93 4.79 0.79 1.96 4.22 4.03 3.12 1.35 1.42 0.62 ...
## $ Dissolved Oxygen (mg/L) : num [1:3000] 4.28 3.86 3.42 3.12 9.15 8.66 6.97 9.99 9.67 7.59 ...
## $ Nitrate Level (mg/L) : num [1:3000] 8.28 15.74 36.67 36.92 49.35 ...
## $ Lead Concentration (µg/L) : num [1:3000] 7.89 14.68 9.96 6.77 12.51 ...
## $ Bacteria Count (CFU/mL) : num [1:3000] 3344 2122 2330 3779 4182 ...
## $ Water Treatment Method : chr [1:3000] "Filtration" "Boiling" "None" "Boiling" ...
## $ Access to Clean Water (% of Population) : num [1:3000] 33.6 89.5 35.3 57.5 36.6 ...
## $ Diarrheal Cases per 100,000 people : num [1:3000] 472 122 274 3 466 258 208 397 265 261 ...
## $ Cholera Cases per 100,000 people : num [1:3000] 33 27 39 33 31 22 23 0 23 2 ...
## $ Typhoid Cases per 100,000 people : num [1:3000] 44 8 50 13 68 55 90 10 29 38 ...
## $ Infant Mortality Rate (per 1,000 live births): num [1:3000] 76.2 77.3 48.5 95.7 58.8 ...
## $ GDP per Capita (USD) : num [1:3000] 57057 17220 86022 31166 25661 ...
## $ Healthcare Access Index (0-100) : num [1:3000] 96.9 84.7 58.4 39.1 23 ...
## $ Urbanization Rate (%) : num [1:3000] 84.6 73.4 72.9 71.1 55.5 ...
## $ Sanitation Coverage (% of Population) : num [1:3000] 63.2 29.1 93.6 94.2 69.2 ...
## $ Rainfall (mm per year) : num [1:3000] 2800 1572 2074 937 2295 ...
## $ Temperature (°C) : num [1:3000] 4.94 16.93 21.73 3.79 31.44 ...
## $ Population Density (people per km²) : num [1:3000] 593 234 57 555 414 775 584 111 538 250 ...
## - attr(*, "spec")=
## .. cols(
## .. Country = col_character(),
## .. Region = col_character(),
## .. Year = col_double(),
## .. `Water Source Type` = col_character(),
## .. `Contaminant Level (ppm)` = col_double(),
## .. `pH Level` = col_double(),
## .. `Turbidity (NTU)` = col_double(),
## .. `Dissolved Oxygen (mg/L)` = col_double(),
## .. `Nitrate Level (mg/L)` = col_double(),
## .. `Lead Concentration (µg/L)` = col_double(),
## .. `Bacteria Count (CFU/mL)` = col_double(),
## .. `Water Treatment Method` = col_character(),
## .. `Access to Clean Water (% of Population)` = col_double(),
## .. `Diarrheal Cases per 100,000 people` = col_double(),
## .. `Cholera Cases per 100,000 people` = col_double(),
## .. `Typhoid Cases per 100,000 people` = col_double(),
## .. `Infant Mortality Rate (per 1,000 live births)` = col_double(),
## .. `GDP per Capita (USD)` = col_double(),
## .. `Healthcare Access Index (0-100)` = col_double(),
## .. `Urbanization Rate (%)` = col_double(),
## .. `Sanitation Coverage (% of Population)` = col_double(),
## .. `Rainfall (mm per year)` = col_double(),
## .. `Temperature (°C)` = col_double(),
## .. `Population Density (people per km²)` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Extract the variable under study and drop missing values.
acceso_agua <- datos$`Access to Clean Water (% of Population)`
acceso_agua <- na.omit(acceso_agua)

# Manual procedure ----
# Builds the frequency distribution table by hand.
# The extremes get their own names so the base functions min()/max() are not
# shadowed by numeric variables.
valor_min <- min(acceso_agua)
valor_max <- max(acceso_agua)
R <- valor_max - valor_min                         # range of the data
K <- floor(1 + 3.33 * log10(length(acceso_agua)))  # number of classes
A <- R / K                                         # class width

# Class limits are derived from the class index, so Li, Ls and Mc always have
# exactly K elements. The last upper limit is pinned to the observed maximum so
# rounding in valor_min + K * A can never push the maximum out of the last class.
Li <- valor_min + (seq_len(K) - 1) * A
Ls <- valor_min + seq_len(K) * A
Ls[K] <- valor_max
Mc <- (Li + Ls) / 2

# Absolute frequencies: classes are [Li, Ls) except the last one, which is
# closed on both sides (rightmost.closed = TRUE) so the maximum is counted.
ni <- tabulate(
  findInterval(acceso_agua, c(Li, valor_max), rightmost.closed = TRUE),
  nbins = K
)
sum(ni)
## [1] 3000

# Relative frequencies (in percent) and cumulative frequencies.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

TDF_acceso_agua <- data.frame(
  round(Li, 2), round(Ls, 2), round(Mc, 2), ni, round(hi, 2),
  Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)
# The "(%)" suffix belongs to the relative (Hi) columns; the Ni columns hold
# absolute counts.
colnames(TDF_acceso_agua) <- c(
  "Li", "Ls", "Mc", "ni", "hi",
  "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)"
)

# Totals row: only ni and hi have a meaningful total; other cells show "-".
totales <- c(
  Li = "-",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
# Binding a character vector turns every column of the result into character,
# so TDF_acceso_agua_total is for display only; use TDF_acceso_agua for
# numeric work.
TDF_acceso_agua_total <- rbind(TDF_acceso_agua, totales)
View(TDF_acceso_agua_total)
# Frequency distribution table derived from hist() ----
# hist() draws the first chart and returns the class breaks, midpoints and
# counts that the table below is built from.
Histograma_acceso_agua <- hist(
  acceso_agua,
  col = "purple",
  main = "Gráfica N 1: Distribución del Acceso al Agua limpia a la Población analizada",
  xlab = "Acceso al Agua",
  ylab = "cantidad"
)

# Each class is delimited by two consecutive breaks.
limites <- Histograma_acceso_agua$breaks
liminf <- head(limites, -1)
liminsup <- tail(limites, -1)
MC <- Histograma_acceso_agua$mids

# Absolute and relative (percent) frequencies.
ni <- Histograma_acceso_agua$counts
hi <- prop.table(ni) * 100

# Cumulative frequencies, ascending and descending.
Niasc <- cumsum(ni)
Nides <- sum(ni) - c(0L, head(Niasc, -1))
Hiasc <- cumsum(hi)
Hides <- rev(cumsum(rev(hi)))

# data.frame() converts these labels into syntactic names (for example
# "Marca.de.Clase"), as the printed output below shows.
TDF_acceso_agua <- data.frame(
  "Límite Inferior" = round(liminf, 2),
  "Límite Superior" = round(liminsup, 2),
  "Marca de Clase" = round(MC, 2),
  "Frecuencia (ni)" = ni,
  "Frecuencia Relativa (%)" = round(hi, 2),
  "Frec. Acum. Asc (Ni)" = Niasc,
  "Frec. Acum. Desc (Ni)" = Nides,
  "Frec. Rel. Acum. Asc (%)" = round(Hiasc, 2),
  "Frec. Rel. Acum. Desc (%)" = round(Hides, 2)
)
# Show the table
print(TDF_acceso_agua)
## Límite.Inferior Límite.Superior Marca.de.Clase Frecuencia..ni.
## 1 30 35 32.5 219
## 2 35 40 37.5 236
## 3 40 45 42.5 213
## 4 45 50 47.5 199
## 5 50 55 52.5 243
## 6 55 60 57.5 223
## 7 60 65 62.5 173
## 8 65 70 67.5 231
## 9 70 75 72.5 201
## 10 75 80 77.5 217
## 11 80 85 82.5 208
## 12 85 90 87.5 223
## 13 90 95 92.5 206
## 14 95 100 97.5 208
## Frecuencia.Relativa.... Frec..Acum..Asc..Ni. Frec..Acum..Desc..Ni.
## 1 7.30 219 3000
## 2 7.87 455 2781
## 3 7.10 668 2545
## 4 6.63 867 2332
## 5 8.10 1110 2133
## 6 7.43 1333 1890
## 7 5.77 1506 1667
## 8 7.70 1737 1494
## 9 6.70 1938 1263
## 10 7.23 2155 1062
## 11 6.93 2363 845
## 12 7.43 2586 637
## 13 6.87 2792 414
## 14 6.93 3000 208
## Frec..Rel..Acum..Asc.... Frec..Rel..Acum..Desc....
## 1 7.30 100.00
## 2 15.17 92.70
## 3 22.27 84.83
## 4 28.90 77.73
## 5 37.00 71.10
## 6 44.43 63.00
## 7 50.20 55.57
## 8 57.90 49.80
## 9 64.60 42.10
## 10 71.83 35.40
## 11 78.77 28.17
## 12 86.20 21.23
## 13 93.07 13.80
## 14 100.00 6.93
# Totals row ----
# Only ni and hi have a meaningful total; every other cell shows "-".
totales <- c(
  liminf = "-",
  liminsup = "-",
  MC = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Niasc = "-",
  Nides = "-",
  Hiasc = "-",
  Hides = "-"
)
# Binding a character vector turns every column of the result into character,
# so this table is for display only.
TDF_acceso_agua_total <- rbind(TDF_acceso_agua, totales)
# Labels must follow the column order of TDF_acceso_agua, which is
# Niasc, Nides, Hiasc, Hides. The previous order put "Hi asc(%)" over the
# descending counts and "Ni desc" over the ascending percentages.
colnames(TDF_acceso_agua_total) <- c(
  "Limininf", "Liminsup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)"
)
View(TDF_acceso_agua_total)
# Table styling ----
# Install the formatting packages only when they are missing; reinstalling a
# package that is already loaded fails on Windows with "Permission denied".
paquetes_tabla <- c("kableExtra", "dplyr")
paquetes_tabla_faltantes <- paquetes_tabla[
  !vapply(paquetes_tabla, requireNamespace, logical(1), quietly = TRUE)
]
if (length(paquetes_tabla_faltantes) > 0) {
  install.packages(paquetes_tabla_faltantes)
}
library(kableExtra)
library(dplyr)
# Render the frequency table (with its totals row) as a styled table.
kable(TDF_acceso_agua_total, align = "c",
      caption = "Tabla de Distribución de Frecuencias de Acceso de Agua limpia a la población") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
## Tabla de Distribución de Frecuencias de Acceso de Agua limpia a la población
## | Limininf | Liminsup | MC   | ni   | hi(%) | Ni asc | Hi asc(%) | Ni desc | Hi desc(%) |
## | 30       | 35       | 32.5 | 219  | 7.3   | 219    | 3000      | 7.3     | 100        |
## | 35       | 40       | 37.5 | 236  | 7.87  | 455    | 2781      | 15.17   | 92.7       |
## | 40       | 45       | 42.5 | 213  | 7.1   | 668    | 2545      | 22.27   | 84.83      |
## | 45       | 50       | 47.5 | 199  | 6.63  | 867    | 2332      | 28.9    | 77.73      |
## | 50       | 55       | 52.5 | 243  | 8.1   | 1110   | 2133      | 37      | 71.1       |
## | 55       | 60       | 57.5 | 223  | 7.43  | 1333   | 1890      | 44.43   | 63         |
## | 60       | 65       | 62.5 | 173  | 5.77  | 1506   | 1667      | 50.2    | 55.57      |
## | 65       | 70       | 67.5 | 231  | 7.7   | 1737   | 1494      | 57.9    | 49.8       |
## | 70       | 75       | 72.5 | 201  | 6.7   | 1938   | 1263      | 64.6    | 42.1       |
## | 75       | 80       | 77.5 | 217  | 7.23  | 2155   | 1062      | 71.83   | 35.4       |
## | 80       | 85       | 82.5 | 208  | 6.93  | 2363   | 845       | 78.77   | 28.17      |
## | 85       | 90       | 87.5 | 223  | 7.43  | 2586   | 637       | 86.2    | 21.23      |
## | 90       | 95       | 92.5 | 206  | 6.87  | 2792   | 414       | 93.07   | 13.8       |
## | 95       | 100      | 97.5 | 208  | 6.93  | 3000   | 208       | 100     | 6.93       |
## | -        | -        | -    | 3000 | 100   | -      | -         | -       | -          |
View(TDF_acceso_agua_total)
# PLOTS ----

# Draw a frequency histogram of `x` with custom x-axis ticks.
#   x          numeric vector to plot
#   titulo     main title of the chart
#   etiqueta_x label of the x axis
#   limite_y   length-2 numeric vector passed to `ylim`
#   color      fill colour of the bars
#   cortes     positions (and labels) of the x-axis ticks
# Called for its plotting side effect; returns NULL invisibly.
graficar_histograma <- function(x, titulo, etiqueta_x, limite_y, color,
                                cortes) {
  hist(x, breaks = 10,
       main = titulo,
       xlab = etiqueta_x,
       ylab = "Cantidad",
       ylim = limite_y,
       col = color,
       cex.main = 0.9,
       cex.lab = 1,
       cex.axis = 0.9,
       xaxt = "n")  # default x axis suppressed; drawn below on the class breaks
  axis(1, at = cortes, labels = cortes, las = 1, cex.axis = 0.9)
  invisible(NULL)
}

# Histogram (y axis limited to the largest class frequency)
graficar_histograma(
  acceso_agua,
  titulo = "Gráfica N°1: Distribución de Acceso de Agua limpia a la población",
  etiqueta_x = "Acceso de Agua limpia",
  limite_y = c(0, max(ni)),
  color = "blue",
  cortes = Histograma_acceso_agua$breaks
)

# Global view (y axis spans the whole sample size)
graficar_histograma(
  acceso_agua,
  titulo = "Gráfica N°2: Distribución de Acceso de Agua limpia a la población",
  etiqueta_x = "Acceso de Agua limpia",
  limite_y = c(0, length(acceso_agua)),
  color = "skyblue",
  cortes = Histograma_acceso_agua$breaks
)

# Global percentage bar chart (y axis spans 0-100 %).
# The bars are labelled with the class midpoints. The column is called
# `Marca.de.Clase`; the previous `TDF_acceso_agua$MC` does not exist and left
# the bars without labels. The relative-frequency column is already numeric,
# so no as.numeric() conversion is needed.
barplot(TDF_acceso_agua$Frecuencia.Relativa....,
        space = 0,
        col = "blue",
        main = "Gráfica N°3: Distribución Porcentual de Acceso de Agua limpia a la población ",
        xlab = "Acceso de Agua limpia",
        ylab = "Porcentaje (%)",
        names.arg = TDF_acceso_agua$Marca.de.Clase,
        ylim = c(0, 100))

# Local view (y axis limited to the largest class frequency)
graficar_histograma(
  acceso_agua,
  titulo = "Gráfica N°4: Distribución de Acceso de Agua limpia a la población",
  etiqueta_x = "Acceso de Agua",
  limite_y = c(0, max(ni)),
  color = "purple",
  cortes = Histograma_acceso_agua$breaks
)

# Local percentage bar chart (y axis limited to 14 %)
barplot(TDF_acceso_agua$Frecuencia.Relativa....,
        space = 0,
        col = "lightblue",
        main = "Gráfica N°5: Distribución Porcentual de Acceso al Agua limpia en la población",
        xlab = "Acceso al Agua limpia",
        ylab = "Porcentaje (%)",
        ylim = c(0, 14),
        names.arg = TDF_acceso_agua$Marca.de.Clase)

# Ascending and descending ogives ----
# The x axis spans the class limits actually present. The previous fixed
# xlim = c(10, 90) cut off every class above 90 and left an empty band below
# the first class.
rango_clases <- range(c(liminf, liminsup))

# Absolute cumulative frequencies. ylim covers both curves so the ascending
# line (added with lines()) can never fall outside the plotting region.
plot(liminf, Nides,
     main = "Gráfica N°6:Distribución de frecuencias Ascendentes y Descendentes de Acceso al Agua limpia en la población",
     xlab = "Acceso al Agua limpia",
     ylab = "Cantidad",
     xlim = rango_clases,
     ylim = range(c(Nides, Niasc)),
     col = "skyblue",
     cex.axis = 0.8,
     type = "o",
     lwd = 3,
     las = 1,
     xaxt = "n")
lines(liminsup, Niasc,
      col = "pink",
      type = "o",
      lwd = 3)
axis(1, at = seq(0, 100, by = 10))

# Percentage cumulative frequencies.
# Hides and Hiasc are already expressed in percent (hi = ni / sum(ni) * 100),
# so they are plotted as they are; multiplying by 100 again put the curves on
# a 0-10000 scale under a "Porcentaje (%)" label.
plot(liminf, Hides,
     main = "Gráfica N°7: Distribución de Frecuencias Ascendentes y Descendentes de Acceso al Agua limpia en la población",
     xlab = "Acceso al Agua limpia en la población",
     ylab = "Porcentaje (%)",
     xlim = rango_clases,
     ylim = c(0, 100),
     col = "red",
     type = "o",
     lwd = 2,
     xaxt = "n")
lines(liminsup, Hiasc,
      col = "blue",
      type = "o",
      lwd = 3)
axis(1, at = seq(0, 100, by = 10))

# Box plot ----
# Horizontal box plot of the variable. The title now carries its figure
# number (8), following Gráfica N°7; it was previously left blank ("N°:").
boxplot(acceso_agua,
        main = "Gráfica N°8: Distribución de frecuencias de Acceso al Agua limpia en la población",
        ylab = "Acceso al Agua limpia en la población",
        col = "green",
        horizontal = TRUE)

# STATISTICAL INDICATORS ----
# Measures of central tendency
# Arithmetic mean
media <- round(mean(acceso_agua), 2)
media
## [1] 64.61
# Mode: midpoint(s) of the class(es) with the highest frequency.
# Read from the numeric histogram object. The display table
# TDF_acceso_agua_total cannot be used here: its columns are character and it
# includes the totals row, so max() selected the totals row and the mode came
# out as "-".
max_frecuencia <- max(Histograma_acceso_agua$counts)
moda <- Histograma_acceso_agua$mids[
  Histograma_acceso_agua$counts == max_frecuencia
]
moda
## [1] 52.5
# Median
mediana <- median(acceso_agua)
mediana
## [1] 64.78
# MEASURES OF DISPERSION #
# Variance
varianza <- var(acceso_agua)
varianza
## [1] 412.4336
# Standard deviation (stored under its own name so the base function sd() is
# not shadowed by a number)
desviacion_estandar <- sd(acceso_agua)
desviacion_estandar
## [1] 20.30846
# Coefficient of variation, in percent (uses the rounded mean, as before)
cv <- round((desviacion_estandar / media) * 100, 2)
cv
## [1] 31.43
# MEASURES OF SHAPE #
# Skewness
# Install e1071 only when it is missing; reinstalling a loaded package fails
# on Windows with "Permission denied".
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)
asimetria <- skewness(acceso_agua, type = 2)
asimetria
## [1] 0.01772978
# Kurtosis
# NOTE(review): skewness() is called with type = 2 while kurtosis() uses its
# default type; confirm that mixing estimator types is intended.
curtosis <- kurtosis(acceso_agua)
curtosis
## [1] -1.217223
# Summary table of the indicators.
# "Rango" is computed from the data instead of being hard-coded (the previous
# literal "[10.03 ;89.98]" does not match the 30-100 span of the histogram
# classes), and "Mo" reports the computed modal class midpoint(s).
# NOTE(review): the outlier statement is still a fixed text; confirm it
# against the box plot.
rango_datos <- range(acceso_agua)
tabla_indicadores <- data.frame(
  "Variable" = c("Acceso al Agua limpia en la población"),
  "Rango" = sprintf("[%.2f ;%.2f]", rango_datos[1], rango_datos[2]),
  "X" = c(media),
  "Me" = c(round(mediana, 2)),
  "Mo" = paste(moda, collapse = ", "),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(desviacion_estandar, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 4)),
  "K" = c(round(curtosis, 2)),
  "Valores Atipicos" = "No hay presencia de valores atipicos"
)
library(knitr)
kable(tabla_indicadores, align = 'c', caption = "Conclusiones de la variable
Acceso al Agua limpia en la población ")
## Conclusiones de la variable Acceso al Agua limpia en la población
## | Acceso al Agua limpia en la población | [10.03 ;89.98] | 64.61 | 64.78 | No hay moda | 412.43 | 20.31 | 31.43 | 0.0177 | -1.22 | No hay presencia de valores atipicos |