#UNIVERSIDAD CENTRAL DE ECUADOR
#Facultad de Ingeniería en Geología,Minas, Petroleos y Ambiental
#INGENIERIA AMBIENTAL
#AUTHOR: SOFIA HEREDIA
#FECHA: 14-05-2025
#carga de datos
options(repos = c(CRAN = "https://cran.rstudio.com"))
install.packages("readxl")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readxl'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readxl\libs\x64\readxl.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readxl\libs\x64\readxl.dll:
## Permission denied
## Warning: restored 'readxl'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpAjRrty\downloaded_packages
install.packages("readr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readr\libs\x64\readr.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readr\libs\x64\readr.dll:
## Permission denied
## Warning: restored 'readr'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpAjRrty\downloaded_packages
library(readxl)
library(readr)
datos <- read_csv("C:/Users/Usuario/Downloads/water_pollution_disease (2).csv")
## Rows: 3000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Region, Water Source Type, Water Treatment Method
## dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 24
## Country Region Year `Water Source Type` Contaminant Level (pp…¹ `pH Level`
## <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 Mexico North 2015 Lake 6.06 7.12
## 2 Brazil West 2017 Well 5.24 7.84
## 3 Indonesia Central 2022 Pond 0.24 6.43
## 4 Nigeria East 2016 Well 7.91 6.71
## 5 Mexico South 2005 Well 0.12 8.16
## 6 Ethiopia West 2013 Tap 2.93 8.21
## # ℹ abbreviated name: ¹`Contaminant Level (ppm)`
## # ℹ 18 more variables: `Turbidity (NTU)` <dbl>,
## # `Dissolved Oxygen (mg/L)` <dbl>, `Nitrate Level (mg/L)` <dbl>,
## # `Lead Concentration (µg/L)` <dbl>, `Bacteria Count (CFU/mL)` <dbl>,
## # `Water Treatment Method` <chr>,
## # `Access to Clean Water (% of Population)` <dbl>,
## # `Diarrheal Cases per 100,000 people` <dbl>, …
# VARIABLE CUANTITATIVA DISCRETA
casos_tifus <- datos$`Typhoid Cases per 100,000 people`
casos_tifus <- na.omit(datos$`Typhoid Cases per 100,000 people`)
summary(casos_tifus)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 24.00 49.00 49.27 75.00 99.00
# Agrupar por intervalos
tamaño_clase <- 10
min_val <- floor(min(casos_tifus) / tamaño_clase) * tamaño_clase
max_val <- ceiling(max(casos_tifus) / tamaño_clase) * tamaño_clase
breaks <- seq(min_val, max_val + tamaño_clase, by = tamaño_clase)
labels <- paste(head(breaks, -1), tail(breaks, -1), sep = "-")
intervalos <- cut(casos_tifus,
breaks = breaks,
include.lowest = TRUE,
right = FALSE,
labels = labels)
ni <- as.numeric(table(intervalos))
hi <- ni / sum(ni)
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
TDF_TIFUS <- data.frame(
"Intervalos" = labels,
"ni" = ni,
"hi(%)" = round(hi * 100, 2),
"Ni asc" = Ni_asc,
"Ni desc" = Ni_desc,
"Hi asc(%)" = round(Hi_asc * 100, 2),
"Hi desc(%)" = round(Hi_desc * 100, 2)
)
print(TDF_TIFUS)
## Intervalos ni hi... Ni.asc Ni.desc Hi.asc... Hi.desc...
## 1 0-10 293 9.77 293 3000 9.77 100.00
## 2 10-20 300 10.00 593 2707 19.77 90.23
## 3 20-30 319 10.63 912 2407 30.40 80.23
## 4 30-40 319 10.63 1231 2088 41.03 69.60
## 5 40-50 297 9.90 1528 1769 50.93 58.97
## 6 50-60 287 9.57 1815 1472 60.50 49.07
## 7 60-70 281 9.37 2096 1185 69.87 39.50
## 8 70-80 285 9.50 2381 904 79.37 30.13
## 9 80-90 324 10.80 2705 619 90.17 20.63
## 10 90-100 295 9.83 3000 295 100.00 9.83
## 11 100-110 0 0.00 3000 0 100.00 0.00
# Tabla de distribución de frecuencia
TDF_TIFUS <- data.frame(
"Intervalos" = TDF_TIFUS$Intervalos,
"ni" = ni,
"hi(%)" = round(hi * 100, 2),
"Ni asc" = Ni_asc,
"Ni desc" = Ni_desc,
"Hi asc(%)" = round(Hi_asc * 100, 2),
"Hi desc(%)" = round(Hi_desc * 100, 2)
)
# Crear la fila de totales
totales <- data.frame(
"Intervalos" = "TOTAL",
"ni" = sum(ni),
"hi(%)" = round(sum(hi) * 100, 2),
"Ni asc" = "-",
"Ni desc" = "-",
"Hi asc(%)" = "-",
"Hi desc(%)" = "-"
)
TDF_TIFUS_COMPLETA <- rbind(TDF_TIFUS, totales)
library(knitr)
library(kableExtra)
kable(TDF_TIFUS_COMPLETA, align = 'c',
caption = "Tabla de Distribución de Frecuencias de Casos de Tifus por cada 100,000 personas") %>%
kable_styling(full_width = FALSE, position = "center",
bootstrap_options = c("striped", "hover", "condensed"))
Tabla de Distribución de Frecuencias de Casos de Tifus por cada 100,000
personas
|
Intervalos
|
ni
|
hi…
|
Ni.asc
|
Ni.desc
|
Hi.asc…
|
Hi.desc…
|
|
0-10
|
293
|
9.77
|
293
|
3000
|
9.77
|
100
|
|
10-20
|
300
|
10.00
|
593
|
2707
|
19.77
|
90.23
|
|
20-30
|
319
|
10.63
|
912
|
2407
|
30.4
|
80.23
|
|
30-40
|
319
|
10.63
|
1231
|
2088
|
41.03
|
69.6
|
|
40-50
|
297
|
9.90
|
1528
|
1769
|
50.93
|
58.97
|
|
50-60
|
287
|
9.57
|
1815
|
1472
|
60.5
|
49.07
|
|
60-70
|
281
|
9.37
|
2096
|
1185
|
69.87
|
39.5
|
|
70-80
|
285
|
9.50
|
2381
|
904
|
79.37
|
30.13
|
|
80-90
|
324
|
10.80
|
2705
|
619
|
90.17
|
20.63
|
|
90-100
|
295
|
9.83
|
3000
|
295
|
100
|
9.83
|
|
100-110
|
0
|
0.00
|
3000
|
0
|
100
|
0
|
|
TOTAL
|
3000
|
100.00
|
|
|
|
|
# GRAFICAS
# Diagrama de barras local
barplot(ni,
main="Gráfica N°1: Distribución por intervalos de casos de Tifus por cada 100.000 personas",
xlab = "Casos de Tifus",
ylab = "Cantidad",
col = "blue",
ylim = c(0,max(ni)),
names.arg=TDF_TIFUS$Intervalos,
las=2,
cex.names=0.63)
mtext("*En los intervalos los limites superiores son abiertos", side = 1, line = 5, adj = 0, cex = 0.7)

barplot(TDF_TIFUS$hi...,
main="Gráfica N°2: Distribución porcentual de casos de Tifus por cada 100.000 personas",
xlab = "Casos de Tifus",
ylab = "Porcentaje",
col = "lightgreen",
names.arg=TDF_TIFUS$Intervalos,
las=2,
cex.names=0.63)

# Diagrama de barras global
barplot(ni,
main="Gráfica N°3: Distribución por intervalos de casos de Tifus por cada 100.000 personas",
xlab = "Casos de Tifus",
ylab = "Cantidad",
col = "orange",
ylim = c(0,3000),
names.arg=TDF_TIFUS$Intervalos,
las=2,
cex.names=0.63)

barplot(TDF_TIFUS$hi...,
main="Gráfica N°4: Distribución porcentual de casos de Tifus por cada 100.000 personas",
xlab = "Casos de Tifus",
ylab = "Porcentaje (%)",
col = "yellow",
ylim = c(0,100),
names.arg=TDF_TIFUS$Intervalos,
las=2,
cex.names=0.63)

# Diagrama de Ojiva Ascendente y Descendente
x_pos <- 1:length(TDF_TIFUS$Intervalos)
plot(x_pos ,Ni_desc,
main = "Gráfica N°5: Distribución de Frecuencias Ascendentes y Descendentes de casos de Tifus por cada 100.000 personas",
xlab = " casos de Tifus por cada 100.000 personas",
ylab = "Cantidad",
col = "orange",
type = "p",
lwd = 3,
xaxt="n")
lines(x_pos,Ni_asc,
col = "green",
type = "p",
lwd = 3)
axis(side = 1, at = x_pos, labels = TDF_TIFUS$Intervalos, las = 2, cex.axis = 0.7)

# Diagrama de Ojiva Ascendente y Descendente Porcentual
x_por <- 1:length(TDF_TIFUS$hi...)
plot(x_pos, Hi_desc * 100,
main = "Gráfica N°6:Distribución de Frecuencias Ascendentes y Descendentes de casos de Tifus por cada 100.000 personas",
xlab = "casos de Tifus por cada 100.000 personas",
ylab = "Porcentaje (%)",
col = "red",
type = "p",
lwd = 2,
xaxt="n")
lines(x_pos, Hi_asc * 100,
col = "blue",
type = "p",
lwd = 3)
axis(side = 1, at = x_pos, labels = TDF_TIFUS$Intervalos, las = 2, cex.axis = 0.7)

# Diagrama de Caja
boxplot(casos_tifus,
horizontal = TRUE,
main = "Gráfica N°7:Distribución de Frecuencias de casos de Tifus por cada 100.000 personas",
xlab = "Casos de Tifus por cada 100.000 personas",
col = "red",
pch = 1)

# INDICADORES ESTADISTICOS
# Indicadores de Tendencia Central
# Media aritmética
media <- round(mean(casos_tifus), 2)
media
## [1] 49.27
# Moda
max_frecuencia <- max(TDF_TIFUS$ni)
moda <- TDF_TIFUS$Intervalos[TDF_TIFUS$ni == max_frecuencia]
moda
## [1] "80-90"
# Mediana
mediana <- median(casos_tifus)
mediana
## [1] 49
# INDICADORES DE DISPERSIÓN
# Varianza
varianza <- var(casos_tifus)
varianza
## [1] 840.0818
# Desviación Estándar
sd <- sd(casos_tifus)
sd
## [1] 28.98416
# Coeficiente de Variación
cv <- round((sd / media) * 100, 2)
cv
## [1] 58.83
# INDICADORES DE FORMA
# Asimetría
library(e1071)
asimetria <- skewness(casos_tifus, type = 2)
asimetria
## [1] 0.01644577
#Curtosis
curtosis <- kurtosis(casos_tifus)
curtosis
## [1] -1.218473
tabla_indicadores <- data.frame("Variable" =c("casos de Tifus por cada 100.000 personas"),
"Rango" = c("[10 ;999]"),
"X" = c(media),
"Me" = c(round(mediana,2)),
"Mo" = c("[500;600]"),
"V" = c(round(varianza,2)),
"Sd" = c(round(sd,2)),
"Cv" = c(cv),
"As" = c(round(asimetria,2)),
"K" = c(round(curtosis,2)),
"Valores Atipicos" = "No hay presencia de valores atípicos")
library(kableExtra)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
##
## The following object is masked from 'package:kableExtra':
##
## group_rows
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
kable(tabla_indicadores, align = "c",
caption = "Tabla de Distribución de Frecuencias de casos de Tifus por cada 100.000 personas") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Tabla de Distribución de Frecuencias de casos de Tifus por cada 100.000
personas
|
Variable
|
Rango
|
X
|
Me
|
Mo
|
V
|
Sd
|
Cv
|
As
|
K
|
Valores.Atipicos
|
|
casos de Tifus por cada 100.000 personas
|
[10 ;999]
|
49.27
|
49
|
[500;600]
|
840.08
|
28.98
|
58.83
|
0.02
|
-1.22
|
No hay presencia de valores atípicos
|