# =============================
# CARGA DE LIBRERÍAS
# =============================
library(kableExtra)
library(knitr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(e1071)
# =============================
# CARGA DE DATOS
# =============================
# Switch the working directory to /cloud/project so the relative CSV path
# used below is resolved from there.
# NOTE(review): setwd() with an absolute path changes global state and ties the
# script to this environment; consider a project-relative path instead.
setwd("/cloud/project")
# Read the data set; header = TRUE takes the column names from the first row.
datos <- read.csv("china_water_pollution_data.csv", header = TRUE)
# Vector analysed by the rest of the script (the Dissolved_Oxygen_mg_L column;
# presumably dissolved oxygen in mg/L, judging by the column name).
Oxigeno_disuelto <- datos$Dissolved_Oxygen_mg_L
# =============================
# TABLA DE FRECUENCIAS MANUAL
# =============================
# Manual frequency table for Oxigeno_disuelto.
#
# Produces, for K equal-width classes:
#   lim_inf, lim_sup : class limits (rounded to 2 decimals for display)
#   MC               : class midpoints
#   ni, hi           : absolute counts and percentages per class
#   Ni_asc, Ni_desc  : ascending / descending cumulative counts
#   Hi_asc, Hi_desc  : ascending / descending cumulative percentages

# Fail early with a clear message instead of propagating NA through seq().
stopifnot(
  is.numeric(Oxigeno_disuelto),
  length(Oxigeno_disuelto) > 0,
  !anyNA(Oxigeno_disuelto)
)

minimo <- min(Oxigeno_disuelto)
maximo <- max(Oxigeno_disuelto)
R <- maximo - minimo
# Equal-width classes need a non-degenerate range.
stopifnot(R > 0)

# Number of classes (Sturges' rule) and class width.
K <- floor(1 + 3.33 * log10(length(Oxigeno_disuelto)))
A <- R / K

# One single vector of K + 1 class boundaries. Building the lower and upper
# limits from the same vector guarantees lim_sup[i] == lim_inf[i + 1], so no
# observation can fall into a gap between two consecutive classes.
# round(., 8) only strips floating-point noise so that a data value lying
# exactly on a boundary compares equal to it; it does not move the boundary.
cortes <- round(minimo + A * (0:K), 8)
# Pin the outer boundaries to the exact extremes so min and max are counted.
cortes[1] <- minimo
cortes[K + 1] <- maximo

# Count with the exact boundaries: classes are [a, b), the last one is [a, b].
ni <- as.vector(table(cut(
  Oxigeno_disuelto,
  breaks = cortes,
  right = FALSE,
  include.lowest = TRUE
)))

# Rounding is applied for presentation only, after counting.
lim_inf <- round(cortes[-(K + 1)], 2)
lim_sup <- round(cortes[-1], 2)
MC <- round((cortes[-(K + 1)] + cortes[-1]) / 2, 2)

hi <- round((ni / sum(ni)) * 100, 2)
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
# Cumulative percentages come from the cumulative counts, not from the already
# rounded hi, so rounding errors do not accumulate (the series ends at 100).
Hi_asc <- round((Ni_asc / sum(ni)) * 100, 2)
Hi_desc <- round((Ni_desc / sum(ni)) * 100, 2)
# Assemble the frequency table, one row per class.
TDF_Oxigeno <- data.frame(
  Lim_inf = lim_inf, Lim_sup = lim_sup, MC = MC,
  ni = ni, hi = hi,
  Ni_asc = Ni_asc, Ni_desc = Ni_desc,
  Hi_asc = Hi_asc, Hi_desc = Hi_desc
)

# Totals row: only ni and hi are summed; every other column shows a dash.
totales <- data.frame(
  Lim_inf = "TOTAL", Lim_sup = "-", MC = "-",
  ni = sum(ni), hi = sum(hi),
  Ni_asc = "-", Ni_desc = "-",
  Hi_asc = "-", Hi_desc = "-"
)

# Append the totals row below the class rows.
TDF_Oxigeno_total <- rbind(TDF_Oxigeno, totales)

# Render the table with centred columns and striped/hover styling.
TDF_Oxigeno_total %>%
  kable(
    align = "c",
    caption = "Tabla de distribución de Oxígeno Disuelto (mg/L)"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
Tabla de distribución de Oxígeno Disuelto (mg/L)

| Lim_inf | Lim_sup | MC | ni | hi | Ni_asc | Ni_desc | Hi_asc | Hi_desc |
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| 1.47 | 2.55 | 2.01 | 6 | 0.20 | 6 | 2997 | 0.2 | 99.99 |
| 2.55 | 3.64 | 3.095 | 30 | 1.00 | 36 | 2991 | 1.2 | 99.79 |
| 3.64 | 4.72 | 4.18 | 114 | 3.80 | 150 | 2961 | 5 | 98.79 |
| 4.72 | 5.81 | 5.265 | 247 | 8.24 | 397 | 2847 | 13.24 | 94.99 |
| 5.81 | 6.89 | 6.35 | 470 | 15.68 | 867 | 2600 | 28.92 | 86.75 |
| 6.89 | 7.98 | 7.435 | 608 | 20.29 | 1475 | 2130 | 49.21 | 71.07 |
| 7.98 | 9.06 | 8.52 | 629 | 20.99 | 2104 | 1522 | 70.2 | 50.78 |
| 9.06 | 10.15 | 9.605 | 467 | 15.58 | 2571 | 893 | 85.78 | 29.79 |
| 10.15 | 11.23 | 10.69 | 274 | 9.14 | 2845 | 426 | 94.92 | 14.21 |
| 11.24 | 12.32 | 11.78 | 110 | 3.67 | 2955 | 152 | 98.59 | 5.07 |
| 12.32 | 13.4 | 12.86 | 33 | 1.10 | 2988 | 42 | 99.69 | 1.4 |
| 13.4 | 14.49 | 13.945 | 9 | 0.30 | 2997 | 9 | 99.99 | 0.3 |
| TOTAL |  |  | 2997 | 99.99 |  |  |  |  |
# =============================
# HISTOGRAMA SIMPLIFICADO
# =============================
# Compute the histogram object without drawing it; its breaks, counts and
# midpoints replace the class limits and frequency vectors used from here on.
Hist_Oxigeno <- hist(Oxigeno_disuelto, breaks = 10, plot = FALSE)

# Lower limits: every break except the last. Upper limits: every break
# except the first.
Li <- head(Hist_Oxigeno$breaks, -1)
Ls <- tail(Hist_Oxigeno$breaks, -1)
MC <- Hist_Oxigeno[["mids"]]

# Absolute counts and their percentages (2 decimals).
ni <- Hist_Oxigeno[["counts"]]
hi <- round(prop.table(ni) * 100, digits = 2)

# Cumulative counts, ascending and descending.
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))

# Cumulative percentages, accumulated from the rounded hi values.
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
# =============================
# GRÁFICAS
# =============================
# Plot 1: histogram of the raw values with default breaks.
hist(
  x = Oxigeno_disuelto,
  col = "skyblue",
  main = "Gráfica 1: Histograma",
  xlab = "Oxígeno Disuelto (mg/L)",
  ylab = "Frecuencia"
)

# Plot 2 ("global" histogram): same data, different title and colour; the
# y-axis label is left at the hist() default.
hist(
  x = Oxigeno_disuelto,
  col = "lightgreen",
  main = "Gráfica 2: Histograma Global",
  xlab = "Oxígeno Disuelto (mg/L)"
)

# Plot 3: percentage per class, bars labelled with the rounded class midpoint.
barplot(
  height = hi,
  names.arg = round(MC, digits = 2),
  main = "Gráfica 3: Porcentaje por Intervalo",
  ylab = "hi (%)",
  col = "skyblue"
)

# Plot 4 ("local" bar plot): same percentages, bars drawn without gaps
# (space = 0) and without bar labels.
barplot(
  height = hi,
  space = 0,
  main = "Gráfica 4: Porcentaje Detallado",
  ylab = "Porcentaje (%)",
  col = "lightblue"
)

# =============================
# OJIVA ASCENDENTE Y DESCENDENTE
# =============================
# Ascending and descending ogives on one set of axes.
# The descending curve (blue) is drawn at the lower class limits and the
# ascending curve (red) at the upper class limits.
# plot() sizes the axes from the first series only, so the limits are set
# explicitly to cover both curves; otherwise the last ascending point (at the
# highest upper limit) and the smallest ascending counts would be clipped.
plot(Li, Ni_desc,
     type = "o",
     col = "blue",
     lwd = 2,
     xlim = range(c(Li, Ls)),
     ylim = c(0, max(Ni_asc, Ni_desc)),
     main = "Ojiva Ascendente y Descendente",
     xlab = "Oxígeno Disuelto (mg/L)",
     ylab = "Frecuencia")
lines(Ls, Ni_asc, type = "o", col = "red", lwd = 2)
# Identify the two curves; without a legend they are indistinguishable.
legend("right",
       legend = c("Descendente", "Ascendente"),
       col = c("blue", "red"),
       lwd = 2,
       pch = 1,
       bty = "n")

# =============================
# DIAGRAMA DE CAJA
# =============================
# Horizontal box plot of the raw values.
boxplot(
  x = Oxigeno_disuelto,
  col = "orange",
  main = "Gráfica 8: Diagrama de Caja",
  xlab = "Oxígeno Disuelto (mg/L)",
  horizontal = TRUE
)

# =============================
# INDICADORES ESTADÍSTICOS
# =============================
# Summary indicators for Oxigeno_disuelto, rendered as a one-row table.
# Every statistic is computed from the raw data and rounded only once, for
# display, so rounding errors are not carried into derived values.
media <- round(mean(Oxigeno_disuelto), 2)
mediana <- round(median(Oxigeno_disuelto), 2)
varianza <- round(var(Oxigeno_disuelto), 2)
# Stored as desv_estandar (not `sd`) so the stats::sd function is not shadowed
# by a numeric variable in the global environment.
desv_estandar <- round(stats::sd(Oxigeno_disuelto), 2)
# Coefficient of variation (%) from the unrounded sd and mean.
cv <- round((stats::sd(Oxigeno_disuelto) / mean(Oxigeno_disuelto)) * 100, 2)
asimetria <- round(skewness(Oxigeno_disuelto), 4)
curtosis <- round(kurtosis(Oxigeno_disuelto), 2)

# Outlier flag derived from the data with the same rule the box plot uses
# (boxplot.stats, points beyond the whiskers) instead of a fixed string.
hay_atipicos <- length(boxplot.stats(Oxigeno_disuelto)$out) > 0

tabla_indicadores <- data.frame(
  Variable = "Oxígeno Disuelto (mg/L)",
  # NOTE(review): fixed text, not computed from the data; confirm whether this
  # is meant to be the observed range or a reference domain.
  Rango = "[0;20]",
  Media = media,
  Mediana = mediana,
  # NOTE(review): fixed text, not computed from the data.
  Moda = "No existe",
  Varianza = varianza,
  Desv_Estandar = desv_estandar,
  CV = cv,
  Asimetria = asimetria,
  Curtosis = curtosis,
  Valores_Atipicos = if (hay_atipicos) "Sí existen" else "No existen"
)
kable(tabla_indicadores, align = "c",
      caption = "Indicadores Estadísticos del Oxígeno Disuelto")
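Indicadores Estadísticos del Oxígeno Disuelto

| Variable | Rango | Media | Mediana | Moda | Varianza | Desv_Estandar | CV | Asimetria | Curtosis | Valores_Atipicos |
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Oxígeno Disuelto (mg/L) | [0;20] | 8.01 | 8.03 | No existe | 3.9 | 1.97 | 24.59 | 0.0056 | -0.14 | Sí existen |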