# LIBRERÍAS
# -----------------------------
library(knitr)
library(kableExtra)
library(e1071)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
# -----------------------------
# DATA LOADING
# -----------------------------
# Read the water-pollution dataset (comma-separated, "." as decimal mark).
datos <- read.csv("china_water_pollution_data.csv",
  header = TRUE, sep = ",", dec = ".")

# Fail early, with a clear message, when the nitrite column is absent or was
# not parsed as numeric; otherwise the problem only surfaces later inside
# hist() with an unrelated error.
nitrito_bruto <- datos$Nitrite_mg_L
if (!is.numeric(nitrito_bruto)) {
  stop("Column 'Nitrite_mg_L' is missing or not numeric in the input file.",
    call. = FALSE)
}

# Nitrite values with missing observations removed.
Nitrito <- na.omit(nitrito_bruto)
if (length(Nitrito) == 0) {
  stop("Column 'Nitrite_mg_L' has no non-missing values.", call. = FALSE)
}
# -----------------------------
# FREQUENCY TABLE (K = 8)
# -----------------------------
# Requested number of classes. hist() treats a single number passed as
# `breaks` as a suggestion, so the number of classes actually produced is
# length(ni), not necessarily K.
K <- 8
Hist_Nitrito <- hist(Nitrito, breaks = K, plot = FALSE)

# Lower/upper class limits, absolute frequencies and class midpoints.
Li <- Hist_Nitrito$breaks[-length(Hist_Nitrito$breaks)]
Ls <- Hist_Nitrito$breaks[-1]
ni <- Hist_Nitrito$counts
Mc <- Hist_Nitrito$mids

# Relative frequencies and ascending/descending cumulative frequencies.
n <- sum(ni)
hi <- ni / n
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Numeric table. check.names = FALSE keeps the headers exactly as written;
# without it data.frame() rewrites them into syntactic names such as
# "Lim.inf" or "hi....".
TDFNitrito <- data.frame(
  `Lim inf` = round(Li, 4),
  `Lim sup` = round(Ls, 4),
  MC = round(Mc, 4),
  ni = ni,
  `hi (%)` = round(hi * 100, 2),
  `Ni asc` = Ni_asc,
  `Ni desc` = Ni_desc,
  `Hi asc (%)` = round(Hi_asc * 100, 2),
  `Hi desc (%)` = round(Hi_desc * 100, 2),
  check.names = FALSE
)

# Presentation copy. The totals row holds "-" in most columns, so those
# columns end up as text once the rows are bound. Formatting them explicitly
# here gives every cell of a column the same number of decimals instead of
# relying on rbind()'s implicit number-to-text conversion.
dos_decimales <- function(x) formatC(x, format = "f", digits = 2)
cuerpo <- data.frame(
  `Lim inf` = format(TDFNitrito[["Lim inf"]], trim = TRUE),
  `Lim sup` = format(TDFNitrito[["Lim sup"]], trim = TRUE),
  MC = format(TDFNitrito[["MC"]], trim = TRUE),
  ni = ni,
  `hi (%)` = TDFNitrito[["hi (%)"]],
  `Ni asc` = as.character(Ni_asc),
  `Ni desc` = as.character(Ni_desc),
  `Hi asc (%)` = dos_decimales(TDFNitrito[["Hi asc (%)"]]),
  `Hi desc (%)` = dos_decimales(TDFNitrito[["Hi desc (%)"]]),
  check.names = FALSE
)
totales <- data.frame(
  `Lim inf` = "TOTAL",
  `Lim sup` = "-",
  MC = "-",
  ni = n,
  `hi (%)` = 100,
  `Ni asc` = "-",
  `Ni desc` = "-",
  `Hi asc (%)` = "-",
  `Hi desc (%)` = "-",
  check.names = FALSE
)
TDFNitrito_total <- rbind(cuerpo, totales)
# ============================================================
# TABLE N°1
# ============================================================
# Render the frequency table (including its totals row) with centred cells,
# then apply the centred, non-full-width striped/hover/condensed styling.
titulo_tabla_1 <- "Tabla N°1: Tabla de distribución de frecuencias del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
tabla_1 <- kable(TDFNitrito_total, align = "c", caption = titulo_tabla_1)
kable_styling(
  tabla_1,
  full_width = FALSE,
  position = "center",
  bootstrap_options = c("striped", "hover", "condensed")
)
Tabla N°1: Tabla de distribución de frecuencias del Nitrito (mg/L) en el
estudio de contaminación del agua en China en el año 2023

| Lim.inf | Lim.sup | MC | ni | hi.... | Ni.asc | Ni.desc | Hi.asc.... | Hi.desc.... |
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| -0.02 | -0.01 | -0.015 | 4 | 0.13 | 4 | 3000 | 0.13 | 100 |
| -0.01 | 0 | -0.005 | 72 | 2.40 | 76 | 2996 | 2.53 | 99.87 |
| 0 | 0.01 | 0.005 | 441 | 14.70 | 517 | 2924 | 17.23 | 97.47 |
| 0.01 | 0.02 | 0.015 | 1047 | 34.90 | 1564 | 2483 | 52.13 | 82.77 |
| 0.02 | 0.03 | 0.025 | 994 | 33.13 | 2558 | 1436 | 85.27 | 47.87 |
| 0.03 | 0.04 | 0.035 | 385 | 12.83 | 2943 | 442 | 98.1 | 14.73 |
| 0.04 | 0.05 | 0.045 | 54 | 1.80 | 2997 | 57 | 99.9 | 1.9 |
| 0.05 | 0.06 | 0.055 | 3 | 0.10 | 3000 | 3 | 100 | 0.1 |
| TOTAL | - | - | 3000 | 100.00 | - | - | - | - |
# ============================================================
# GRAPH N°1
# ============================================================
# Histogram of the nitrite values drawn on the break points stored in
# Hist_Nitrito. The default x axis is suppressed and redrawn so that a tick
# sits on every break point.
cortes_g1 <- Hist_Nitrito$breaks
titulo_g1 <- "Gráfica N°1: Distribución del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
hist(
  x = Nitrito,
  breaks = cortes_g1,
  col = "pink",
  xaxt = "n",
  main = titulo_g1,
  xlab = "Concentración de Nitrito (mg/L)",
  ylab = "Cantidad de muestras"
)
axis(side = 1, at = cortes_g1, labels = round(cortes_g1, 4), las = 1)

# ============================================================
# TABLE N°2
# ============================================================
# Same frequency table as before, rendered again under the caption used
# after the graphical representation.
titulo_tabla_2 <- "Tabla N°2: Tabla de distribución de frecuencias del Nitrito (mg/L)
posterior a la representación gráfica en el estudio de contaminación
del agua en China en el año 2023"
tabla_2 <- kable(TDFNitrito_total, align = "c", caption = titulo_tabla_2)
kable_styling(
  tabla_2,
  full_width = FALSE,
  position = "center",
  bootstrap_options = c("striped", "hover", "condensed")
)
Tabla N°2: Tabla de distribución de frecuencias del Nitrito (mg/L)
posterior a la representación gráfica en el estudio de contaminación del
agua en China en el año 2023

| Lim.inf | Lim.sup | MC | ni | hi.... | Ni.asc | Ni.desc | Hi.asc.... | Hi.desc.... |
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| -0.02 | -0.01 | -0.015 | 4 | 0.13 | 4 | 3000 | 0.13 | 100 |
| -0.01 | 0 | -0.005 | 72 | 2.40 | 76 | 2996 | 2.53 | 99.87 |
| 0 | 0.01 | 0.005 | 441 | 14.70 | 517 | 2924 | 17.23 | 97.47 |
| 0.01 | 0.02 | 0.015 | 1047 | 34.90 | 1564 | 2483 | 52.13 | 82.77 |
| 0.02 | 0.03 | 0.025 | 994 | 33.13 | 2558 | 1436 | 85.27 | 47.87 |
| 0.03 | 0.04 | 0.035 | 385 | 12.83 | 2943 | 442 | 98.1 | 14.73 |
| 0.04 | 0.05 | 0.045 | 54 | 1.80 | 2997 | 57 | 99.9 | 1.9 |
| 0.05 | 0.06 | 0.055 | 3 | 0.10 | 3000 | 3 | 100 | 0.1 |
| TOTAL | - | - | 3000 | 100.00 | - | - | - | - |
# ============================================================
# GRAPH N°2
# ============================================================
# Pink histogram of the nitrite values on the stored break points, with a
# hand-drawn x axis that labels every break point (rounded to 4 decimals).
limites_g2 <- Hist_Nitrito$breaks
etiquetas_g2 <- round(limites_g2, 4)
titulo_g2 <- "Gráfica N°2: Distribución global del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
hist(
  Nitrito,
  breaks = limites_g2,
  main = titulo_g2,
  ylab = "Cantidad de muestras",
  xlab = "Concentración de Nitrito (mg/L)",
  xaxt = "n",
  col = "pink"
)
axis(1, at = limites_g2, labels = etiquetas_g2, las = 1)

# ============================================================
# GRAPH N°3
# ============================================================
# Green histogram of the nitrite values on the stored break points; the
# x axis is drawn separately so each break point gets its own tick label.
limites_g3 <- Hist_Nitrito$breaks
etiquetas_g3 <- round(limites_g3, 4)
titulo_g3 <- "Gráfica N°3: Distribución comparativa del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
hist(
  Nitrito,
  breaks = limites_g3,
  main = titulo_g3,
  ylab = "Cantidad de muestras",
  xlab = "Concentración de Nitrito (mg/L)",
  xaxt = "n",
  col = "green"
)
axis(1, at = limites_g3, labels = etiquetas_g3, las = 1)

# ============================================================
# GRAPH N°4
# ============================================================
# Purple histogram of the nitrite values on the stored break points; the
# x axis is drawn separately so each break point gets its own tick label.
limites_g4 <- Hist_Nitrito$breaks
etiquetas_g4 <- round(limites_g4, 4)
titulo_g4 <- "Gráfica N°4: Distribución local de frecuencia del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
hist(
  Nitrito,
  breaks = limites_g4,
  main = titulo_g4,
  ylab = "Cantidad de muestras",
  xlab = "Concentración de Nitrito (mg/L)",
  xaxt = "n",
  col = "purple"
)
axis(1, at = limites_g4, labels = etiquetas_g4, las = 1)

# ============================================================
# GRAPH N°5
# ============================================================
# Bar chart of the relative frequency (%) of each class.
#
# The classes were produced by hist() with its default settings, which
# builds right-closed intervals (Li, Ls] and additionally includes the
# lowest value in the first class. The labels follow that convention: "["
# opens only the first class, "(" opens the rest, and every class closes
# with "]". The limits are separated by " ; " because several limits are
# negative and a dash would be ambiguous.
apertura <- ifelse(seq_along(Li) == 1, "[", "(")
intervalos <- paste0(apertura, round(Li, 4), " ; ", round(Ls, 4), "]")
titulo_g5 <- "Gráfica N°5: Distribución porcentual del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
barplot(round(hi * 100, 2),
  names.arg = intervalos,
  col = "lightblue",
  main = titulo_g5,
  xlab = "Intervalos de concentración de Nitrito (mg/L)",
  ylab = "Porcentaje (%)",
  las = 2,           # class labels perpendicular to the axis
  cex.names = 0.7)   # smaller label text

# ============================================================
# GRAPH N°6
# ============================================================
# Ascending and descending cumulative-frequency curves (ogives).
#
# Ni_asc[i] accumulates the classes up to and including class i, so it is
# plotted at that class's upper limit Ls[i]. Ni_desc[i] accumulates class i
# and all later classes, so it is plotted at that class's lower limit Li[i].
limites_g6 <- c(Li, Ls[length(Ls)])  # every class limit, including the last
titulo_g6 <- "Gráfica N°6: Distribución de frecuencias acumuladas del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
plot(Ls, Ni_asc, type = "o",
  main = titulo_g6,
  xlab = "Concentración de Nitrito (mg/L)",
  ylab = "Frecuencia acumulada",
  col = "orange",
  xaxt = "n",
  # Both curves must fit: the descending one starts at the first lower limit
  # and may reach values outside the range of the ascending one.
  xlim = range(limites_g6),
  ylim = range(Ni_asc, Ni_desc))
lines(Li, Ni_desc, col = "green", type = "o")
axis(1, at = limites_g6, labels = round(limites_g6, 4), las = 1)
legend("right",
  legend = c("Ni ascendente", "Ni descendente"),
  col = c("orange", "green"), lty = 1, pch = 1, bty = "n")

# ============================================================
# GRAPH N°7
# ============================================================
# Ascending and descending cumulative-percentage curves (ogives).
#
# Hi_asc[i] accumulates the classes up to and including class i, so it is
# plotted at that class's upper limit Ls[i]. Hi_desc[i] accumulates class i
# and all later classes, so it is plotted at that class's lower limit Li[i].
limites_g7 <- c(Li, Ls[length(Ls)])  # every class limit, including the last
titulo_g7 <- "Gráfica N°7: Distribución porcentual acumulada del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
plot(Ls, Hi_asc * 100, type = "o",
  main = titulo_g7,
  xlab = "Concentración de Nitrito (mg/L)",
  ylab = "Porcentaje acumulado (%)",
  col = "blue",
  xaxt = "n",
  # Both curves must fit: the descending one starts at the first lower limit
  # and may reach values outside the range of the ascending one.
  xlim = range(limites_g7),
  ylim = range(Hi_asc * 100, Hi_desc * 100))
lines(Li, Hi_desc * 100, col = "red", type = "o")
axis(1, at = limites_g7, labels = round(limites_g7, 4), las = 1)
legend("right",
  legend = c("Hi ascendente (%)", "Hi descendente (%)"),
  col = c("blue", "red"), lty = 1, pch = 1, bty = "n")

# ============================================================
# GRAPH N°8
# ============================================================
# Horizontal box plot of the nitrite values, filled in purple.
titulo_g8 <- "Gráfica N°8: Diagrama de caja del Nitrito (mg/L)
en el estudio de contaminación del agua en China en el año 2023"
boxplot(
  Nitrito,
  col = "purple",
  horizontal = TRUE,
  xlab = "Concentración de Nitrito (mg/L)",
  main = titulo_g8
)

# ============================================================
# TABLE N°3: STATISTICAL INDICATORS
# ============================================================
# Location and spread, rounded for presentation.
media <- round(mean(Nitrito), 4)
mediana <- round(median(Nitrito), 4)
varianza <- round(var(Nitrito), 6)
sd_nit <- round(sd(Nitrito), 4)
# Coefficient of variation (%), computed from the unrounded sd and mean.
cv <- round((sd(Nitrito) / mean(Nitrito)) * 100, 2)

# Shape measures. Both are requested with type = 2 so that skewness and
# kurtosis come from the same estimator family; leaving kurtosis() at its
# default would mix two different definitions in one table.
asim <- round(skewness(Nitrito, type = 2), 2)
curt <- round(kurtosis(Nitrito, type = 2), 2)

# Mode: midpoint(s) of the class(es) with the highest absolute frequency.
max_frec <- max(TDFNitrito$ni)
moda <- paste(TDFNitrito$MC[TDFNitrito$ni == max_frec], collapse = ", ")

# Values flagged as outliers by boxplot.stats(). The condition is a single
# TRUE/FALSE, so a plain if/else is used instead of the vectorised ifelse().
out <- boxplot.stats(Nitrito)$out
val_atip <- if (length(out) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste(length(out), "valores atípicos")
}

# One-row summary table.
tabla_indicadores <- data.frame(
  Variable = "Nitrito (mg/L)",
  Rango = paste0("[", round(min(Nitrito), 4), " ; ", round(max(Nitrito), 4), "]"),
  X = media,
  Me = mediana,
  Mo = moda,
  V = varianza,
  Sd = sd_nit,
  Cv = cv,
  As = asim,
  K = curt,
  Valores_Atipicos = val_atip
)

# The variance is very small; fixing its decimals keeps it from being shown
# in scientific notation (e.g. 9.9e-05) in the rendered table.
tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("Tabla N°3"),
    subtitle = md("*Indicadores estadísticos de la variable Nitrito (mg/L)*")
  ) %>%
  fmt_number(columns = "V", decimals = 6)
| Tabla N°3 |
| Indicadores estadísticos de la variable Nitrito (mg/L) |
| Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores_Atipicos |
| Nitrito (mg/L) | [-0.02 ; 0.053] | 0.02 | 0.02 | 0.015 | 9.9e-05 | 0.0099 | 49.73 | -0.01 | -0.08 | 10 valores atípicos |