0.Librerias
library(gt)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
1.Leer Datos
datos <- read.csv(
"waterPollution.csv",
sep = ",",
stringsAsFactors = FALSE
)
3.Tabla de distribución de frecuencias
# =========================
# TABLAS CUALITATIVAS NOMINALES
# =========================
TDF_OPD <- data.frame(table(OPD))
ni <- TDF_OPD$Freq
hi <- round((ni / sum(ni)) * 100, 2)
TDF_OPD <- data.frame(OPD = TDF_OPD$OPD, ni, hi)
Summary <- data.frame(OPD = "TOTAL", ni = sum(ni), hi = 100)
TDF_OPD_suma <- rbind(TDF_OPD, Summary)
colnames(TDF_OPD_suma) <- c("Propiedad observada", "ni", "hi(%)")
TDF_OPD_suma %>%
gt() %>%
tab_header(
title = md("*Tabla Nro. 1*"),
subtitle = md("**Distribución de las Propiedades obeservadas en los cuerpos de agua en el estudio de la calidad de agua en Europa(1991-2017)**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
)
| Tabla Nro. 1 |
| Distribución de las Propiedades obeservadas en los cuerpos de agua en el estudio de la calidad de agua en Europa(1991-2017) |
| Propiedad observada |
ni |
hi(%) |
| CAS_100-41-4 |
12 |
0.06 |
| CAS_100-42-5 |
1 |
0.00 |
| CAS_10061-02-6 |
1 |
0.00 |
| CAS_1007-28-9 |
1 |
0.00 |
| CAS_1024-57-3 |
2 |
0.01 |
| CAS_104-40-5 |
2 |
0.01 |
| CAS_106-46-7 |
9 |
0.04 |
| CAS_107-06-2 |
6 |
0.03 |
| CAS_1071-83-6 |
3 |
0.01 |
| CAS_108-67-8 |
1 |
0.00 |
| CAS_108-70-3 |
1 |
0.00 |
| CAS_108-88-3 |
4 |
0.02 |
| CAS_108-90-7 |
5 |
0.03 |
| CAS_108-95-2 |
4 |
0.02 |
| CAS_111988-49-9 |
20 |
0.10 |
| CAS_114-07-8 |
14 |
0.07 |
| CAS_118-74-1 |
5 |
0.03 |
| CAS_1194-65-6 |
1 |
0.00 |
| CAS_120-12-7 |
6 |
0.03 |
| CAS_120-82-1 |
1 |
0.00 |
| CAS_121-75-5 |
1 |
0.00 |
| CAS_122-34-9 |
7 |
0.03 |
| CAS_124-48-1 |
1 |
0.00 |
| CAS_124495-18-7 |
2 |
0.01 |
| CAS_127-18-4 |
7 |
0.03 |
| CAS_128-37-0 |
2 |
0.01 |
| CAS_135410-20-7 |
20 |
0.10 |
| CAS_138261-41-3 |
20 |
0.10 |
| CAS_139-40-2 |
4 |
0.02 |
| CAS_140-66-9 |
2 |
0.01 |
| CAS_14265-44-2 |
1243 |
6.22 |
| CAS_14797-55-8 |
1289 |
6.44 |
| CAS_14797-65-0 |
1266 |
6.33 |
| CAS_14798-03-9 |
1110 |
5.55 |
| CAS_15307-86-5 |
131 |
0.66 |
| CAS_153719-23-4 |
41 |
0.21 |
| CAS_1582-09-8 |
14 |
0.07 |
| CAS_15972-60-8 |
14 |
0.07 |
| CAS_1610-18-0 |
2 |
0.01 |
| CAS_1634-04-4 |
1 |
0.00 |
| CAS_16887-00-6 |
28 |
0.14 |
| CAS_16984-48-8 |
9 |
0.04 |
| CAS_18540-29-9 |
1 |
0.00 |
| CAS_18785-72-3 |
12 |
0.06 |
| CAS_189084-64-8 |
4 |
0.02 |
| CAS_191-24-2 |
12 |
0.06 |
| CAS_1912-24-9 |
9 |
0.04 |
| CAS_193-39-5 |
11 |
0.06 |
| CAS_19666-30-9 |
12 |
0.06 |
| CAS_2032-65-7 |
12 |
0.06 |
| CAS_205-99-2 |
10 |
0.05 |
| CAS_206-44-0 |
10 |
0.05 |
| CAS_207-08-9 |
15 |
0.07 |
| CAS_207122-15-4 |
1 |
0.00 |
| CAS_210880-92-5 |
12 |
0.06 |
| CAS_218-01-9 |
1 |
0.00 |
| CAS_2212-67-1 |
1 |
0.00 |
| CAS_2303-17-5 |
136 |
0.68 |
| CAS_2921-88-2 |
9 |
0.04 |
| CAS_298-00-0 |
2 |
0.01 |
| CAS_309-00-2 |
11 |
0.06 |
| CAS_319-84-6 |
7 |
0.03 |
| CAS_319-85-7 |
8 |
0.04 |
| CAS_319-86-8 |
5 |
0.03 |
| CAS_330-54-1 |
4 |
0.02 |
| CAS_33213-65-9 |
12 |
0.06 |
| CAS_34123-59-6 |
6 |
0.03 |
| CAS_35065-27-1 |
1 |
0.00 |
| CAS_35065-29-3 |
1 |
0.00 |
| CAS_35693-99-3 |
1 |
0.00 |
| CAS_37680-73-2 |
1 |
0.00 |
| CAS_40487-42-1 |
1 |
0.00 |
| CAS_41318-75-6 |
3 |
0.01 |
| CAS_41394-05-2 |
3 |
0.01 |
| CAS_465-73-6 |
9 |
0.04 |
| CAS_470-90-6 |
8 |
0.04 |
| CAS_50-28-2 |
2 |
0.01 |
| CAS_50-29-3 |
13 |
0.06 |
| CAS_50-32-8 |
11 |
0.06 |
| CAS_51218-45-2 |
14 |
0.07 |
| CAS_541-73-1 |
11 |
0.06 |
| CAS_5436-43-1 |
2 |
0.01 |
| CAS_5598-13-0 |
1 |
0.00 |
| CAS_56-23-5 |
6 |
0.03 |
| CAS_56-38-2 |
11 |
0.06 |
| CAS_57-63-6 |
2 |
0.01 |
| CAS_57837-19-1 |
1 |
0.00 |
| CAS_58-89-9 |
8 |
0.04 |
| CAS_5915-41-3 |
6 |
0.03 |
| CAS_60-57-1 |
16 |
0.08 |
| CAS_60348-60-9 |
2 |
0.01 |
| CAS_608-93-5 |
13 |
0.06 |
| CAS_6190-65-4 |
1 |
0.00 |
| CAS_67-66-3 |
6 |
0.03 |
| CAS_68631-49-2 |
1 |
0.00 |
| CAS_7012-37-5 |
2 |
0.01 |
| CAS_7085-19-0 |
2 |
0.01 |
| CAS_71-43-2 |
5 |
0.03 |
| CAS_71-52-3 |
3 |
0.01 |
| CAS_71-55-6 |
4 |
0.02 |
| CAS_72-20-8 |
18 |
0.09 |
| CAS_72-43-5 |
1 |
0.00 |
| CAS_72-54-8 |
18 |
0.09 |
| CAS_72-55-9 |
16 |
0.08 |
| CAS_7287-19-6 |
4 |
0.02 |
| CAS_74-83-9 |
1 |
0.00 |
| CAS_74-95-3 |
1 |
0.00 |
| CAS_74-97-5 |
1 |
0.00 |
| CAS_74070-46-5 |
1 |
0.00 |
| CAS_7429-90-5 |
8 |
0.04 |
| CAS_7439-89-6 |
9 |
0.04 |
| CAS_7439-92-1 |
16 |
0.08 |
| CAS_7439-95-4 |
7 |
0.03 |
| CAS_7439-96-5 |
5 |
0.03 |
| CAS_7439-97-6 |
10 |
0.05 |
| CAS_7439-98-7 |
1 |
0.00 |
| CAS_7440-02-0 |
7 |
0.03 |
| CAS_7440-09-7 |
15 |
0.07 |
| CAS_7440-23-5 |
14 |
0.07 |
| CAS_7440-24-6 |
2 |
0.01 |
| CAS_7440-31-5 |
2 |
0.01 |
| CAS_7440-38-2 |
9 |
0.04 |
| CAS_7440-39-3 |
3 |
0.01 |
| CAS_7440-42-8 |
5 |
0.03 |
| CAS_7440-43-9 |
15 |
0.07 |
| CAS_7440-47-3 |
13 |
0.06 |
| CAS_7440-48-4 |
5 |
0.03 |
| CAS_7440-50-8 |
13 |
0.06 |
| CAS_7440-62-2 |
1 |
0.00 |
| CAS_7440-66-6 |
11 |
0.06 |
| CAS_7440-70-2 |
6 |
0.03 |
| CAS_75-01-4 |
1 |
0.00 |
| CAS_75-09-2 |
6 |
0.03 |
| CAS_75-25-2 |
1 |
0.00 |
| CAS_75-27-4 |
2 |
0.01 |
| CAS_76-44-8 |
2 |
0.01 |
| CAS_7723-14-0 |
1240 |
6.20 |
| CAS_7782-49-2 |
5 |
0.03 |
| CAS_789-02-6 |
16 |
0.08 |
| CAS_79-01-6 |
6 |
0.03 |
| CAS_81103-11-9 |
6 |
0.03 |
| CAS_83-32-9 |
1 |
0.00 |
| CAS_83164-33-4 |
1 |
0.00 |
| CAS_834-12-8 |
2 |
0.01 |
| CAS_84852-15-3 |
1 |
0.00 |
| CAS_87-61-6 |
6 |
0.03 |
| CAS_87-68-3 |
8 |
0.04 |
| CAS_87-86-5 |
4 |
0.02 |
| CAS_886-50-0 |
3 |
0.01 |
| CAS_91-20-3 |
5 |
0.03 |
| CAS_94-74-6 |
5 |
0.03 |
| CAS_95-47-6 |
6 |
0.03 |
| CAS_95-50-1 |
10 |
0.05 |
| CAS_959-98-8 |
3 |
0.01 |
| EEA_31-01-6 |
9 |
0.04 |
| EEA_31-02-7 |
5 |
0.03 |
| EEA_3121-01-5 |
898 |
4.49 |
| EEA_3131-01-9 |
1145 |
5.73 |
| EEA_3132-01-2 |
22 |
0.11 |
| EEA_3133-01-5 |
1214 |
6.07 |
| EEA_3133-02-6 |
16 |
0.08 |
| EEA_3133-03-7 |
324 |
1.62 |
| EEA_3133-04-8 |
51 |
0.26 |
| EEA_3133-05-9 |
99 |
0.50 |
| EEA_3133-06-0 |
464 |
2.32 |
| EEA_3142-01-6 |
738 |
3.69 |
| EEA_3151-01-7 |
154 |
0.77 |
| EEA_3152-01-0 |
1157 |
5.78 |
| EEA_3161-01-1 |
623 |
3.12 |
| EEA_3161-02-2 |
1180 |
5.90 |
| EEA_3161-03-3 |
417 |
2.08 |
| EEA_3161-05-5 |
649 |
3.24 |
| EEA_31613-01-1 |
260 |
1.30 |
| EEA_31615-01-7 |
844 |
4.22 |
| EEA_3163-01-7 |
378 |
1.89 |
| EEA_3164-01-0 |
505 |
2.53 |
| EEA_3164-07-6 |
564 |
2.82 |
| EEA_3164-08-7 |
886 |
4.43 |
| EEA_33-06-7 |
5 |
0.03 |
| EEA_33-13-6 |
1 |
0.00 |
| EEA_33-18-1 |
2 |
0.01 |
| EEA_33-64-7 |
5 |
0.03 |
| TOTAL |
20000 |
100.00 |
| Autor: Grupo 3 |
3.1 Se agrupo las frecuencias mas grandes para la comparación debido
a la cantidad de valores que presenta la variable
# Tabla reducida
datos$observedPropertyDeterminandCode <- recode(
datos$observedPropertyDeterminandCode,
"CAS_14797-55-8" = "Nitrato (NO3)",
"CAS_14797-65-0" = "Nitrito (NO2)",
"CAS_14265-44-2" = "Fosfato/Ortofosfato (PO4)",
"CAS_7723-14-0" = "Fósforo compuesto",
"EEA_3133-01-5" = "EEA_3133-01-5",
"EEA_3161-02-2" = "EEA_3161-02-2",
"EEA_3152-01-0" = "EEA_3152-01-0",
"EEA_3131-01-9" = "EEA_3131-01-9",
"CAS_14798-03-9" = "Compuesto nitrogenado"
)
OPD <- datos$observedPropertyDeterminandCode
TDF_OPD <- data.frame(table(OPD))
# Ordenar de mayor a menor frecuencia
TDF_OPD <- TDF_OPD[order(-TDF_OPD$Freq), ]
# Mantener los 9 OPDes más frecuentes
top9 <- head(TDF_OPD, 9)
# Agrupar el resto en "Otros"
otros <- data.frame(
OPD = "Otros",
Freq = sum(TDF_OPD$Freq[-(1:9)])
)
# Unir
TDF_OPD <- rbind(top9, otros)
# Calcular frecuencias
ni <- TDF_OPD$Freq
hi <- round((ni / sum(ni)) * 100, 2)
TDF_OPD <- data.frame(
OPD = TDF_OPD$OPD,
ni,
hi
)
Summary <- data.frame(
OPD = "TOTAL",
ni = sum(ni),
hi = 100
)
TDF_OPD_suma <- rbind(TDF_OPD, Summary)
colnames(TDF_OPD_suma) <- c(
"OPD",
"ni",
"hi(%)"
)
colnames(TDF_OPD_suma) <- c("Propiedad observada", "ni", "hi(%)")
# =========================
# TABLA
# =========================
TDF_OPD_suma %>%
gt() %>%
tab_header(
title = md("*Tabla Nro. 2*"),
subtitle = md("**Distribución de las Propiedades mas frecuentes obeservadas en los cuerpos de agua en el estudio de la calidad de agua en Europa(1991-2017)**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
)
| Tabla Nro. 2 |
| Distribución de las Propiedades mas frecuentes obeservadas en los cuerpos de agua en el estudio de la calidad de agua en Europa(1991-2017) |
| Propiedad observada |
ni |
hi(%) |
| Nitrato (NO3) |
1289 |
6.44 |
| Nitrito (NO2) |
1266 |
6.33 |
| Fosfato/Ortofosfato (PO4) |
1243 |
6.22 |
| Fósforo compuesto |
1240 |
6.20 |
| EEA_3133-01-5 |
1214 |
6.07 |
| EEA_3161-02-2 |
1180 |
5.90 |
| EEA_3152-01-0 |
1157 |
5.78 |
| EEA_3131-01-9 |
1145 |
5.73 |
| Compuesto nitrogenado |
1110 |
5.55 |
| Otros |
9156 |
45.78 |
| TOTAL |
20000 |
100.00 |
| Autor: Grupo 3 |
4.Gráficas
4.1 Histograma
# =========================
# DIAGRAMA DE BARRAS LOCAL ni
# =========================
par(mar = c(10,4,4,2))
barplot(ni,
main = "Gráfica N°1: Distribución de las Propiedades obeservadas
en los cuerpos de agua en el estudio de la calidad
de agua en Europa(1991-2017)",
xlab = "",
ylab = "Cantidad",
col = "greenyellow",
ylim = c(0,max(ni)),
las = 2,
cex.names = 0.6,
names.arg = TDF_OPD$OPD)
title(
xlab = "Propiedad observada",
line = 8
)

4.2 Histograma General
# =========================
# DIAGRAMA DE BARRAS GLOBAL ni
# =========================
par(mar = c(10,4,4,2))
barplot(ni,
main = "Gráfica N°2: Distribución general de las
Propiedades obeservadas en los cuerpos de agua en
el estudio de la calidad de agua en Europa(1991-2017)",
xlab = "",
ylab = "Cantidad",
col = "firebrick",
ylim = c(0,20000),
las = 2,
cex.names = 0.6,
names.arg = TDF_OPD$OPD)
title(
xlab = "Propiedad observada",
line = 8
)

4.3 Histograma Porcentual
# =========================
# DIAGRAMA DE BARRAS LOCAL hi
# =========================
par(mar = c(10,4,4,2))
barplot(hi,
main = "Gráfica N°3: Distribución porcentual de las
Propiedades obeservadas en los cuerpos de agua en el
estudio de la calidad de agua en Europa(1991-2017)",
xlab = "",
ylab = "Porcentaje",
col = "darkorange",
ylim = c(0,max(hi)),
las = 2,
cex.names = 0.6,
names.arg = TDF_OPD$OPD)
title(
xlab = "Propiedad observada",
line = 7
)

4.4 Histograma Porcentual General
# =========================
# DIAGRAMA DE BARRAS GLOBAL hi
# =========================
par(mar = c(10,4,4,2))
barplot(hi,
main = "Gráfica N°4: Distribución porcentual general de las
Propiedades obeservadas en los cuerpos de agua
en el estudio de la calidad de agua en Europa(1991-2017)",
xlab = "",
ylab = "Porcentaje",
col = "darkorchid",
ylim = c(0,100),
las = 2,
cex.names = 0.6,
names.arg = TDF_OPD$OPD)
title(
xlab = "Propiedad observada",
line = 7
)

4.5 Diagrama circular
# =========================
# DIAGRAMA CIRCULAR
# =========================
etiquetas <- paste0(hi, " %")
colores <- c("tomato", "turquoise", "violet", "lightsalmon", "wheat", "yellow",
"lemonchiffon", "lightblue", "green", "chocolate")
par(mar = c(2, 2, 4, 6))
pie(
hi,
labels = etiquetas,
col = colores,
main = "Gráfica N°5 Distribución porcentual de las
Propiedades obeservadas en los cuerpos de agua
en el estudio de la calidad de agua en Europa(1991-2017)",
cex = 1
)
legend(
"topright",
legend = TDF_OPD$OPD,
fill = colores,
title = "Leyenda",
cex = 0.5,
xpd = TRUE
)

5 Indicadores Estadísticos
# =========================
# INDICADORES ESTADÍSTICOS
# =========================
# Moda
frecuencia_max <- max(TDF_OPD$ni)
modas <- TDF_OPD$OPD[
TDF_OPD$ni == frecuencia_max
]
Mo_OPD <- paste(modas, collapse = " - ")
# Tabla resumen
tabla_indicadores <- data.frame(
Variable = "Propiedad Observada",
Rango = "-",
X = "-",
Me = "-",
Mo = "Nitrato (NO3)",
V = "-",
Sd = "-",
Cv = "-",
As = "-",
K = "-",
Valores_Atipicos = "No aplica"
)
# Mostrar tabla
tabla_indicadores_gt <- tabla_indicadores %>%
gt() %>%
tab_header(
title = md("*Tabla Nro. 3*"),
subtitle = md("**Indicadores estadísticos de la propiedad observada en los cuerpos de agua en el estudio de calidad de agua en Europa(1991-2017)**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
column_labels.border.bottom.color = "black",
row.striping.include_table_body = TRUE
)
tabla_indicadores_gt
| Tabla Nro. 3 |
| Indicadores estadísticos de la propiedad observada en los cuerpos de agua en el estudio de calidad de agua en Europa(1991-2017) |
| Variable |
Rango |
X |
Me |
Mo |
V |
Sd |
Cv |
As |
K |
Valores_Atipicos |
| Propiedad Observada |
- |
- |
- |
Nitrato (NO3) |
- |
- |
- |
- |
- |
No aplica |
| Autor: Grupo 3 |
6 Conclusión
# =========================
# CONCLUSIÓN
# =========================
#El valor más frecuente de la variable Propiedad observada es el Nitrato(NO3)