FECHA: 25/11/2025
# Extraemos los datos
df <- read.csv("soil_pollution_diseases.csv", sep = ";", stringsAsFactors = FALSE)
# Convertir la columna a fecha (día/mes/año)
df$Date_Reported <- as.Date(df$Date_Reported, format = "%d/%m/%Y")
# Crear columnas Día, Mes y Año
df$Day <- as.integer(format(df$Date_Reported, "%d"))
df$Month <- as.integer(format(df$Date_Reported, "%m"))
df$Year <- as.integer(format(df$Date_Reported, "%Y"))
# Extraemos la variable Discreta
Dia <- df$Day
# Tabla de distribución de frecuencias por días
TDF.Dia <- table(Dia)
Tabla_Dia <- as.data.frame(TDF.Dia)
# Frecuencias relativas (porcentaje)
hi <- (Tabla_Dia$Freq / sum(Tabla_Dia$Freq)) * 100
ni <- Tabla_Dia$Freq
Tabla_Dia <- data.frame(Tabla_Dia, hi)
# Acumuladas
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))
Tabla_dia_final <- data.frame(
Dia = Tabla_Dia$Dia,
ni = Tabla_Dia$Freq,
`hi (%)` = round(Tabla_Dia$hi, 2),
Ni_asc = Niasc,
`Hi_asc(%)` = round(Hiasc, 2),
Ni_dsc = Nidsc,
`Hi_dsc(%)` = round(Hidsc, 2)
)
# Totales
Tabla_dia_final$Dia <- as.character(Tabla_dia_final$Dia)
totales <- c(
Dia = "Total",
ni = sum(ni),
`hi (%)` = 100,
Ni_asc = "-",
`Hi_asc(%)` = "-",
Ni_dsc = "-",
`Hi_dsc(%)` = "-"
)
TDF_dias_total <- rbind(Tabla_dia_final, totales)
# Formato gt
library(gt)
tabla_dias_gt <- TDF_dias_total %>%
gt() %>%
tab_header(
title = md("*Tabla N°5*"),
subtitle = md("**Tabla de distribución
simples y acumuladas de los días del estudio**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = Dia == "Total"
)
)
tabla_dias_gt
| Tabla N°5 |
| Tabla de distribución
simples y acumuladas de los días del estudio |
| Dia |
ni |
hi.... |
Ni_asc |
Hi_asc... |
Ni_dsc |
Hi_dsc... |
| 1 |
109 |
3.63 |
109 |
3.63 |
3000 |
100 |
| 2 |
99 |
3.3 |
208 |
6.93 |
2891 |
96.37 |
| 3 |
107 |
3.57 |
315 |
10.5 |
2792 |
93.07 |
| 4 |
92 |
3.07 |
407 |
13.57 |
2685 |
89.5 |
| 5 |
91 |
3.03 |
498 |
16.6 |
2593 |
86.43 |
| 6 |
96 |
3.2 |
594 |
19.8 |
2502 |
83.4 |
| 7 |
97 |
3.23 |
691 |
23.03 |
2406 |
80.2 |
| 8 |
99 |
3.3 |
790 |
26.33 |
2309 |
76.97 |
| 9 |
84 |
2.8 |
874 |
29.13 |
2210 |
73.67 |
| 10 |
103 |
3.43 |
977 |
32.57 |
2126 |
70.87 |
| 11 |
86 |
2.87 |
1063 |
35.43 |
2023 |
67.43 |
| 12 |
103 |
3.43 |
1166 |
38.87 |
1937 |
64.57 |
| 13 |
100 |
3.33 |
1266 |
42.2 |
1834 |
61.13 |
| 14 |
101 |
3.37 |
1367 |
45.57 |
1734 |
57.8 |
| 15 |
86 |
2.87 |
1453 |
48.43 |
1633 |
54.43 |
| 16 |
115 |
3.83 |
1568 |
52.27 |
1547 |
51.57 |
| 17 |
102 |
3.4 |
1670 |
55.67 |
1432 |
47.73 |
| 18 |
92 |
3.07 |
1762 |
58.73 |
1330 |
44.33 |
| 19 |
87 |
2.9 |
1849 |
61.63 |
1238 |
41.27 |
| 20 |
107 |
3.57 |
1956 |
65.2 |
1151 |
38.37 |
| 21 |
89 |
2.97 |
2045 |
68.17 |
1044 |
34.8 |
| 22 |
104 |
3.47 |
2149 |
71.63 |
955 |
31.83 |
| 23 |
81 |
2.7 |
2230 |
74.33 |
851 |
28.37 |
| 24 |
115 |
3.83 |
2345 |
78.17 |
770 |
25.67 |
| 25 |
114 |
3.8 |
2459 |
81.97 |
655 |
21.83 |
| 26 |
101 |
3.37 |
2560 |
85.33 |
541 |
18.03 |
| 27 |
114 |
3.8 |
2674 |
89.13 |
440 |
14.67 |
| 28 |
86 |
2.87 |
2760 |
92 |
326 |
10.87 |
| 29 |
95 |
3.17 |
2855 |
95.17 |
240 |
8 |
| 30 |
98 |
3.27 |
2953 |
98.43 |
145 |
4.83 |
| 31 |
47 |
1.57 |
3000 |
100 |
47 |
1.57 |
| Total |
3000 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# Criterio de agrupación por semanas (1, 2, 3, 4)
Semana <- sapply(Dia, function(d) {
if (d >= 1 & d <= 7) {
1
} else if (d >= 8 & d <= 14) {
2
} else if (d >= 15 & d <= 21) {
3
} else {
4
}
})
Semana <- factor(Semana, levels = c(1, 2, 3, 4))
# Tabla de frecuencias por semanas numéricas
TDF_Semana <- table(Semana)
Tabla_Semana <- as.data.frame(TDF_Semana)
# Frecuencias relativas (porcentaje)
hi <- (Tabla_Semana$Freq / sum(Tabla_Semana$Freq)) * 100
ni <- Tabla_Semana$Freq
Tabla_Semana <- data.frame(Tabla_Semana, hi)
# Acumuladas
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))
Tabla_semana_final <- data.frame(
Semana = Tabla_Semana$Semana,
ni = Tabla_Semana$Freq,
`hi (%)` = round(Tabla_Semana$hi, 2),
Ni_asc = Niasc,
`Hi_asc(%)` = round(Hiasc, 2),
Ni_dsc = Nidsc,
`Hi_dsc(%)` = round(Hidsc, 2)
)
# Totales
totales <- data.frame(
Semana = "Total",
ni = sum(ni),
`hi (%)` = 100,
Ni_asc = "-",
`Hi_asc(%)` = "-",
Ni_dsc = "-",
`Hi_dsc(%)` = "-",
stringsAsFactors = FALSE
)
TDF_semanas_total <- rbind(Tabla_semana_final, totales)
# Formato gt para semanas
tabla_semanas_gt <- TDF_semanas_total %>%
gt() %>%
tab_header(
title = md("*Tabla N°6*"),
subtitle = md("**Distribución de frecuencias de los días agrupados por semanas**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = Semana == "Total"
)
)
tabla_semanas_gt
| Tabla N°6 |
| Distribución de frecuencias de los días agrupados por semanas |
| Semana |
ni |
hi.... |
Ni_asc |
Hi_asc... |
Ni_dsc |
Hi_dsc... |
| 1 |
691 |
23.03 |
691 |
23.03 |
3000 |
100 |
| 2 |
676 |
22.53 |
1367 |
45.57 |
2309 |
76.97 |
| 3 |
678 |
22.60 |
2045 |
68.17 |
1633 |
54.43 |
| 4 |
955 |
31.83 |
3000 |
100 |
955 |
31.83 |
| Total |
3000 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# GRÁFICAS
# Frecuencia absoluta
barplot(Tabla_Semana$Freq,
main = "Gráfica N°14: Distribución de la contaminación
del suelo según la semana",
xlab = "Semana",
ylab = "Cantidad",
col = "skyblue",
ylim = c(0, max(Tabla_Semana$Freq)*1.1),
names.arg = Tabla_Semana$Semana,
las = 1,
cex.names = 1)

# Porcentaje
barplot(Tabla_Semana$hi,
main = "Gráfica N°15:Distribución porcentual de la contaminación
del suelo según la semana",
xlab = "Semana",
ylab = "%",
col = "lightgreen",
ylim = c(0, 100),
names.arg = Tabla_Semana$Semana,
las = 1,
cex.names = 1)

# Frecuencia absoluta global
barplot(Tabla_Semana$Freq,
main = "Gráfica N°16: Distribución de la contaminación
del suelo según la semana",
xlab = "Semana",
ylab = "Cantidad de registros",
col = "skyblue",
ylim = c(0, 3000),
names.arg = Tabla_Semana$Semana,
las = 1,
cex.names = 1)

# Porcentaje global
barplot(Tabla_Semana$hi,
main = "Gráfica N°17: Distribución porcentual de la contaminación
del suelo según la semana",
xlab = "Semana",
ylab = "%",
col = "lightgreen",
ylim = c(0, 100),
names.arg = Tabla_Semana$Semana,
las = 1,
cex.names = 1)

# Diagrama de caja
boxplot(as.numeric(Tabla_Semana$Semana),
horizontal = TRUE,
col = "brown",
main = "Gráfica N°18: Distribución segun la semana del Estudio de
Contaminación del Suelo",
xlab = "Semana")

# Ojiva acumulada
x_pos <- 1:length(Tabla_Semana$Semana)
plot(x_pos, Tabla_semana_final$Ni_dsc[1:4],
main = "Gráfica N°19: Distribución ascendente y descendente
de las semanas del estudio",
xlab = "Semana",
ylab = "Cantidad",
col = "orange",
type = "p",
lwd = 3,
xaxt="n")
lines(x_pos, Tabla_semana_final$Ni_asc[1:4],
col = "green",
type = "p",
lwd = 3)
axis(side = 1, at = x_pos, labels = Tabla_Semana$Semana, las = 1, cex.axis = 0.9)

# Ojiva porcentual
plot(x_pos, Tabla_semana_final$Hi_dsc.[1:4],
main = "Gráfica N°20: Distribución ascendente y descendente
de las semanas del estudio",
xlab = "Semana",
ylab = "%",
col = "red",
type = "p",
lwd = 2,
xaxt = "n")
lines(x_pos, Tabla_semana_final$Hi_asc.[1:4],
col = "blue",
type = "p",
lwd = 3)
# Personalizar etiquetas del eje X
axis(side = 1, at = x_pos, labels = Tabla_Semana$Semana, las = 1, cex.axis = 0.9)

# INDICADORES ESTADISTICOS
# Variable discreta: Semana (numérica)
Semana_num <- as.numeric(as.character(Semana))
# Media
media <- round(mean(Semana_num), 2)
# Moda
max_frecuencia <- max(Tabla_Semana$Freq)
moda <- Tabla_Semana$Semana[Tabla_Semana$Freq == max_frecuencia]
# Mediana
mediana <- median(Semana_num)
# Dispersión
varianza <- var(Semana_num)
sd <- sd(Semana_num)
cv <- round((sd / media) * 100, 2)
# Forma
library(e1071)
asimetria <- skewness(Semana_num, type = 2)
curtosis <- kurtosis(Semana_num)
# Tabla resumen
tabla_indicadores_semana <- data.frame(
Variable = "Semana",
Rango = paste0("[", min(Semana_num), " ; ", max(Semana_num), "]"),
X = media,
Me = mediana,
Mo = paste(moda, collapse = ", "),
V = round(varianza, 2),
Sd = round(sd, 2),
Cv = cv,
As = round(asimetria, 2),
K = round(curtosis, 2),
Valores_Atipicos = "No hay presencia de valores atípicos",
stringsAsFactors = FALSE
)
library(gt)
fila_semana <- which(tabla_indicadores_semana$Variable == "Semana")
tabla_indicadores_semana_gt <- tabla_indicadores_semana %>%
gt() %>%
tab_header(
title = md("*Tabla N°8*"),
subtitle = md("**Indicadores estadísticos de la variable Semana**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(rows = fila_semana)
)
tabla_indicadores_semana_gt
| Tabla N°8 |
| Indicadores estadísticos de la variable Semana |
| Variable |
Rango |
X |
Me |
Mo |
V |
Sd |
Cv |
As |
K |
Valores_Atipicos |
| Semana |
[1 ; 4] |
2.63 |
3 |
4 |
1.33 |
1.15 |
43.85 |
-0.15 |
-1.42 |
No hay presencia de valores atípicos |
| Autor: Grupo 3 |