UNIVERSIDAD CENTRAL DEL ECUADOR

ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD

FECHA: 06/12/2025

# Estadística Descriptiva


#6/12/2025

# Load the soil-pollution data set: comma-separated file with a header row
# and "." as the decimal mark.
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)


# Extraction of the continuous quantitative variable (soil temperature)

Temperatura <- datos$Temperature_C

# Range (R), number of classes (K, from 1 + 3.33 * log10(n)) and class width (A).
# NOTE: `min` and `max` are kept as variable names so that any later code
# reading them keeps working; calls such as min(x) still resolve to the base
# functions because R skips non-function objects when looking up a call.
min <- min(Temperatura)
max <- max(Temperatura)
R <- max - min
K <- floor(1 + 3.33 * log10(length(Temperatura)))
A <- R / K

# Lower/upper class limits (rounded to 2 decimals for display) and class marks.
# Both vectors are built from the same multiples of A, so they always have
# exactly K elements and Ls[i] is identical to Li[i + 1].
Li <- round(min + A * (seq_len(K) - 1), 2)
Ls <- round(min + A * seq_len(K), 2)
Mc <- (Li + Ls) / 2

# Absolute frequencies. Classes are [Li, Ls) except the last one, which is
# closed on the right. The interior cut points are the rounded limits shown in
# the table; the two outer cut points use the unrounded minimum and maximum so
# that rounding can never push an extreme observation out of the table.
cortes <- c(Li, Ls[K])
cortes[1] <- min
cortes[K + 1] <- max
ni <- tabulate(
  findInterval(Temperatura, cortes, rightmost.closed = TRUE),
  nbins = K
)

sum(ni)
## [1] 3000

# Relative frequencies (in %) and ascending/descending cumulative frequencies.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

TDF_Temperatura <- data.frame(
  Li, Ls, Mc, ni, round(hi, 2), Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)

colnames(TDF_Temperatura) <- c("Li","Ls","Mc","ni","hi","Ni_asc","Ni_desc","Hi_asc(%)","Hi_desc(%)")

# Totals row. It is a character vector, so rbind() turns every column of the
# table into text; from here on the table is meant for display only.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),  # rounded so floating-point noise never reaches the table
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)

TDF_Temperatura <- rbind(TDF_Temperatura, totales)

library(dplyr)
library(gt)

# Render the frequency table with gt: titled header, author note, black
# outer/heading/label borders, gray inner rules and striped body rows.
gt(TDF_Temperatura) %>%
  tab_header(
    title = md("Tabla Nro. 3"),
    subtitle = md("*Tabla de distribución de la Temperatura (°C) del Suelo*")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column labels
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Table body
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  )
Tabla Nro. 3
Tabla de distribución de la Temperatura (°C) del Suelo
Li Ls Mc ni hi Ni_asc Ni_desc Hi_asc(%) Hi_desc(%)
10 12.92 11.46 258 8.6 258 3000 8.6 100
12.92 15.83 14.375 266 8.87 524 2742 17.47 91.4
15.83 18.75 17.29 251 8.37 775 2476 25.83 82.53
18.75 21.67 20.21 241 8.03 1016 2225 33.87 74.17
21.67 24.58 23.125 262 8.73 1278 1984 42.6 66.13
24.58 27.5 26.04 260 8.67 1538 1722 51.27 57.4
27.5 30.42 28.96 234 7.8 1772 1462 59.07 48.73
30.42 33.33 31.875 257 8.57 2029 1228 67.63 40.93
33.33 36.25 34.79 253 8.43 2282 971 76.07 32.37
36.25 39.17 37.71 249 8.3 2531 718 84.37 23.93
39.17 42.08 40.625 218 7.27 2749 469 91.63 15.63
42.08 45 43.54 251 8.37 3000 251 100 8.37
TOTAL - - 3000 100 - - - -
Autor: Grupo 3
# Histogram of the raw variable using hist()'s default binning; the object
# returned by hist() is kept in histoT.
histoT <- hist(
  x = Temperatura,
  col = "blue",
  main = "Gráfica Nº17: Distribución de la Temperatura",
  xlab = "Temperatura (°C)",
  ylab = "Cantidad"
)

# Simplified frequency table built from hist()'s own binning

# plot = FALSE: only the histogram object (breaks, counts, mids) is needed.
Hist_Temperatura <- hist(Temperatura, breaks = 8, plot = FALSE)
k <- length(Hist_Temperatura$breaks)  # number of break points (classes + 1)

# Class limits: every break except the last is a lower limit, every break
# except the first is an upper limit.
Li <- head(Hist_Temperatura$breaks, -1)
Ls <- tail(Hist_Temperatura$breaks, -1)
ni <- Hist_Temperatura$counts
sum(ni)
## [1] 3000
Mc <- Hist_Temperatura$mids

# Relative frequencies are kept as proportions here and converted to
# percentages only when the table is assembled.
hi <- ni / sum(ni)
sum(hi)
## [1] 1
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

TDF_Temperatura <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi * 100, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc * 100, 2),
  Hi_desc = round(Hi_desc * 100, 2)
)
colnames(TDF_Temperatura) <- c("Lim inf","Lim sup","MC","ni","hi(%)","Ni asc","Ni desc","Hi asc(%)","Hi desc(%)")

# Totals row. The totals come from the source vectors rather than from the
# table: the table column is now called "hi(%)", so `$hi` would only work
# through partial matching, and summing already-rounded percentages can miss
# 100 by a rounding error.
# Being a character vector, this row makes rbind() convert every column of
# the table to text.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi) * 100, 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)

TDF_Temperatura <- rbind(TDF_Temperatura, totales)
library(dplyr)
library(gt)

# Render the simplified frequency table with gt, using the same visual style:
# black outer/heading/label borders, gray inner rules and striped body rows.
gt(TDF_Temperatura) %>%
  tab_header(
    title = md("Tabla Nro. 4"),
    subtitle = md("*Tabla Simplificada de distribución de la Temperatura del Suelo*")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column labels
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Table body
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  )
Tabla Nro. 4
Tabla Simplificada de distribución de la Temperatura del Suelo
Lim inf Lim sup MC ni hi(%) Ni asc Ni desc Hi asc(%) Hi desc(%)
10 15 12.5 448 14.93 448 3000 14.93 100
15 20 17.5 451 15.03 899 2552 29.97 85.07
20 25 22.5 429 14.3 1328 2101 44.27 70.03
25 30 27.5 417 13.9 1745 1672 58.17 55.73
30 35 32.5 428 14.27 2173 1255 72.43 41.83
35 40 37.5 414 13.8 2587 827 86.23 27.57
40 45 42.5 413 13.77 3000 413 100 13.77
TOTAL - - 3000 100 - - - -
Autor: Grupo 3
# Graphs

# Histogram on a local scale: the y axis stops at the largest class frequency.
# The bins are taken from Hist_Temperatura so that the bars, the y limit and
# the custom x-axis ticks all refer to the same classes as the simplified
# table (a separate `breaks = 10` suggestion is not guaranteed to reproduce
# them).
hist(Temperatura, breaks = Hist_Temperatura$breaks,
     main = "Gráfica N°18 Distribución para la Temperatura del suelo ",
     xlab = "Temperatura °C",
     ylab = "Cantidad",
     ylim = c(0, max(Hist_Temperatura$counts)),
     col = "yellow",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x axis suppressed; drawn below at the class limits

axis(1, at = Hist_Temperatura$breaks,
     labels = Hist_Temperatura$breaks, las = 1,
     cex.axis = 0.9)

# Same histogram on a global scale: the y axis spans the whole sample size.
hist(Temperatura, breaks = Hist_Temperatura$breaks,
     main = "Gráfica N°19: Distribución de la Temperatura (°C) del Suelo",
     xlab = "Temperatura °C",
     ylab = "Cantidad",
     ylim = c(0, length(Temperatura)),
     col = "green",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")
axis(1, at = Hist_Temperatura$breaks,
     labels = Hist_Temperatura$breaks, las = 1,
     cex.axis = 0.9)

# Numeric copy of the percentage column for plotting. The source column is
# named "hi(%)", so it is read by its exact name instead of relying on `$hi`
# being partially matched to it.
TDF_Temperatura$hi <- as.numeric(TDF_Temperatura[["hi(%)"]])

# Drop the totals row (its class mark is "-") so only real classes are drawn.
datos_grafico <- subset(TDF_Temperatura, !(MC %in% c("-", "TOTAL")))


# Percentage distribution on a local scale (y axis 0-20 %).
barplot(datos_grafico$hi,
        space = 0,
        col = "blue",
        main = "Gráfica N°20: Distribución porcentual de la Temperatura del suelo",
        xlab = "Temperatura °C",
        ylab = "Porcentaje (%)",
        names.arg = datos_grafico$MC,
        ylim = c(0, 20))

# Percentage distribution on a global scale (y axis 0-100 %).
barplot(datos_grafico$hi,
        space = 0,
        col = "skyblue",
        main = "Gráfica N°21: Distribución porcentual de la Temperatura del suelo",
        xlab = "Temperatura °C",
        ylab = "Porcentaje (%)",
        names.arg = datos_grafico$MC,
        ylim = c(0, 100))

# Boxplot of the temperature.
# The original call plotted `Temperatura_out`, which is never defined in this
# script, and labelled the axis "pH"; the plot now uses the Temperatura vector
# and a matching axis label.
boxplot(
  Temperatura,
  horizontal = TRUE,
  col = "pink",
  main = "Gráfica Nº22: Distribución de la Temperatura",
  xlab = "Temperatura (°C)",
  outline = TRUE,  # draw points beyond the whiskers, if any
  pch = 19
)

# Ogives (absolute cumulative frequencies).
# Descending curve is drawn at the lower class limits, ascending curve at the
# upper class limits. Axis limits are derived from the data: a fixed 0-100
# x range is unrelated to the class limits, and letting plot() infer the
# y range from Ni_desc alone could clip the ascending curve.
plot(
  Li, Ni_desc,
  main = "Gráfica Nº23: Distribución Ascendente y Descendente de la Temperatura",
  xlab = "Temperatura (°C)",
  ylab = "Cantidad",
  xlim = range(Li, Ls),
  ylim = range(Ni_asc, Ni_desc),
  col = "red",
  type = "o",
  lwd = 3
)

lines(
  Ls, Ni_asc,
  col = "green",
  type = "o",
  lwd = 3
)

# Percentage ogives.
# Hi_asc / Hi_desc are cumulative proportions (the simplified table computes
# hi as ni / sum(ni)), so they are multiplied by 100 to match the
# "Porcentaje (%)" axis label. Axis limits are derived from the data instead
# of a fixed 0-100 x range.
plot(
  Li, Hi_desc * 100,
  main = "Gráfica Nº24: Distribución Ascendente y Descendente de la Temperatura",
  xlab = "Temperatura (°C)",
  ylab = "Porcentaje (%)",
  xlim = range(Li, Ls),
  ylim = range(Hi_asc, Hi_desc) * 100,
  col = "red",
  type = "o",
  lwd = 2
)

lines(
  Ls, Hi_asc * 100,
  col = "blue",
  type = "o",
  lwd = 3
)

# STATISTICAL INDICATORS

# Measures of central tendency

# Arithmetic mean (rounded to the nearest integer for reporting)
media <- round(mean(Temperatura), 0)
media
## [1] 27

# Mode: class mark(s) of the class(es) with the highest absolute frequency.
# TDF_Temperatura holds text once the totals row has been appended, so the
# totals row is dropped and the frequencies are converted to numbers first.
# Taking max() of the text column compares strings ("451" > "3000") and would
# also let the totals row take part in the comparison.
clases <- TDF_Temperatura[TDF_Temperatura$MC != "-", ]
frecuencias <- as.numeric(clases$ni)
max_frecuencia <- max(frecuencias)
moda <- clases$MC[frecuencias == max_frecuencia]
moda
## [1] "17.5"

# Median
mediana <- median(Temperatura)
mediana
## [1] 27

# MEASURES OF DISPERSION

# Variance
varianza <- var(Temperatura)
varianza
## [1] 101.6987

# Standard deviation.
# NOTE: the variable is named `sd` because later code reads it under that
# name; the call sd(x) still resolves to the stats function.
sd <- sd(Temperatura)
sd
## [1] 10.08458

# Coefficient of variation (%). It uses the exact mean: dividing by the
# integer-rounded `media` would bias the coefficient.
cv <- round((sd / mean(Temperatura)) * 100, 2)
cv

# MEASURES OF SHAPE

# Coefficient of skewness
library("e1071")
asimetria <- skewness(Temperatura, type = 2)
asimetria
## [1] 0.03241807

# Kurtosis
# NOTE(review): skewness() is called with type = 2 while kurtosis() uses the
# package default type; confirm whether both should use the same estimator.
curtosis <- kurtosis(Temperatura)
curtosis
## [1] -1.200746
# FINAL SUMMARY TABLE
# One-row data frame with the indicators computed above.
# check.names = FALSE keeps the column name "Valores Atípicos" as written;
# by default data.frame() rewrites it to "Valores.Atípicos".
tabla_indicadores <- data.frame(
  "Variable" = "Temperatura",
  "Rango" = paste0("[", min(Temperatura), " ; ", max(Temperatura), "]"),
  "X" = round(media, 0),
  "Me" = round(mediana, 0),
  "Mo" = paste(moda, collapse = ", "),  # several modes are joined with commas
  "V" = round(varianza, 2),
  "Sd" = round(sd, 0),
  "Cv" = cv,
  "As" = round(asimetria, 2),
  "K" = round(curtosis, 2),
  "Valores Atípicos" = "-",
  check.names = FALSE
)

library(gt)

# Build the gt version of the indicators table: titled header, author note,
# black outer/heading/label borders, gray inner rules, striped rows, and bold
# text for the row whose Variable is "Temperatura".
tabla_indicadores_gt <- gt(tabla_indicadores) %>%
  tab_header(
    title = md("Tabla N°18.1"),
    subtitle = md("*Indicadores estadísticos de la variable Temperatura*")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column labels
    heading.border.bottom.color = "black",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    # Table body
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = Variable == "Temperatura")
  )

# Print the table
tabla_indicadores_gt
Tabla N°18.1
Indicadores estadísticos de la variable Temperatura
Variable Rango X Me Mo V Sd Cv As K Valores.Atípicos
Temperatura [10 ; 45] 27 27 17.5 101.7 10 37.35 0.03 -1.2 -
Autor: Grupo 3