This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

#Estadística Descriptiva

#Alexander Sailema

#20/11/2025
library(gt)
library(dplyr)
# Load the data set from the local CSV file
ruta_csv <- "C:/Users/Alexander/Downloads/soil_pollution_diseases (2).csv"
datos <- read.csv(file = ruta_csv, sep = ",", dec = ".")

# Extract the continuous quantitative variable analysed in this notebook
Materia_Organica_S <- datos$Soil_Organic_Matter_.

# Manual construction of the frequency distribution table

# Number of classes from 1 + 3.3 * log10(n), truncated to an integer.
# n is taken from the data rather than being hard-coded.
n_obs <- length(Materia_Organica_S)
k <- floor(1 + (3.3 * log10(n_obs)))

# NOTE: `min` and `max` shadow the base functions of the same name. The names
# are kept because later chunks of this script read them as the data limits;
# calls such as min(x) still resolve to the base functions.
min <- min(Materia_Organica_S)
max <- max(Materia_Organica_S)
R <- max - min  # range of the data
A <- R / k      # class width

# A single vector of k + 1 equally spaced break points yields exactly k
# classes, so the lower and upper limits can never differ in length.
cortes <- seq(from = min, to = max, length.out = k + 1)
Li <- round(cortes[-(k + 1)], 4)
Ls <- round(cortes[-1], 4)
MC <- round((Li + Ls) / 2, 2)  # class marks

# Absolute frequencies: every class is [Li, Ls) except the last one, which is
# closed on the right so that the maximum value is counted.
ultima_clase <- length(Li)
ni <- vapply(
  seq_along(Li),
  function(i) {
    if (i == ultima_clase) {
      sum(Materia_Organica_S >= Li[i] & Materia_Organica_S <= max)
    } else {
      sum(Materia_Organica_S >= Li[i] & Materia_Organica_S < Ls[i])
    }
  },
  numeric(1)
)
sum(ni)
## [1] 3000

# Relative frequencies in percent
hi <- ni / sum(ni) * 100

sum(hi)
## [1] 100

# Cumulative frequencies (ascending / descending), absolute and in percent
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(cumsum(hi))
Hidsc <- round(rev(cumsum(rev(hi))))

TDFMateria_Organica_S <- data.frame(
  Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc
)

total_ni <- sum(ni)
total_hi <- 100

# Append the totals row; the blank strings turn the affected columns into
# character columns, which is only intended for display.
TDFMateria_Organica_SCompleto <- rbind(
  TDFMateria_Organica_S,
  data.frame(Li = " Total", Ls = " ", MC = " ",
             ni = total_ni, hi = total_hi, Niasc = " ", Nidsc = " ",
             Hiasc = " ", Hidsc = " ")
)

#Formato tabla

#Tabla

library(gt)
library(dplyr)

# Optional rounding of the relative frequencies. This runs after
# TDFMateria_Organica_SCompleto was built, so it only affects later uses of
# TDFMateria_Organica_S$hi; the table below formats hi through fmt_number().
TDFMateria_Organica_S$hi <- round(TDFMateria_Organica_S$hi, 2)

# Formatted frequency table (Table 8)
tabla_Materia_Organica_S <- TDFMateria_Organica_SCompleto %>%
  gt() %>%
  fmt_number(
    columns = hi,
    decimals = 2
  ) %>%
  tab_header(
    title = md("*Tabla Nº8*"),
    subtitle = md("**Tabla de Materia Organica Suelo**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo3")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      # The totals label may carry surrounding whitespace (" Total"), so the
      # comparison is done on the trimmed value; otherwise no row matches and
      # the bold style is never applied.
      rows = trimws(Li) == "Total"
    )
  )

tabla_Materia_Organica_S
Tabla Nº8
Tabla de Materia Organica Suelo
Li Ls MC ni hi Niasc Nidsc Hiasc Hidsc
0.5 1.2917 0.9 280 9.33 280 3000 9 100
1.2917 2.0833 1.69 235 7.83 515 2720 17 91
2.0833 2.875 2.48 264 8.80 779 2485 26 83
2.875 3.6667 3.27 256 8.53 1035 2221 34 74
3.6667 4.4583 4.06 252 8.40 1287 1965 43 66
4.4583 5.25 4.85 250 8.33 1537 1713 51 57
5.25 6.0417 5.65 255 8.50 1792 1463 60 49
6.0417 6.8333 6.44 240 8.00 2032 1208 68 40
6.8333 7.625 7.23 236 7.87 2268 968 76 32
7.625 8.4167 8.02 236 7.87 2504 732 83 24
8.4167 9.2083 8.81 249 8.30 2753 496 92 17
9.2083 10 9.6 247 8.23 3000 247 100 8
Total 3000 100.00
Autor: Grupo3
# Histogram with default breaks. The returned object is kept because its
# breaks, mids and counts feed the simplified frequency table below.
# The x label names the plotted variable (it previously read
# "Precipitacion(mm)", left over from another variable's analysis).
histoP <- hist(
  Materia_Organica_S,
  main = "Distribución de la Materia Organica Suelo",
  xlab = "Materia Organica Suelo",
  ylab = "Cantidad",
  col = "blue"
)

# Simplified frequency table derived from the histogram object
Limites <- histoP$breaks
LimInf <- head(Limites, -1)  # every break except the last one
LimSup <- tail(Limites, -1)  # every break except the first one
Mc <- histoP$mids
ni <- histoP$counts
sum(ni)
## [1] 3000
# Relative frequencies in percent, rounded to two decimals
hi <- round(prop.table(ni) * 100, 2)
sum(hi)
## [1] 100
# Cumulative frequencies (ascending / descending), absolute and in percent
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
Hi_asc <- round(cumsum(hi), 2)
Hi_dsc <- round(rev(cumsum(rev(hi))), 2)

TDFM <- data.frame(
  LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc
)


totalni <- sum(ni)
totalhi <- 100

# Totals row appended at the bottom; blank strings are display placeholders
fila_total <- data.frame(
  LimInf = "Total",
  LimSup = " ",
  Mc = " ",
  ni = totalni,
  hi = totalhi,
  Ni_asc = " ",
  Ni_dsc = " ",
  Hi_asc = " ",
  Hi_dsc = " "
)
TDFMCompleto <- rbind(TDFM, fila_total)

# Formatted version of the simplified frequency table (Table 9)
tablaMat <- gt(TDFMCompleto) %>%
  tab_header(
    title = md("*Tabla Nº9*"),
    subtitle = md("**Tabla simplificada de Materia Organica Suelo**")
  ) %>%
  tab_source_note(source_note = md("Autor:Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.color = "black",
    table.border.bottom.style = "solid",
    # Heading and column-label borders
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Table body
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  # Emphasise the totals row
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = LimInf == "Total")
  )

tablaMat
Tabla Nº9
Tabla simplificada de Materia Organica Suelo
LimInf LimSup Mc ni hi Ni_asc Ni_dsc Hi_asc Hi_dsc
0 1 0.5 185 6.17 185 3000 6.17 100
1 2 1.5 304 10.13 489 2815 16.3 93.83
2 3 2.5 329 10.97 818 2511 27.27 83.7
3 4 3.5 334 11.13 1152 2182 38.4 72.73
4 5 4.5 302 10.07 1454 1848 48.47 61.6
5 6 5.5 325 10.83 1779 1546 59.3 51.53
6 7 6.5 309 10.30 2088 1221 69.6 40.7
7 8 7.5 286 9.53 2374 912 79.13 30.4
8 9 8.5 308 10.27 2682 626 89.4 20.87
9 10 9.5 318 10.60 3000 318 100 10.6
Total 3000 100.00
Autor:Grupo 3
#Histograma

# Frequency histograms using the manually computed class limits

# Local scale: the y axis is fitted to the class counts
hist(
  Materia_Organica_S,
  breaks = seq(from = min, to = max, by = A),
  main = "Gráfica Nº30: Frecuencia de la Materia Organica Suelo (Local)",
  xlab = "Materia Organica Suelo",
  ylab = "Frecuencia",
  col = "#4A90E2",
  cex.main = 1.1,
  cex.lab = 1.1
)

# Global scale: the y axis spans the whole sample size. The title and x label
# name the plotted variable (they previously referred to precipitation), and
# the upper limit is taken from the data instead of a hard-coded 3000.
hist(
  Materia_Organica_S,
  breaks = seq(from = min, to = max, by = A),
  main = "Gráfica Nº31: Frecuencia de la Materia Organica Suelo (Global)",
  xlab = "Materia Organica Suelo",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Materia_Organica_S)),
  cex.main = 1.1,
  cex.lab = 1.1
)

# Relative-frequency (hi) bar charts, one bar per class, labelled with the
# class marks. Titles and x labels name the plotted variable; they previously
# referred to precipitation, left over from another variable's analysis.

# Local scale: the y axis is fitted to the percentages
barplot(
  TDFMateria_Organica_S$hi,
  space = 0,
  col = "skyblue",
  main = "Gráfica Nº32: Porcentaje de la Materia Organica Suelo (Local)",
  xlab = "Materia Organica Suelo",
  ylab = "Porcentaje (%)",
  names.arg = TDFMateria_Organica_S$MC,
  cex.names = 0.9,
  cex.main = 1.1,
  cex.lab = 1.1
)

# Global scale: the y axis spans 0 to 100 percent
barplot(
  TDFMateria_Organica_S$hi,
  space = 0,
  col = "yellow",
  main = "Gráfica Nº33: Porcentaje de la Materia Organica Suelo (Global)",
  xlab = "Materia Organica Suelo",
  ylab = "Porcentaje (%)",
  names.arg = TDFMateria_Organica_S$MC,
  ylim = c(0, 100),
  cex.names = 0.9,
  cex.main = 1.1,
  cex.lab = 1.1
)

# Box plot of the variable, drawn horizontally

boxplot(
  Materia_Organica_S,
  main = "Grafica Nº34: Distribución de la Materia Organica Suelo",
  xlab = "Materia Organica Suelo",
  col = "pink",
  horizontal = TRUE
)

# Frequency ogives (ascending and descending)
# The descending curve is drawn first, against the lower class limits; the
# x axis is suppressed here and added manually at the end.
plot(
  Li, Nidsc,
  type = "o",
  col = "red",
  lwd = 3,
  las = 1,
  cex.axis = 0.8,
  xaxt = "n",
  xlim = c(min, max),
  xlab = "Materia Organica Suelo",
  ylab = "Cantidad",
  main = "Gráfica Nº35: Distribución de Frecuencias Ascendente y Descendente 
  de la Materia Organica Suelo"
)

# Ascending curve, drawn against the upper class limits
lines(
  Ls, Niasc,
  type = "o",
  lwd = 3,
  col = "green"
)

# Custom x axis: ten evenly spaced positions rounded to whole numbers
axis(side = 1, at = round(seq(min, max, length.out = 10)))

# Percentage ogives (ascending and descending)
# The descending curve is drawn first, against the lower class limits; the
# x axis is suppressed here and added manually at the end.
plot(
  Li, Hidsc,
  type = "o",
  col = "red",
  lwd = 2,
  xaxt = "n",
  xlim = c(min, max),
  xlab = "Materia Organica Suelo",
  ylab = "Porcentaje (%)",
  main = "Gráfica Nº36: Distribución Porcentual Ascendente y Descendente
  de la Materia Organica Suelo"
)

# Ascending curve, drawn against the upper class limits
lines(
  Ls, Hiasc,
  type = "o",
  lwd = 3,
  col = "blue"
)

# Custom x axis: ten evenly spaced positions rounded to whole numbers
axis(side = 1, at = round(seq(min, max, length.out = 10)))

# INDICADORES


library(e1071)
library(gt)


# --- MEASURES OF CENTRAL TENDENCY ---

# Median
Me <- median(Materia_Organica_S)
Me
## [1] 5.13
# Mean
X <- mean(Materia_Organica_S)
X
## [1] 5.175093
# Mode: the value(s) with the highest frequency in the data.
# as.data.frame(table(.)) stores the distinct values as a factor, so the
# selected levels are converted back to numbers; otherwise `moda` would be a
# factor carrying every distinct value as a level.
Tabla_Mat <- as.data.frame(table(Materia_Organica_S))
max_frec <- max(Tabla_Mat$Freq)
moda <- as.numeric(
  as.character(Tabla_Mat$Materia_Organica_S[Tabla_Mat$Freq == max_frec])
)
moda
## [1] 2.50 5.87
# Measures of dispersion

# Variance
V <- var(Materia_Organica_S)

# Standard deviation, rounded to two decimals
desv <- round(sd(Materia_Organica_S), 2)

# Coefficient of variation, in percent (unrounded sd over the mean)
CV <- 100 * (sd(Materia_Organica_S) / X)
CV
## [1] 53.36392
# Measures of shape

# Skewness coefficient
library(e1071)
As <- skewness(Materia_Organica_S)
As
## [1] 0.03634696
# Kurtosis
K <- kurtosis(Materia_Organica_S)
K
## [1] -1.195864
Variable <- "Materia Organica Suelo"
# Range of the data formatted as "[min ; max]"
Rango <- sprintf(
  "[%s ; %s]",
  min(Materia_Organica_S),
  max(Materia_Organica_S)
)
Rango
## [1] "[0.5 ; 10]"
# One-row summary of every indicator computed above
tabla_indicadores <- data.frame(
  Variable      = "Materia Organica Suelo",
  Rango         = Rango,
  Media         = round(X, digits = 3),
  Mediana       = round(Me, digits = 3),
  # Several modes are shown as a comma-separated list
  Moda          = toString(moda),
  Varianza      = round(V, digits = 3),
  Desv_Estandar = round(desv, digits = 3),
  CV            = round(CV, digits = 2),
  Asimetria     = round(As, digits = 3),
  Curtosis      = round(K, digits = 3)
)

tabla_indicadores
library(gt)
library(dplyr)
# Formatted table of statistical indicators (Table 10)
tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 10*"),
    # No space before the closing "**": with a trailing space the markers are
    # not parsed as bold and the asterisks are rendered literally.
    subtitle = md("**Indicadores Estadísticos de Materia Organica Suelo**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 3")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
Tabla Nro. 10
**Indicadores Estadísticos de Materia Organica Suelo **
Variable Rango Media Mediana Moda Varianza Desv_Estandar CV Asimetria Curtosis
Materia Organica Suelo [0.5 ; 10] 5.175 5.13 2.5, 5.87 7.627 2.76 53.36 0.036 -1.196
Autor: Grupo 3