# =========================
# CARGAR LIBRERÍA
# =========================

library(gt)

# =========================
# LOAD DATA
# =========================

# Read the comma-separated source file; text columns stay as character.
datos <- read.csv(
  file = "waterPollution.csv", sep = ",", stringsAsFactors = FALSE
)

# =========================
# CONTINUOUS QUANTITATIVE VARIABLE
# =========================

# Coerce the column to numeric and drop the missing values in a single step.
composition_food_organic_waste_percent <- na.omit(
  as.numeric(datos$composition_food_organic_waste_percent)
)

# =========================
# NUMBER OF INTERVALS
# =========================

# 1 + 3.3 * log10(n), truncated to a whole number of classes.
k <- floor(3.3 * log10(length(composition_food_organic_waste_percent)) + 1)

# =========================
# BASIC VALUES
# =========================

# Extremes of the cleaned variable.
min_val <- min(composition_food_organic_waste_percent)
max_val <- max(composition_food_organic_waste_percent)

# R: range of the data. A: width of each class (range split into k parts).
R <- max_val - min_val
A <- R / k

# =========================
# CLASS LIMITS
# =========================

# Fail early on an empty or degenerate sample: the class edges below need at
# least one class and a finite class width.
stopifnot(is.finite(k), k >= 1, is.finite(A))

# Exact (unrounded) class edges: k + 1 values starting at min_val in steps of
# A. Deriving them from k guarantees exactly k classes, so Li and Ls always
# have the same length. The last edge is pinned to max_val so floating-point
# error in min_val + k * A can never leave the maximum outside the last class.
class_edges <- min_val + A * (0:k)
class_edges[k + 1] <- max_val

# Lower and upper limits rounded to 2 decimals; these are for display only.
Li <- round(class_edges[-(k + 1)], 2)
Ls <- round(class_edges[-1], 2)

# Class mark (midpoint of the displayed limits)
MC <- round((Li + Ls) / 2, 2)

# =========================
# ABSOLUTE FREQUENCY
# =========================

# Classify every observation against the exact edges rather than the rounded
# limits: comparing with rounded limits can drop values that fall between
# min_val and a rounded-up Li[1], or move values near an edge into the wrong
# class. Intervals are [lower, upper), except the last one, which also
# includes its upper edge (rightmost.closed = TRUE) so max_val is counted.
class_index <- findInterval(
  composition_food_organic_waste_percent,
  class_edges,
  rightmost.closed = TRUE
)

# Observations per class; classes with no observations get 0.
ni <- as.numeric(tabulate(class_index, nbins = k))

# =========================
# RELATIVE FREQUENCY
# =========================

# Total number of classified observations.
n_total <- sum(ni)

# Share of each class as a percentage, rounded to 2 decimals.
hi <- round((ni / n_total) * 100, 2)

# =========================
# CUMULATIVE FREQUENCIES
# =========================

# Ascending and descending cumulative counts.
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))

# Cumulative percentages are derived from the cumulative counts and rounded
# once. Summing the already-rounded hi values would accumulate rounding error
# (e.g. three equal classes would end at 99.99 instead of 100).
Hiasc <- round((Niasc / n_total) * 100, 2)
Hidsc <- round((Nidsc / n_total) * 100, 2)

# =========================
# BUILD TABLE
# =========================

# One row per class, one column per computed statistic.
TDF_Organic <- data.frame(
  Li = Li, Ls = Ls, MC = MC,
  ni = ni, hi = hi,
  Niasc = Niasc, Nidsc = Nidsc,
  Hiasc = Hiasc, Hidsc = Hidsc
)

# =========================
# ADD TOTAL ROW
# =========================

# Copy the first row to get a one-row frame with the same columns, then
# overwrite every cell. The replacement is a character vector, so the total
# row (and, after rbind(), every column of the table) is stored as text.
totales <- TDF_Organic[1, ]

totales[1, ] <- c("TOTAL", rep("", 2), sum(ni), 100, rep("", 4))

TDF_Organic <- rbind(TDF_Organic, totales)

# =========================
# COLUMN NAMES
# =========================

# Display headers, in the same order as the columns above.
names(TDF_Organic) <- c(
  "Li", "Ls", "MC",
  "ni", "hi(%)",
  "Ni_asc", "Ni_desc",
  "Hi_asc(%)", "Hi_desc(%)"
)

# =========================
# FINAL TABLE
# =========================

# Turn the data frame into a gt table object.
tabla_gt <- gt(TDF_Organic)

# Title and subtitle, both written in Markdown.
tabla_gt <- tab_header(
  tabla_gt,
  title = md("*Tabla N°1*"),
  subtitle = md(
    "**Distribución de frecuencias de la Composición de Desechos Orgánicos (%)**"
  )
)

# Source note shown under the table.
tabla_gt <- tab_source_note(tabla_gt, source_note = md("Autor: Grupo 3"))

# Bold the body cells of the row whose Li column holds "TOTAL". The call is
# left as the last top-level expression so the table is displayed.
tab_style(
  tabla_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Li == "TOTAL")
)
# Rendered output of the table above (kept for reference; commented out so
# the script can be parsed and run as a whole):
#
# Tabla N°1
# Distribución de frecuencias de la Composición de Desechos Orgánicos (%)
# Li Ls MC ni hi(%) Ni_asc Ni_desc Hi_asc(%) Hi_desc(%)
# 12.78 16.08 14.43 370 1.86 370 19893 1.86 100
# 16.08 19.38 17.73 4001 20.11 4371 19523 21.97 98.14
# 19.38 22.68 21.03 0 0 4371 15522 21.97 78.03
# 22.68 25.99 24.34 493 2.48 4864 15522 24.45 78.03
# 25.99 29.29 27.64 22 0.11 4886 15029 24.56 75.55
# 29.29 32.59 30.94 10332 51.94 15218 15007 76.5 75.44
# 32.59 35.89 34.24 460 2.31 15678 4675 78.81 23.5
# 35.89 39.19 37.54 168 0.84 15846 4215 79.65 21.19
# 39.19 42.49 40.84 228 1.15 16074 4047 80.8 20.35
# 42.49 45.79 44.14 0 0 16074 3819 80.8 19.2
# 45.79 49.09 47.44 3223 16.2 19297 3819 97 19.2
# 49.09 52.4 50.75 0 0 19297 596 97 3
# 52.4 55.7 54.05 0 0 19297 596 97 3
# 55.7 59 57.35 117 0.59 19414 596 97.59 3
# 59 62.3 60.65 479 2.41 19893 479 100 2.41
# TOTAL 19893 100
# Autor: Grupo 3