# =========================
# CARGAR LIBRERÍA
# =========================

library(gt)

# =========================
# CARGAR DATOS
# =========================

# Read the raw dataset. Strings are kept as character so the numeric
# conversion below is explicit.
datos <- read.csv(
  "waterPollution.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# =========================
# CONTINUOUS QUANTITATIVE VARIABLE
# =========================

# Fail early with a clear message if the expected column is absent;
# otherwise as.numeric(NULL) would silently give an empty vector.
if (!"composition_food_organic_waste_percent" %in% names(datos)) {
  stop(
    "La columna 'composition_food_organic_waste_percent' no existe en los datos.",
    call. = FALSE
  )
}

# `[[` is used instead of `$` so the column name is matched exactly.
composition_food_organic_waste_percent <- as.numeric(
  datos[["composition_food_organic_waste_percent"]]
)

# Drop missing values (including entries that could not be converted
# to a number).
composition_food_organic_waste_percent <- na.omit(
  composition_food_organic_waste_percent
)

# Without at least one valid observation the class limits computed
# later in the script cannot be built.
if (length(composition_food_organic_waste_percent) == 0) {
  stop(
    "No hay valores numéricos válidos en 'composition_food_organic_waste_percent'.",
    call. = FALSE
  )
}

# =========================
# NÚMERO DE INTERVALOS
# =========================

# Number of class intervals used for the frequency table.
k <- 9

# =========================
# BASIC VALUES
# =========================

# Extremes of the observed data.
min_val <- min(composition_food_organic_waste_percent)
max_val <- max(composition_food_organic_waste_percent)

# R is the range of the data; A is the width of each of the k classes.
R <- max_val - min_val
A <- R / k

# =========================
# LÍMITES DE CLASE
# =========================

# Exact (unrounded) class boundaries: k + 1 break points running from
# min_val to max_val in steps of A. Building them by index instead of
# seq(from, to, by) guarantees exactly k classes regardless of
# floating-point error in the end points.
limites <- min_val + A * (0:k)

# Pin the last break to the observed maximum so accumulated rounding
# error can never leave max_val outside the last class.
limites[k + 1] <- max_val

# Lower and upper limits rounded to 2 decimals; these are for display
# only and are NOT used for counting.
Li <- round(limites[-(k + 1)], 2)
Ls <- round(limites[-1], 2)

# Class mark: midpoint of the displayed limits.
MC <- round((Li + Ls) / 2, 2)

# =========================
# ABSOLUTE FREQUENCY
# =========================

# Classify each observation against the exact breaks. Counting against
# the rounded limits could misplace values lying between a true and a
# rounded boundary, and could drop the minimum when round(min_val, 2)
# is greater than min_val. Every class is [lower, upper) except the
# last one, which is closed on the right so it includes max_val.
clase <- findInterval(
  composition_food_organic_waste_percent,
  limites,
  rightmost.closed = TRUE
)

# Observations per class; kept as a double vector, as before.
ni <- as.numeric(tabulate(clase, nbins = k))

# =========================
# FRECUENCIA RELATIVA
# =========================

# Relative frequency of each class, in percent. It is left unrounded
# here so the cumulative sums below are computed from exact values.
hi <- ni / sum(ni) * 100

# =========================
# CUMULATIVE FREQUENCIES
# =========================

# Ascending cumulative counts, and their descending counterpart: the
# observations in the current class plus all later classes.
Niasc <- cumsum(ni)
Nidsc <- sum(ni) - Niasc + ni

# Cumulative percentages rounded to 2 decimals. The closing value of
# each series (last ascending, first descending) is forced to exactly
# 100 so rounding never shows 99.99 or 100.01.
Hiasc <- replace(round(cumsum(hi), 2), length(hi), 100)
Hidsc <- replace(round(rev(cumsum(rev(hi))), 2), 1, 100)

# Round the relative frequency only now, for display.
hi <- round(hi, 2)

# =========================
# CREAR TABLA
# =========================

# Assemble the frequency table as text with an explicit format per
# column. The TOTAL row forces every column to character anyway;
# formatting explicitly (instead of relying on implicit coercion)
# keeps two decimals on limits and percentages (e.g. "80.80", not
# "80.8") and prevents large counts from appearing in scientific
# notation (e.g. "1e+05").
TDF_Organic <- data.frame(
  "Li" = sprintf("%.2f", Li),
  "Ls" = sprintf("%.2f", Ls),
  "MC" = sprintf("%.2f", MC),
  "ni" = sprintf("%.0f", ni),
  "hi(%)" = sprintf("%.2f", hi),
  "Ni_asc" = sprintf("%.0f", Niasc),
  "Ni_desc" = sprintf("%.0f", Nidsc),
  "Hi_asc(%)" = sprintf("%.2f", Hiasc),
  "Hi_desc(%)" = sprintf("%.2f", Hidsc),
  check.names = FALSE,      # keep names such as "hi(%)" untouched
  stringsAsFactors = FALSE
)

# =========================
# ADD TOTAL ROW
# =========================

# Summary row: label in the first column, total count and 100 % in the
# frequency columns, and blanks where a total is not meaningful.
totales <- data.frame(
  "TOTAL",
  "",
  "",
  sprintf("%.0f", sum(ni)),
  sprintf("%.2f", 100),
  "",
  "",
  "",
  "",
  stringsAsFactors = FALSE
)

# rbind() matches data-frame columns by name, so the total row must
# carry the same column names as the table.
names(totales) <- names(TDF_Organic)

TDF_Organic <- rbind(TDF_Organic, totales)

# =========================
# TABLA FINAL
# =========================

# Render the table with gt: markdown title and subtitle, an authorship
# source note, and bold text on the row whose Li cell is "TOTAL".
gt(TDF_Organic) %>%
  tab_header(
    title = md("*Tabla N°1*"),
    subtitle = md(
      "**Distribución de frecuencias de la Composición de Desechos Orgánicos (%)**"
    )
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = Li == "TOTAL")
  )
# Rendered output (kept for reference; commented out so the script parses):
#
# Tabla N°1
# Distribución de frecuencias de la Composición de Desechos Orgánicos (%)
# Li Ls MC ni hi(%) Ni_asc Ni_desc Hi_asc(%) Hi_desc(%)
# 12.78 18.28 15.53 4371 21.97 4371 19893 21.97 100
# 18.28 23.78 21.03 171 0.86 4542 15522 22.83 78.03
# 23.78 29.29 26.54 344 1.73 4886 15351 24.56 77.17
# 29.29 34.79 32.04 10433 52.45 15319 15007 77.01 75.44
# 34.79 40.29 37.54 527 2.65 15846 4574 79.66 22.99
# 40.29 45.79 43.04 228 1.15 16074 4047 80.8 20.34
# 45.79 51.3 48.55 3223 16.2 19297 3819 97 19.2
# 51.3 56.8 54.05 117 0.59 19414 596 97.59 3
# 56.8 62.3 59.55 479 2.41 19893 479 100 2.41
# TOTAL 19893 100
# Autor: Grupo 3