# ==================================================================
# UNIVERSIDAD CENTRAL DEL ECUADOR
# ANÁLISIS COMPLETO DE LA VARIABLE LONGITUD
# Sedimentos Marinos (2000-2024)
# Autor: Grace Vega - Grupo 2
# ==================================================================

# Set the working directory; the CSV below is read with a relative path.
# The path literal must use straight quotes (curly quotes are not valid R).
# NOTE(review): hard-coded absolute, user-specific path — the script only
# runs where this folder exists; consider a project-relative path instead.
setwd("C:/Users/Grace/Favorites/Restudio (Estadistica)")

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)

# 1. Read and clean the data
# The file is read as semicolon-separated text with "." as decimal mark.
datos <- read.csv("Sedimentos Marinos.csv", header = TRUE, sep = ";", dec = ".")

# Fail early with a clear message if the expected column is absent.
# `[[` is used below instead of `$` so that partial name matching can
# never silently select a different column.
if (!"LONGITUDE" %in% names(datos)) {
  stop("Column 'LONGITUDE' not found in 'Sedimentos Marinos.csv'",
       call. = FALSE)
}

# Strip every character that is not a digit, a dot or a minus sign, then
# convert to numeric. Entries that still cannot be parsed become NA (R
# reports them with a coercion warning) and are dropped.
longitud <- as.numeric(gsub("[^0-9.-]", "", datos[["LONGITUDE"]]))
longitud <- na.omit(longitud)

# The steps that follow (min/max, class limits) need at least one value.
if (length(longitud) == 0) {
  stop("No valid numeric values in column 'LONGITUDE'", call. = FALSE)
}

# 2. Basic quantities (Sturges' rule)
n <- length(longitud)
stopifnot(n > 0)                 # min()/max() of an empty vector are +/-Inf
k <- floor(1 + 3.3 * log10(n))   # number of classes
minimo <- min(longitud)
maximo <- max(longitud)
A <- (maximo - minimo) / k       # class width

# 3. Class limits and class marks
# All k + 1 boundaries come from ONE sequence, so Li and Ls always have
# exactly k elements each and Ls[i] is identical to Li[i + 1]. Building
# them with two separate `seq(..., by = A)` calls made their lengths depend
# on floating-point accumulation and failed outright when A is 0.
limites <- seq(minimo, maximo, length.out = k + 1)
Li <- limites[-length(limites)]  # lower limits
Ls <- limites[-1]                # upper limits
MC <- (Li + Ls) / 2              # class marks (midpoints)

# 4. Frequencies
# Classes are left-closed / right-open, [Li, next Li), except the last one,
# which takes every value >= its lower limit (so the maximum is counted).
# findInterval() returns, for each observation, the index of the last lower
# limit that is <= the value; tabulate() then counts observations per class.
# Each observation therefore lands in exactly one class.
clase <- findInterval(longitud, Li)
ni <- as.numeric(tabulate(clase, nbins = length(Li)))

# Relative frequency (%) per class.
hi <- round((ni / n) * 100, 4)

# Cumulative absolute frequencies, ascending and descending.
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))

# Cumulative percentages are derived from the cumulative COUNTS, not from
# the already-rounded `hi`; summing rounded values accumulates rounding
# error (e.g. a final 99.9999 instead of 100).
Hiasc <- round((Niasc / n) * 100, 4)
Hidsc <- round((Nidsc / n) * 100, 4)

# 5. Full frequency-distribution table
# One row per class; limits and class marks are shown with 6 decimals.
TDF <- data.frame(
  Li    = round(Li, 6),
  Ls    = round(Ls, 6),
  MC    = round(MC, 6),
  ni    = ni,
  hi    = hi,
  Niasc = Niasc,
  Nidsc = Nidsc,
  Hiasc = Hiasc,
  Hidsc = Hidsc
)

# Totals row: only `ni` and `hi` carry a total; every other column shows a
# placeholder. Appending it turns those columns into character columns.
guion <- "--"
total <- data.frame(
  Li = guion, Ls = guion, MC = guion,
  ni = sum(ni), hi = 100,
  Niasc = guion, Nidsc = guion,
  Hiasc = guion, Hidsc = guion
)

TDF <- rbind(TDF, total)

# 6. Print the table title, then the table itself via knitr::kable
# The banner is emitted in a single cat() call; sep = "" keeps the pieces
# from being joined with spaces.
barra <- "====================================================================\n"
cat(
  "\n\n", barra,
  "          TABLA Nº 1 - DISTRIBUCIÓN DE FRECUENCIAS DE LONGITUD        \n",
  barra,
  "en sedimentos marinos (2000-2024)  |  Autor: Grace Vega - Grupo 2\n\n",
  sep = ""
)

# Display headers, in the same order as the columns of TDF.
encabezados <- c(
  "Lím. Inf.", "Lím. Sup.", "Marca Clase", "ni", "hi (%)",
  "Ni ↑", "Ni ↓", "Hi ↑ (%)", "Hi ↓ (%)"
)
kable(TDF, col.names = encabezados, align = "c")
## Lím. Inf. Lím. Sup. Marca Clase ni hi (%) Ni ↑ Ni ↓ Hi ↑ (%) Hi ↓ (%)
## -939.165 -789.423833 -864.294417 261 11.5794 261 2254 11.5794 99.9999
## -789.423833 -639.682667 -714.55325 1300 57.6752 1561 1993 69.2546 88.4205
## -639.682667 -489.9415 -564.812083 0 0.0000 1561 693 69.2546 30.7453
## -489.9415 -340.200333 -415.070917 19 0.8429 1580 693 70.0975 30.7453
## -340.200333 -190.459167 -265.32975 4 0.1775 1584 674 70.275 29.9024
## -190.459167 -40.718 -115.588583 512 22.7152 2096 670 92.9902 29.7249
## -40.718 109.023167 34.152583 36 1.5972 2132 158 94.5874 7.0097
## 109.023167 258.764333 183.89375 0 0.0000 2132 122 94.5874 5.4125
## 258.764333 408.5055 333.634917 25 1.1091 2157 122 95.6965 5.4125
## 408.5055 558.246667 483.376083 77 3.4161 2234 97 99.1126 4.3034
## 558.246667 707.987833 633.11725 0 0.0000 2234 20 99.1126 0.8873
## 707.987833 857.729 782.858417 20 0.8873 2254 20 99.9999 0.8873
## 2254 100.0000
# 7. Plots
# One grey shade per class, shared by the histogram and the bar plot.
n_clases <- length(ni)
colores <- gray.colors(n_clases, start = 0.3, end = 0.9)

# Histogram of absolute frequencies.
# Breaks are n_clases + 1 equally spaced boundaries running exactly from
# the minimum to the maximum, so they always span the data without a
# floating-point fudge term. With right = FALSE the classes are
# left-closed, and hist()'s default include.lowest = TRUE puts the maximum
# in the last class.
hist(
  longitud,
  breaks = seq(minimo, maximo, length.out = n_clases + 1),
  right = FALSE,
  col = colores,
  main = "Gráfica 1 y 2 - Distribución absoluta de Longitud",
  xlab = "Longitud (grados)",
  ylab = "Frecuencia absoluta"
)

# Bar plot of relative frequencies (%), one bar per class, labelled with
# the class mark. `hi` holds only the per-class values (the totals row
# lives in TDF, not in `hi`), so it is plotted as is; the previous
# `hi[-nrow(TDF)]` only worked because the negative index was out of range.
barplot(
  hi,
  space = 0,
  col = colores,
  names.arg = round(MC, 4),
  main = "Gráfica 3 y 4 - Distribución relativa de Longitud",
  xlab = "Longitud (grados)",
  ylab = "Porcentaje (%)",
  ylim = c(0, 100)
)

# Horizontal box plot of the cleaned values.
boxplot(
  longitud,
  horizontal = TRUE,
  col = "lightblue",
  main = "Gráfica 5 - Boxplot de Longitud",
  xlab = "Longitud (grados)"
)