# ====================================================
#  Librerias 
# ====================================================
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)

# ====================================================
#  Load the data set
# ====================================================
datos_nuevoartes <- read_excel("datos_nuevoartes.xlsx")


# ====================================================
#  VARIABLE: LONGITUDE
# ====================================================

# `[[` returns NULL for a missing column, which lets the check below fail
# with a clear message instead of an obscure error further down the script.
longitude <- datos_nuevoartes[["longitude"]]

if (is.null(longitude) || !is.numeric(longitude)) {
  stop(
    "La columna 'longitude' no existe o no es numérica en datos_nuevoartes.xlsx",
    call. = FALSE
  )
}

# Drop missing values; everything below works on complete observations only.
longitude <- longitude[!is.na(longitude)]

if (length(longitude) == 0) {
  stop(
    "La columna 'longitude' no contiene observaciones válidas",
    call. = FALSE
  )
}

# ====================================================
# CLASSIFICATION PARAMETERS
# ====================================================

k_long <- 12                   # number of classes
n_long <- length(longitude)    # number of observations

min_long <- min(longitude)
max_long <- max(longitude)

R_long <- max_long - min_long  # observed range
A_real <- R_long / k_long      # exact class width before rounding

# ====================================================
# "NICE" CLASS WIDTH
# ====================================================

# Round the exact width up to 2, 5, 10 or the next multiple of 10.
# A_real is a single number, so a plain if/else chain is used rather than
# the vectorised ifelse().
A_long <- if (A_real <= 2) {
  2
} else if (A_real <= 5) {
  5
} else if (A_real <= 10) {
  10
} else {
  ceiling(A_real / 10) * 10
}

# ====================================================
# CLASS LIMITS
# ====================================================

# First lower limit: the largest multiple of A_long not above the minimum.
Li0 <- floor(min_long / A_long) * A_long

Li_long <- seq(Li0, by = A_long, length.out = k_long)
Ls_long <- Li_long + A_long

# Because Li0 may sit below the minimum, k_long classes of width A_long can
# stop short of the maximum; the table and the histogram breaks would then
# misrepresent the top values, so flag it.
if (max(Ls_long) < max_long) {
  warning(
    "Las clases no cubren el máximo de longitude; revise k_long o la amplitud.",
    call. = FALSE
  )
}

# Class marks (midpoints).
MC_long <- round((Li_long + Ls_long) / 2, 2)

# ====================================================
# FREQUENCIES
# ====================================================

# Absolute frequency per class. Every class is [Li, Ls) except the last one,
# whose upper bound is max_long (inclusive) so the maximum is counted.
ni_long <- vapply(
  seq_len(k_long),
  function(i) {
    below_upper <- if (i < k_long) {
      longitude < Ls_long[i]
    } else {
      longitude <= max_long
    }
    sum(longitude >= Li_long[i] & below_upper)
  },
  numeric(1)
)

# Relative frequency in percent.
hi_long <- round((ni_long / sum(ni_long)) * 100, 2)

# Cumulative absolute frequencies (ascending and descending).
Ni_asc_long <- cumsum(ni_long)
Ni_dsc_long <- rev(cumsum(rev(ni_long)))

# Cumulative relative frequencies are derived from the cumulative counts, not
# from the rounded hi values: summing rounded percentages accumulates rounding
# error and can end at a value other than 100.
Hi_asc_long <- round((Ni_asc_long / sum(ni_long)) * 100, 2)
Hi_dsc_long <- round((Ni_dsc_long / sum(ni_long)) * 100, 2)

# ====================================================
# FREQUENCY TABLE
# ====================================================

# Class rows. Li is stored as text so the closing row can carry the "TOTAL"
# label; every other column stays numeric so gt can format it.
TDF_longitude <- data.frame(
  Li = as.character(Li_long),
  Ls = Ls_long,
  MC = MC_long,
  ni = ni_long,
  hi = hi_long,
  Ni_asc = Ni_asc_long,
  Ni_dsc = Ni_dsc_long,
  Hi_asc = Hi_asc_long,
  Hi_dsc = Hi_dsc_long
)

# Totals row. Cells without a total are NA (not ""), because an empty string
# would turn the whole column into character and disable number formatting.
TDF_longitude <- rbind(
  TDF_longitude,
  data.frame(
    Li = "TOTAL",
    Ls = NA_real_,
    MC = NA_real_,
    ni = sum(ni_long),
    hi = 100,
    Ni_asc = NA_real_,
    Ni_dsc = NA_real_,
    Hi_asc = NA_real_,
    Hi_dsc = NA_real_
  )
)

# ====================================================
# GT FORMATTING
# ====================================================

tabla_longitude <- TDF_longitude %>%
  gt() %>%
  # Two decimals for class marks and percentages, none for counts;
  # thousands separators are switched off.
  fmt_number(
    columns = c(MC, hi, Hi_asc, Hi_dsc),
    decimals = 2,
    use_seps = FALSE
  ) %>%
  fmt_number(
    columns = c(ni, Ni_asc, Ni_dsc),
    decimals = 0,
    use_seps = FALSE
  ) %>%
  # Show the NA cells of the totals row as blanks.
  sub_missing(columns = everything(), missing_text = "") %>%
  tab_header(
    title = md("Tabla N° 1"),
    subtitle = md(
      paste0("Distribución de frecuencias de Longitude (", k_long, " clases)")
    )
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo Geología")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = Li == "TOTAL")
  )

tabla_longitude
## Rendered output (recorded from a previous run):
## Tabla N° 1
## Distribución de frecuencias de Longitude (12 clases)
## Li Ls MC ni hi Ni_asc Ni_dsc Hi_asc Hi_dsc
## -180 -150 -165 85 0.77 85 11033 0.77 100.02
## -150 -120 -135 1893 17.16 1978 10948 17.93 99.25
## -120 -90 -105 1221 11.07 3199 9055 29 82.09
## -90 -60 -75 1526 13.83 4725 7834 42.83 71.02
## -60 -30 -45 228 2.07 4953 6308 44.9 57.19
## -30 0 -15 318 2.88 5271 6080 47.78 55.12
## 0 30 15 334 3.03 5605 5762 50.81 52.24
## 30 60 45 279 2.53 5884 5428 53.34 49.21
## 60 90 75 2034 18.44 7918 5149 71.78 46.68
## 90 120 105 1784 16.17 9702 3115 87.95 28.24
## 120 150 135 1083 9.82 10785 1331 97.77 12.07
## 150 180 165 248 2.25 11033 248 100.02 2.25
## TOTAL 11033 100.00
## Autor: Grupo Geología
# ====================================================
# HISTOGRAMS – LONGITUDE (absolute frequency)
# ====================================================
# Both plots share the same class breaks; local() keeps the helper variable
# out of the global workspace.
local({
  class_breaks <- c(Li_long, max(Ls_long))

  # Local histogram: y axis scaled to the tallest class.
  hist(
    longitude,
    breaks = class_breaks,
    right = FALSE,
    freq = TRUE,
    main = "Histograma local de Longitude (ni)",
    xlab = "Longitude (°)",
    ylab = "Frecuencia absoluta (ni)",
    col = "grey",
    border = "black"
  )

  # Global histogram: y axis runs from 0 to the total number of observations.
  hist(
    longitude,
    breaks = class_breaks,
    right = FALSE,
    freq = TRUE,
    ylim = c(0, sum(ni_long)),
    main = "Histograma global de Longitude (ni)",
    xlab = "Longitude (°)",
    ylab = "Frecuencia absoluta acumulable",
    col = "grey",
    border = "black"
  )
})

# HISTOGRAMS – hi (relative frequency)
#
# hist(freq = FALSE) draws densities (proportion divided by class width), not
# relative frequencies. To put real proportions on the y axis, the histogram
# is computed without plotting, its counts are replaced by proportions, and
# the object is then drawn as a frequency histogram.
local({
  rel_hist <- hist(
    longitude,
    breaks = c(Li_long, max(Ls_long)),
    right = FALSE,
    plot = FALSE
  )
  rel_hist$counts <- rel_hist$counts / sum(rel_hist$counts)

  # Local histogram: y axis scaled to the tallest class.
  plot(
    rel_hist,
    freq = TRUE,
    col = "grey",
    border = "black",
    main = "Histograma local de Longitude (hi)",
    xlab = "Longitude (°)",
    ylab = "Frecuencia relativa"
  )

  # Global histogram: y axis extends to 1.2 times the largest proportion
  # (hi_long is in percent, hence the division by 100).
  plot(
    rel_hist,
    freq = TRUE,
    col = "grey",
    border = "black",
    ylim = c(0, max(hi_long) / 100 * 1.2),
    main = "Histograma global de Longitude (hi)",
    xlab = "Longitude (°)",
    ylab = "Frecuencia relativa"
  )
})

# ==========================================
# COMBINED OGIVE – LONGITUDE
# ==========================================

# Ascending ogive: cumulative count plotted at each upper class limit.
# xlim spans both the lower and the upper limits; without it the axis is
# derived from Ls_long only and the first point of the descending ogive
# (at Li_long[1]) falls outside the plot region.
plot(
  Ls_long,
  Ni_asc_long,
  type = "o",
  pch = 19,
  col = "blue",
  xlim = range(Li_long, Ls_long),
  ylim = c(0, max(Ni_asc_long)),
  main = "Ojiva combinada de Longitude",
  xlab = "Límites de clase (°)",
  ylab = "Frecuencia acumulada"
)

# Descending ogive: cumulative count plotted at each lower class limit.
lines(
  Li_long,
  Ni_dsc_long,
  type = "o",
  pch = 17,
  col = "red"
)

# bty accepts "o" (box) or "n" (no box); "o" draws the frame.
legend(
  "right",
  legend = c("Ojiva ascendente (Ni ≤)", "Ojiva descendente (Ni ≥)"),
  col = c("blue", "red"),
  pch = c(19, 17),
  lty = 1,
  cex = 0.8,
  bty = "o"
)

# =============================
# BOX PLOT – LONGITUDE
# =============================

# Horizontal box plot; points beyond the whiskers are drawn as filled red dots.
boxplot(
  longitude,
  main = "Diagrama de caja de Longitude (con outliers)",
  xlab = "Longitude (°)",
  horizontal = TRUE,
  border = "black",
  col = "grey",
  outline = TRUE,  # draw the outlying points
  outcol = "red",  # colour of the outlying points
  pch = 19         # plotting symbol of the outlying points
)

# ====================================================
# SUMMARY STATISTICS
# ====================================================

# Range limits reported in the table (fixed at -180 and 180).
ri <- -180
rs <-  180

# Central tendency.
x  <- mean(longitude)
Me <- median(longitude)

# Mode: most frequent value after rounding to one decimal. which.max() returns
# the first value when several share the highest count.
Mo <- as.numeric(names(which.max(table(round(longitude, 1)))))

# Dispersion. The coefficient of variation is taken relative to |mean| so it
# cannot come out negative. NOTE(review): it becomes very large when the mean
# is close to zero, so interpret it with care for this variable.
sd_long <- sd(longitude)
CV <- (sd_long / abs(x)) * 100

# Shape: third and fourth central moments scaled by the sample standard
# deviation; 3 is subtracted from the kurtosis (excess kurtosis).
As <- mean((longitude - x)^3) / sd_long^3
K  <- mean((longitude - x)^4) / sd_long^4 - 3

TablaIndicadores_longitude <- data.frame(
  Var = "Longitude",
  ri = ri,
  rs = rs,
  x  = round(x, 2),
  Me = round(Me, 2),
  Mo = round(Mo, 2),
  sd = round(sd_long, 2),
  CV = round(CV, 2),
  As = round(As, 2),
  K  = round(K, 2)
)

tabla_longitude_indicadores <- TablaIndicadores_longitude %>%
  gt() %>%
  tab_header(
    title = md("Tabla N° X"),
    subtitle = md("Indicadores estadísticos de la variable Longitude")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo Geología")
  )

tabla_longitude_indicadores
## Rendered output (recorded from a previous run):
## Tabla N° X
## Indicadores estadísticos de la variable Longitude
## Var ri rs x Me Mo sd CV As K
## Longitude -180 180 2.52 19.69 -122.3 100.91 4003.6 -0.06 -1.67
## Autor: Grupo Geología
# ====================================================
# OUTLIER DETECTION – VARIABLE LONGITUDE (IQR)
# ====================================================
# =========================
# QUARTILES AND IQR
# =========================

# names = FALSE drops the "25%"/"75%" labels so they do not leak into the
# data frame built below as row names.
Q1 <- quantile(longitude, 0.25, names = FALSE)
Q3 <- quantile(longitude, 0.75, names = FALSE)
IQR_long <- Q3 - Q1

# =========================
# OUTLIER FENCES
# =========================

# Fences placed 1.5 IQR below Q1 and above Q3.
minimo <- Q1 - 1.5 * IQR_long
maximo <- Q3 + 1.5 * IQR_long

# =========================
# IDENTIFY OUTLIERS
# =========================

outliers <- longitude[longitude < minimo | longitude > maximo]

n_outliers <- length(outliers)

# =========================
# OUTLIER TABLE
# =========================

# n_outliers is a single value, so if/else is used instead of ifelse(); the
# count is converted to text so the column has the same type in both cases.
# Column names are plain ASCII (minimo / maximo) so they match each other and
# do not depend on the session encoding.
Tabla_outliers <- data.frame(
  Outliers = if (n_outliers == 0) {
    "No se detectan"
  } else {
    as.character(n_outliers)
  },
  minimo   = round(minimo, 2),
  maximo   = round(maximo, 2)
)

# =========================
# RENDERED TABLE (GT)
# =========================

tabla_outliers_gt <- Tabla_outliers %>%
  gt() %>%
  tab_header(
    title = md("Tabla 11.1"),
    subtitle = md("Detección de outliers – Variable Longitude (método IQR)")
  ) %>%
  cols_label(
    Outliers = "Outliers",
    minimo = "Límite inferior",
    maximo = "Límite superior"
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo Geología")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    column_labels.border.bottom.color = "black",
    heading.border.bottom.color = "black"
  )

tabla_outliers_gt
## Rendered output (recorded from a previous run):
## Tabla 11.1
## Detección de outliers – Variable Longitude (método IQR)
## Outliers Límite inferior Límite superior
## No se detectan -410.6 396.68
## Autor: Grupo Geología