1. CARGA DE DATOS Y LIBRERÍAS

# Cargar librerías
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)

# Load the dataset from the Excel workbook
datos_nuevoartes <- read_excel(path = "datos_deslizamientos.xlsx")

2. EXTRAER LA VARIABLE

# Extract the longitude column. Fail fast when the column is absent or was
# not read as numeric: `$` on a missing column yields NULL, and every later
# statistic would then be computed on an empty vector.
stopifnot(
  "longitude" %in% names(datos_nuevoartes),
  is.numeric(datos_nuevoartes[["longitude"]])
)
longitude <- datos_nuevoartes[["longitude"]]

# Drop missing values
longitude <- longitude[!is.na(longitude)]

3. CONTEO

# Requested number of classes
k_long <- 12

# Number of observations; the class construction below needs at least one
n_long <- length(longitude)
stopifnot(n_long > 0)

# Minimum and maximum values
min_long <- min(longitude)
max_long <- max(longitude)

# Range
R_long <- max_long - min_long

# Raw class width
A_real <- R_long / k_long

# Round the class width up to a convenient value: 2, 5, 10, or the next
# multiple of 10. A_real is a scalar, so plain if/else is used rather than
# the vectorised ifelse().
A_long <- if (A_real <= 2) {
  2
} else if (A_real <= 5) {
  5
} else if (A_real <= 10) {
  10
} else {
  ceiling(A_real / 10) * 10
}

# First lower limit: largest multiple of A_long not exceeding the minimum
Li0 <- floor(min_long / A_long) * A_long

# Because Li0 is snapped down below the minimum, k_long classes of width
# A_long may stop short of the maximum. Add classes when needed so that the
# class grid covers every observation.
k_long <- max(k_long, ceiling((max_long - Li0) / A_long))

# Lower limits
Li_long <- seq(Li0, by = A_long, length.out = k_long)

# Upper limits
Ls_long <- Li_long + A_long

# Class marks
MC_long <- round((Li_long + Ls_long) / 2, 2)

# Absolute frequency. Classes are [Li, Ls), except the last one, which is
# closed on the right so that the maximum is counted.
ni_long <- as.numeric(
  tabulate(
    findInterval(
      longitude,
      c(Li_long, Ls_long[k_long]),
      rightmost.closed = TRUE
    ),
    nbins = k_long
  )
)

# Relative frequency (%)
hi_long <- round((ni_long / sum(ni_long)) * 100, 2)

# Cumulative absolute frequencies (ascending and descending)
Ni_asc_long <- cumsum(ni_long)
Ni_dsc_long <- rev(cumsum(rev(ni_long)))

# Cumulative relative frequencies (%), derived from the cumulative counts.
# Accumulating the already-rounded hi_long values compounds the rounding
# error, so the total could end at 100.02 instead of 100.
Hi_asc_long <- round(Ni_asc_long / sum(ni_long) * 100, 2)
Hi_dsc_long <- round(Ni_dsc_long / sum(ni_long) * 100, 2)

4. TABLA DE FRECUENCIAS

4.1. Cálculo de la tabla

# Body of the frequency table. Every column except ni and hi must hold the
# blank cells of the TOTAL row, so those columns are necessarily text. They
# are converted explicitly here, with a fixed number of decimals where
# relevant: letting rbind() coerce them drops trailing zeros (29 instead of
# 29.00) and leaves the later fmt_number() call on MC with nothing numeric
# to format.
TDF_longitude <- data.frame(
  Li = format(Li_long, trim = TRUE, scientific = FALSE),
  Ls = format(Ls_long, trim = TRUE, scientific = FALSE),
  MC = formatC(MC_long, format = "f", digits = 2),
  ni = ni_long,
  hi = hi_long,
  Ni_asc = format(Ni_asc_long, trim = TRUE, scientific = FALSE),
  Ni_dsc = format(Ni_dsc_long, trim = TRUE, scientific = FALSE),
  Hi_asc = formatC(Hi_asc_long, format = "f", digits = 2),
  Hi_dsc = formatC(Hi_dsc_long, format = "f", digits = 2)
)

# Append the totals row; only ni and hi carry a total, the rest stay blank
TDF_longitude <- rbind(
  TDF_longitude,
  data.frame(
    Li = "TOTAL",
    Ls = "",
    MC = "",
    ni = sum(ni_long),
    hi = 100,
    Ni_asc = "",
    Ni_dsc = "",
    Hi_asc = "",
    Hi_dsc = ""
  )
)

4.2. Presentación de la tabla

# Build the gt table one step at a time: number format for MC, header,
# source note, and bold text for the TOTAL row.
tabla_longitude <- gt(TDF_longitude)
tabla_longitude <- fmt_number(tabla_longitude, columns = MC, decimals = 2)
tabla_longitude <- tab_header(
  tabla_longitude,
  title = md("**Tabla N° 1**"),
  subtitle = md("Distribución de frecuencias de la variable Longitude")
)
tabla_longitude <- tab_source_note(
  tabla_longitude,
  source_note = md("Autor: Grupo Geología")
)
tabla_longitude <- tab_style(
  tabla_longitude,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Li == "TOTAL")
)

# Render the table
tabla_longitude
Tabla N° 1
Distribución de frecuencias de la variable Longitude
Li Ls MC ni hi Ni_asc Ni_dsc Hi_asc Hi_dsc
-180 -150 -165 85 0.77 85 11033 0.77 100.02
-150 -120 -135 1893 17.16 1978 10948 17.93 99.25
-120 -90 -105 1221 11.07 3199 9055 29 82.09
-90 -60 -75 1526 13.83 4725 7834 42.83 71.02
-60 -30 -45 228 2.07 4953 6308 44.9 57.19
-30 0 -15 318 2.88 5271 6080 47.78 55.12
0 30 15 334 3.03 5605 5762 50.81 52.24
30 60 45 279 2.53 5884 5428 53.34 49.21
60 90 75 2034 18.44 7918 5149 71.78 46.68
90 120 105 1784 16.17 9702 3115 87.95 28.24
120 150 135 1083 9.82 10785 1331 97.77 12.07
150 180 165 248 2.25 11033 248 100.02 2.25
TOTAL 11033 100.00
Autor: Grupo Geología

5. GRÁFICAS

5.1 Histograma de frecuencias absolutas (ni)

# Absolute-frequency histogram drawn on the class limits of the frequency
# table: all lower limits plus the last upper limit. right = FALSE makes the
# bins left-closed.
hist(
  x = longitude,
  breaks = c(Li_long, Ls_long[length(Ls_long)]),
  main = "Histograma de la variable Longitude",
  xlab = "Longitude (°)",
  ylab = "Frecuencia absoluta",
  col = "grey80",
  border = "black",
  freq = TRUE,
  right = FALSE
)


5.2 Histograma de frecuencias relativas (hi)

# Relative-frequency histogram. hist(freq = FALSE) draws densities (bar
# areas sum to 1), not relative frequencies, so the bin counts are converted
# to percentages and plotted as bar heights. local() keeps the temporary
# histogram object out of the global environment.
local({
  h_rel <- hist(
    longitude,
    breaks = c(Li_long, max(Ls_long)),
    right = FALSE,
    plot = FALSE
  )

  # Replace the counts with percentages of the total
  h_rel$counts <- h_rel$counts / sum(h_rel$counts) * 100

  # freq = TRUE makes plot() use the (rescaled) counts as bar heights
  plot(
    h_rel,
    freq = TRUE,
    col = "grey80",
    border = "black",
    main = "Histograma relativo de Longitude",
    xlab = "Longitude (°)",
    ylab = "Frecuencia relativa (%)"
  )
})


5.3 Histograma con boxplot superpuesto

# Histogram with a horizontal boxplot overlaid near the top of the plot.
# local() keeps the helper variable out of the global environment; the
# par() setting still applies to the graphics device as before.
local({
  par(mar = c(5, 4, 3, 2))

  hist(
    x = longitude,
    breaks = c(Li_long, Ls_long[length(Ls_long)]),
    main = "Histograma y Boxplot de Longitude",
    xlab = "Longitude (°)",
    ylab = "Frecuencia",
    col = "grey85",
    border = "black",
    right = FALSE,
    freq = TRUE
  )

  # Upper edge of the plotting region, read after the histogram is drawn;
  # the boxplot position and width are both expressed relative to it.
  y_top <- par("usr")[4]

  boxplot(
    x = longitude,
    add = TRUE,
    horizontal = TRUE,
    at = y_top * 0.96,
    boxwex = y_top * 0.04,
    outline = TRUE,
    axes = FALSE,
    border = "blue",
    col = "lightblue",
    medcol = "red",
    whisklty = 1,
    staplelty = 1
  )

  invisible(NULL)
})


5.4 Ojivas

# Ogives. The ascending curve is plotted at the upper class limits and the
# descending curve at the lower class limits. The first lower limit lies to
# the left of every upper limit, so xlim is set explicitly to span both sets
# of limits; otherwise the axis is fitted to Ls_long only and the first
# point of the descending curve is clipped.
plot(
  Ls_long,
  Ni_asc_long,
  type = "o",
  pch = 19,
  col = "blue",
  xlim = range(Li_long, Ls_long),
  ylim = c(0, max(Ni_asc_long)),
  main = "Ojivas de la variable Longitude",
  xlab = "Límite de clase",
  ylab = "Frecuencia acumulada"
)

# Descending ogive
lines(
  Li_long,
  Ni_dsc_long,
  type = "o",
  pch = 17,
  col = "red"
)

legend(
  "right",
  legend = c(
    "Ojiva ascendente",
    "Ojiva descendente"
  ),
  col = c("blue", "red"),
  pch = c(19, 17),
  lty = 1,
  bty = "n"
)

6. INDICADORES

6.1 Cálculo de indicadores estadísticos

# Theoretical bounds of the variable
ri <- -180
rs <- 180

# Mean and standard deviation
x <- mean(longitude)
sd_long <- stats::sd(longitude)

# Median
Me <- stats::median(longitude)

# Mode: the most frequent value after rounding to one decimal place
Mo <- as.numeric(
  names(sort(table(round(longitude, 1)), decreasing = TRUE))[1]
)

# Coefficient of variation (%). It divides by the mean, so it becomes very
# large when the mean is close to zero.
CV <- (sd_long / x) * 100

# Shape: skewness and excess kurtosis, using the mean of the centred powers
# divided by the matching power of sd_long
As <- mean((longitude - x)^3) / sd_long^3
K <- mean((longitude - x)^4) / sd_long^4 - 3

6.2 Tabla de indicadores

# One-row summary of the indicators, rounded to two decimals
TablaIndicadores_longitude <- data.frame(
  Variable = "Longitude",
  ri = ri,
  rs = rs,
  Media = round(x, 2),
  Mediana = round(Me, 2),
  Moda = round(Mo, 2),
  Desv_Est = round(sd_long, 2),
  CV = round(CV, 2),
  Asimetria = round(As, 2),
  Curtosis = round(K, 2)
)

# Build the gt table one step at a time: header, then source note
tabla_indicadores <- gt(TablaIndicadores_longitude)
tabla_indicadores <- tab_header(
  tabla_indicadores,
  title = md("**Tabla N° 2**"),
  subtitle = md("Indicadores estadísticos de la variable Longitude")
)
tabla_indicadores <- tab_source_note(
  tabla_indicadores,
  source_note = md("Autor: Grupo Geología")
)

# Render the table
tabla_indicadores
Tabla N° 2
Indicadores estadísticos de la variable Longitude
Variable ri rs Media Mediana Moda Desv_Est CV Asimetria Curtosis
Longitude -180 180 2.52 19.69 -122.3 100.91 4003.6 -0.06 -1.67
Autor: Grupo Geología

6.3 Detección de outliers (Método IQR)

# Quartiles. names = FALSE drops the "25%" / "75%" labels that quantile()
# attaches; otherwise they are carried into the fences below and can leak
# into the row names of a data frame built from them.
Q1 <- quantile(longitude, 0.25, names = FALSE)
Q3 <- quantile(longitude, 0.75, names = FALSE)

# Interquartile range
IQR_long <- IQR(longitude)

# Fences at 1.5 * IQR beyond the quartiles
Limite_inferior <- Q1 - 1.5 * IQR_long
Limite_superior <- Q3 + 1.5 * IQR_long

# Observations falling outside the fences
outliers <- longitude[
  longitude < Limite_inferior |
    longitude > Limite_superior
]

# Number of outliers
n_outliers <- length(outliers)

6.4 Tabla de outliers

# One-row summary: outlier count and the two fences rounded to two decimals
Tabla_outliers <- data.frame(
  Outliers = n_outliers,
  Limite_inferior = round(Limite_inferior, 2),
  Limite_superior = round(Limite_superior, 2)
)

# Build the gt table one step at a time: header, then source note
tabla_outliers <- gt(Tabla_outliers)
tabla_outliers <- tab_header(
  tabla_outliers,
  title = md("**Tabla N° 3**"),
  subtitle = md("Detección de valores atípicos (Método IQR)")
)
tabla_outliers <- tab_source_note(
  tabla_outliers,
  source_note = md("Autor: Grupo Geología")
)

# Render the table
tabla_outliers
Tabla N° 3
Detección de valores atípicos (Método IQR)
Outliers Limite_inferior Limite_superior
0 -410.6 396.68
Autor: Grupo Geología