CARGA DE DATOS Y LIBRERÍAS

# DATA LOADING
# Read the cleaned geological dataset: comma-separated, "." as decimal mark,
# first row holds the column names, text columns stay as character.
datos <- read.csv(
  file = "C:/Users/Grace/OneDrive - Universidad Central del Ecuador/Documentos/dataset_geologico_limpio_80.csv",
  stringsAsFactors = FALSE,
  dec = ".",
  sep = ",",
  header = TRUE
)

# Clay variable: drop every character that is not a digit, "." or "-" and
# convert the remainder to numeric (entries that still fail to parse become NA).
arcilla_raw <- as.numeric(gsub("[^0-9.-]", "", datos$CLAY_PCT))

# Keep only the values inside [0, 100]; which() also discards the NA entries.
arcilla <- arcilla_raw[which(arcilla_raw >= 0 & arcilla_raw <= 100)]

# Sample size after cleaning.
n <- length(arcilla)

cat("Tamaño de muestra:", n)
## Tamaño de muestra: 27240
# CARGA DE LIBRERÍAS
library(gt)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(e1071)
library(moments)
## 
## Adjuntando el paquete: 'moments'
## The following objects are masked from 'package:e1071':
## 
##     kurtosis, moment, skewness

TABLA DE DISTRIBUCIÓN DE FRECUENCIAS

Agrupación por la regla de Sturges

# Sturges' rule: number of classes (k) and common class width (A) derived
# from the sample size and the observed range of the variable.
minimo <- min(arcilla)
maximo <- max(arcilla)

R <- maximo - minimo

k <- floor(1 + 3.3 * log10(n))

A <- R / k

# The class arithmetic below needs at least one class and a non-zero range.
stopifnot(k >= 1, R > 0)

# Exact (unrounded) class boundaries: k + 1 equally spaced points whose first
# and last elements are the observed minimum and maximum.
limites <- seq(minimo, maximo, length.out = k + 1)

# Rounded limits and class marks are for display and plotting only. Counting
# must NOT use them: rounding each limit separately opens small gaps between
# consecutive classes and silently drops the observations that fall inside.
Li <- round(limites[-(k + 1)], 2)
Ls <- round(limites[-1], 2)
MC <- round((limites[-(k + 1)] + limites[-1]) / 2, 2)

# FREQUENCIES
# Classes are [lower, upper), except the last one, which is closed on the
# right so the maximum is counted. Every observation lands in exactly one
# class, hence sum(ni) equals the sample size.
clase <- findInterval(arcilla, limites, rightmost.closed = TRUE)
ni <- as.numeric(tabulate(clase, nbins = k))

total <- sum(ni)

# Relative frequency (%).
hi <- round((ni / total) * 100, 2)

# Cumulative frequencies. The percentages are derived from the cumulative
# counts (not from the rounded hi) so rounding error does not accumulate.
Ni_Asc <- cumsum(ni)

Hi_Asc <- round((Ni_Asc / total) * 100, 2)

Ni_Desc <- rev(cumsum(rev(ni)))

Hi_Desc <- round((Ni_Desc / total) * 100, 2)

# DISTRIBUTION TABLE
# Start with the class description (limits and class mark), then append the
# simple and cumulative frequency columns in their display order.
tabla_final <- data.frame(
  Limite_Inferior = Li,
  Limite_Superior = Ls,
  Marca_Clase = MC
)
tabla_final[c("ni", "hi")] <- list(ni, hi)
tabla_final[c("Ni_Asc", "Hi_Asc")] <- list(Ni_Asc, Hi_Asc)
tabla_final[c("Ni_Desc", "Hi_Desc")] <- list(Ni_Desc, Hi_Desc)

# Column totals, reused by the TOTAL row.
suma_ni <- sum(ni)
suma_hi <- sum(hi)

tabla_final
##    Limite_Inferior Limite_Superior Marca_Clase    ni    hi Ni_Asc Hi_Asc
## 1             0.00            6.31        3.15 14156 51.98  14156  51.98
## 2             6.31           12.62        9.46  2706  9.94  16862  61.92
## 3            12.63           18.94       15.79  2536  9.31  19398  71.23
## 4            18.94           25.25       22.09  1725  6.33  21123  77.56
## 5            25.26           31.57       28.41  1437  5.28  22560  82.84
## 6            31.57           37.88       34.73  1325  4.87  23885  87.71
## 7            37.88           44.19       41.03  1037  3.81  24922  91.52
## 8            44.20           50.51       47.36   680  2.50  25602  94.02
## 9            50.51           56.82       53.66   457  1.68  26059  95.70
## 10           56.83           63.14       59.98   371  1.36  26430  97.06
## 11           63.14           69.45       66.30   298  1.09  26728  98.15
## 12           69.45           75.76       72.61   250  0.92  26978  99.07
## 13           75.77           82.08       78.92   162  0.59  27140  99.66
## 14           82.08           88.39       85.24    74  0.27  27214  99.93
## 15           88.40           94.71       91.56    19  0.07  27233 100.00
##    Ni_Desc Hi_Desc
## 1    27233  100.00
## 2    13077   48.02
## 3    10371   38.08
## 4     7835   28.77
## 5     6110   22.44
## 6     4673   17.16
## 7     3348   12.29
## 8     2311    8.48
## 9     1631    5.98
## 10    1174    4.30
## 11     803    2.94
## 12     505    1.85
## 13     255    0.93
## 14      93    0.34
## 15      19    0.07

# TOTAL ROW
# The total row mixes text ("TOTAL", "", "-") with numbers, so rbind() turns
# every column that receives text into character. Left to the default
# coercion, trailing zeros are dropped ("44.2", "95.7", "100") and the table
# shows uneven decimals. Format those columns explicitly before binding; the
# resulting column types are the same ones rbind() would produce anyway.
cols_decimales <- c("Limite_Inferior", "Limite_Superior", "Marca_Clase",
                    "Hi_Asc", "Hi_Desc")
cols_enteras <- c("Ni_Asc", "Ni_Desc")

tabla_final[cols_decimales] <- lapply(tabla_final[cols_decimales],
                                      formatC, format = "f", digits = 2)
tabla_final[cols_enteras] <- lapply(tabla_final[cols_enteras],
                                    formatC, format = "f", digits = 0)

# ni and hi stay numeric: the total row supplies numbers for both.
fila_total <- data.frame(
  Limite_Inferior = "TOTAL",
  Limite_Superior = "",
  Marca_Clase = "",
  ni = suma_ni,
  hi = suma_hi,
  Ni_Asc = "-",
  Hi_Asc = "-",
  Ni_Desc = "-",
  Hi_Desc = "-"
)

tabla_final <- rbind(tabla_final, fila_total)

tabla_final
##    Limite_Inferior Limite_Superior Marca_Clase    ni     hi Ni_Asc Hi_Asc
## 1                0            6.31        3.15 14156  51.98  14156  51.98
## 2             6.31           12.62        9.46  2706   9.94  16862  61.92
## 3            12.63           18.94       15.79  2536   9.31  19398  71.23
## 4            18.94           25.25       22.09  1725   6.33  21123  77.56
## 5            25.26           31.57       28.41  1437   5.28  22560  82.84
## 6            31.57           37.88       34.73  1325   4.87  23885  87.71
## 7            37.88           44.19       41.03  1037   3.81  24922  91.52
## 8             44.2           50.51       47.36   680   2.50  25602  94.02
## 9            50.51           56.82       53.66   457   1.68  26059   95.7
## 10           56.83           63.14       59.98   371   1.36  26430  97.06
## 11           63.14           69.45        66.3   298   1.09  26728  98.15
## 12           69.45           75.76       72.61   250   0.92  26978  99.07
## 13           75.77           82.08       78.92   162   0.59  27140  99.66
## 14           82.08           88.39       85.24    74   0.27  27214  99.93
## 15            88.4           94.71       91.56    19   0.07  27233    100
## 16           TOTAL                             27233 100.00      -      -
##    Ni_Desc Hi_Desc
## 1    27233     100
## 2    13077   48.02
## 3    10371   38.08
## 4     7835   28.77
## 5     6110   22.44
## 6     4673   17.16
## 7     3348   12.29
## 8     2311    8.48
## 9     1631    5.98
## 10    1174     4.3
## 11     803    2.94
## 12     505    1.85
## 13     255    0.93
## 14      93    0.34
## 15      19    0.07
## 16       -       -

# FREQUENCY TABLE, PRESENTATION FORMAT
# Build the gt table one step at a time: base table, header, source note and
# finally the border / striping options.
TablaArcilla <- gt(tabla_final)

TablaArcilla <- tab_header(
  TablaArcilla,
  title = md("**Tabla Nº1**"),
  subtitle = md("Distribución de frecuencias de la variable Arcilla (%) en sedimentos marinos")
)

TablaArcilla <- tab_source_note(
  TablaArcilla,
  source_note = md("Autor: Grupo 3")
)

TablaArcilla <- tab_options(
  TablaArcilla,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray"
)

TablaArcilla
Tabla Nº1
Distribución de frecuencias de la variable Arcilla (%) en sedimentos marinos
Limite_Inferior Limite_Superior Marca_Clase ni hi Ni_Asc Hi_Asc Ni_Desc Hi_Desc
0 6.31 3.15 14156 51.98 14156 51.98 27233 100
6.31 12.62 9.46 2706 9.94 16862 61.92 13077 48.02
12.63 18.94 15.79 2536 9.31 19398 71.23 10371 38.08
18.94 25.25 22.09 1725 6.33 21123 77.56 7835 28.77
25.26 31.57 28.41 1437 5.28 22560 82.84 6110 22.44
31.57 37.88 34.73 1325 4.87 23885 87.71 4673 17.16
37.88 44.19 41.03 1037 3.81 24922 91.52 3348 12.29
44.2 50.51 47.36 680 2.50 25602 94.02 2311 8.48
50.51 56.82 53.66 457 1.68 26059 95.7 1631 5.98
56.83 63.14 59.98 371 1.36 26430 97.06 1174 4.3
63.14 69.45 66.3 298 1.09 26728 98.15 803 2.94
69.45 75.76 72.61 250 0.92 26978 99.07 505 1.85
75.77 82.08 78.92 162 0.59 27140 99.66 255 0.93
82.08 88.39 85.24 74 0.27 27214 99.93 93 0.34
88.4 94.71 91.56 19 0.07 27233 100 19 0.07
TOTAL 27233 100.00 - - - -
Autor: Grupo 3

GRÁFICAS DE DISTRIBUCIÓN DE FRECUENCIAS

# Explicit class boundaries for both histograms. A single number passed as
# `breaks` is only a suggestion to hist(), which then picks "pretty"
# breakpoints; the bars would not correspond to the k Sturges classes.
# k + 1 equally spaced points from the minimum to the maximum give exactly k
# bars.
cortes_hist <- seq(min(arcilla), max(arcilla), length.out = k + 1)

# Histogram of absolute frequency, local scale (default y axis).
# right = FALSE with include.lowest = TRUE makes the classes [lower, upper)
# with the last class closed on the right.
hist(arcilla,
     breaks = cortes_hist,
     right = FALSE,
     include.lowest = TRUE,
     col = "gray",
     main = "Gráfica Nº1: Distribución de frecuencia absoluta local de Arcilla (%)",
     xlab = "Arcilla (%)",
     ylab = "Cantidad")


# Histogram of absolute frequency, global scale.
# NOTE(review): the "global" relative-frequency chart spans the full 0-100
# scale, while this one only adds 200 above the tallest class; confirm
# whether the y axis was meant to reach the total sample size instead.
hist(arcilla,
     breaks = cortes_hist,
     right = FALSE,
     include.lowest = TRUE,
     col = "gray",
     main = "Gráfica Nº2: Distribución de frecuencia absoluta global de Arcilla (%)",
     xlab = "Arcilla (%)",
     ylab = "Cantidad",
     ylim = c(0, max(ni)+200))


# Relative frequency (%), local scale: adjacent bars (no gap) labelled with
# the class marks, default y axis.
barplot(
  height = hi,
  names.arg = MC,
  space = 0,
  col = "gray",
  main = "Gráfica Nº3: Distribución de frecuencia relativa local de Arcilla (%)",
  xlab = "Intervalos de Arcilla (%)",
  ylab = "Porcentaje"
)


# Relative frequency (%), global scale: same bars, y axis fixed to 0-100.
barplot(
  height = hi,
  names.arg = MC,
  space = 0,
  col = "gray",
  ylim = c(0, 100),
  main = "Gráfica Nº4: Distribución de frecuencia relativa global de Arcilla (%)",
  xlab = "Intervalos de Arcilla (%)",
  ylab = "Porcentaje"
)


# Combined ogive, cumulative counts (Ni)
# Each cumulative series is drawn against the class limit it refers to:
#   - ascending  ("less than"):  Ni_Asc[i] counts observations below the
#     UPPER limit of class i, so it is plotted against Ls;
#   - descending ("or more"):    Ni_Desc[i] counts observations at or above
#     the LOWER limit of class i, so it is plotted against Li.
# xlim covers both sets of limits so neither curve is clipped.
lim_sup <- Ls
rango_x <- range(c(Li, lim_sup))

plot(Li, Ni_Desc, type="o",
     main="Gráfica Nº5: Ojiva combinada de la arcilla (Ni)",
     ylab="Cantidad acumulada",
     xlab="Arcilla (%)",
     col="blue",
     xlim=rango_x)

lines(lim_sup, Ni_Asc,
      col="red",
      type="o")

legend("topleft",
       legend=c("Descendente","Ascendente"),
       col=c("blue","red"),
       lty=1,
       pch=1)


# Combined ogive, cumulative percentages (Hi): same pairing of series and
# class limits as above, on a fixed 0-100 scale.
plot(Li, Hi_Desc, type="o",
     main="Gráfica Nº6: Ojiva combinada de la arcilla (Hi)",
     ylab="Porcentaje acumulado",
     xlab="Arcilla (%)",
     col="blue",
     xlim=rango_x,
     ylim=c(0,100))

lines(lim_sup, Hi_Asc,
      col="red",
      type="o")

legend("topleft",
       legend=c("Descendente","Ascendente"),
       col=c("blue","red"),
       lty=1,
       pch=1)


# BOX PLOT
# Single horizontal box for the clay percentage.
boxplot(
  x = arcilla,
  col = "lightblue",
  main = "Gráfica Nº7: Diagrama de caja de la variable Arcilla (%)",
  xlab = "Arcilla (%)",
  horizontal = TRUE
)


INDICADORES ESTADÍSTICOS

# Summary indicators for the clay percentage

# Central tendency and dispersion.
media <- mean(arcilla)
mediana <- median(arcilla)
desv <- sd(arcilla)

# Coefficient of variation, in percent.
CV <- round((desv/media)*100,2)

# Shape indicators. Both e1071 and moments export skewness() and kurtosis();
# which one an unqualified call reaches depends on the order of the library()
# calls. Qualifying the calls pins them to moments (the package attached last
# in this script), so the result no longer depends on attach order.
# NOTE(review): moments::kurtosis does not subtract 3, whereas the e1071
# default is excess kurtosis - confirm which convention the report intends.
asimetria <- round(moments::skewness(arcilla),2)
curtosis <- round(moments::kurtosis(arcilla),2)

# Extremes of the cleaned sample.
minimo <- min(arcilla)
maximo <- max(arcilla)

# One-row summary table; every figure is rounded to two decimals.
TablaIndicadores <- data.frame(
  Variable = "Arcilla (%)",
  Minimo = round(minimo,2),
  Maximo = round(maximo,2),
  Media = round(media,2),
  Mediana = round(mediana,2),
  Desv_Est = round(desv,2),
  CV = CV,
  Asimetria = asimetria,
  Curtosis = curtosis
)

TablaIndicadores
##      Variable Minimo Maximo Media Mediana Desv_Est     CV Asimetria Curtosis
## 1 Arcilla (%)      0  94.71 14.21    5.37    18.39 129.46      1.55     4.89

# Presentation table for the indicators: gt table with a header and a source
# note, written as nested calls (innermost call runs first).
tab_source_note(
  tab_header(
    gt(TablaIndicadores),
    title = md("**Tabla Nº2**"),
    subtitle = md("Indicadores estadísticos de la variable Arcilla (%)")
  ),
  source_note = md("Autor: Grupo 3")
)
Tabla Nº2
Indicadores estadísticos de la variable Arcilla (%)
Variable Minimo Maximo Media Mediana Desv_Est CV Asimetria Curtosis
Arcilla (%) 0 94.71 14.21 5.37 18.39 129.46 1.55 4.89
Autor: Grupo 3

OUTLIERS

# Observations reported as outliers by boxplot.stats().
outliers <- boxplot.stats(arcilla)$out
num_outliers <- length(outliers)

# Smallest and largest outlier, rounded to two decimals; NA when there are
# no outliers at all.
if (num_outliers > 0) {
  min_out <- round(min(outliers), 2)
  max_out <- round(max(outliers), 2)
} else {
  min_out <- NA
  max_out <- NA
}

# One-row summary of the outliers.
TablaOutliers <- data.frame(
  Cantidad_Outliers = num_outliers,
  Minimo = min_out,
  Maximo = max_out
)

TablaOutliers
##   Cantidad_Outliers Minimo Maximo
## 1              1275  55.43  94.71

# Presentation table for the outliers: gt table with a header and a source
# note, written as nested calls (innermost call runs first).
tab_source_note(
  tab_header(
    gt(TablaOutliers),
    title = md("**Tabla Nº3**"),
    subtitle = md("Valores atípicos de la variable Arcilla (%)")
  ),
  source_note = md("Autor: Grupo 3")
)
Tabla Nº3
Valores atípicos de la variable Arcilla (%)
Cantidad_Outliers Minimo Maximo
1275 55.43 94.71
Autor: Grupo 3

CONCLUSIONES