CARGA DE DATOS Y LIBRERÍAS
# DATA LOADING
# Read the cleaned geological dataset (comma-separated, "." as decimal mark,
# first row holds the column names, strings kept as character).
datos <- read.csv(
  file = "C:/Users/Grace/OneDrive - Universidad Central del Ecuador/Documentos/dataset_geologico_limpio_80.csv",
  stringsAsFactors = FALSE,
  dec = ".",
  sep = ",",
  header = TRUE
)
# Clay variable: drop every character that is not a digit, a dot or a minus
# sign from CLAY_PCT, then coerce what is left to numeric.
arcilla_raw <- as.numeric(gsub("[^0-9.-]", "", datos$CLAY_PCT))
# Keep only non-missing values inside the closed range [0, 100].
arcilla <- arcilla_raw[!is.na(arcilla_raw) & arcilla_raw >= 0 & arcilla_raw <= 100]
# Sample size after cleaning.
n <- length(arcilla)
cat("Tamaño de muestra:", n)
## Tamaño de muestra: 27240
# CARGA DE LIBRERÍAS
library(gt)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(e1071)
library(moments)
##
## Adjuntando el paquete: 'moments'
## The following objects are masked from 'package:e1071':
##
## kurtosis, moment, skewness
TABLA DE DISTRIBUCIÓN DE FRECUENCIA
Agrupación por la regla de Sturges
# Class intervals for the clay variable using Sturges' rule.
# Reads `arcilla` (cleaned numeric vector) and `n` (its length).
minimo <- min(arcilla)
maximo <- max(arcilla)
R <- maximo - minimo            # range of the data
if (R <= 0) {
  stop("No se pueden construir intervalos: todos los valores son iguales.",
       call. = FALSE)
}
k <- floor(1 + 3.3 * log10(n))  # number of classes
A <- R / k                      # class width
# Exact (unrounded) class edges: k + 1 equally spaced points from the minimum
# to the maximum. Counting is done on these edges; rounding each limit
# separately before counting leaves small gaps between consecutive classes
# (e.g. 12.62 vs 12.63) and observations inside those gaps are lost.
cortes <- seq(minimo, maximo, length.out = k + 1)
cortes[length(cortes)] <- maximo  # pin the last edge so the maximum is counted
# Limits and class marks are rounded for display only.
Li <- round(cortes[-length(cortes)], 2)
Ls <- round(cortes[-1], 2)
MC <- round((cortes[-length(cortes)] + cortes[-1]) / 2, 2)
# FREQUENCIES
# Classes are closed on the left and open on the right, [Li, Ls), except the
# last one, which also includes its upper limit.
clases <- cut(arcilla, breaks = cortes, right = FALSE, include.lowest = TRUE)
ni <- as.numeric(table(clases))
total <- sum(ni)
hi <- round((ni / total) * 100, 2)
Ni_Asc <- cumsum(ni)
Ni_Desc <- rev(cumsum(rev(ni)))
# Cumulative percentages come from the cumulative counts, not from the
# already-rounded `hi`, so rounding errors do not accumulate.
Hi_Asc <- round((Ni_Asc / total) * 100, 2)
Hi_Desc <- round((Ni_Desc / total) * 100, 2)
# FREQUENCY DISTRIBUTION TABLE
# One row per class: limits, class mark, absolute (ni) and relative (hi)
# frequencies, plus ascending and descending cumulative counts/percentages.
tabla_final <- data.frame(
  Limite_Inferior = Li, Limite_Superior = Ls, Marca_Clase = MC,
  ni = ni, hi = hi,
  Ni_Asc = Ni_Asc, Hi_Asc = Hi_Asc,
  Ni_Desc = Ni_Desc, Hi_Desc = Hi_Desc
)
# Column totals, used for the TOTAL row of the table.
suma_ni <- sum(tabla_final[["ni"]])
suma_hi <- sum(tabla_final[["hi"]])
tabla_final
## Limite_Inferior Limite_Superior Marca_Clase ni hi Ni_Asc Hi_Asc
## 1 0.00 6.31 3.15 14156 51.98 14156 51.98
## 2 6.31 12.62 9.46 2706 9.94 16862 61.92
## 3 12.63 18.94 15.79 2536 9.31 19398 71.23
## 4 18.94 25.25 22.09 1725 6.33 21123 77.56
## 5 25.26 31.57 28.41 1437 5.28 22560 82.84
## 6 31.57 37.88 34.73 1325 4.87 23885 87.71
## 7 37.88 44.19 41.03 1037 3.81 24922 91.52
## 8 44.20 50.51 47.36 680 2.50 25602 94.02
## 9 50.51 56.82 53.66 457 1.68 26059 95.70
## 10 56.83 63.14 59.98 371 1.36 26430 97.06
## 11 63.14 69.45 66.30 298 1.09 26728 98.15
## 12 69.45 75.76 72.61 250 0.92 26978 99.07
## 13 75.77 82.08 78.92 162 0.59 27140 99.66
## 14 82.08 88.39 85.24 74 0.27 27214 99.93
## 15 88.40 94.71 91.56 19 0.07 27233 100.00
## Ni_Desc Hi_Desc
## 1 27233 100.00
## 2 13077 48.02
## 3 10371 38.08
## 4 7835 28.77
## 5 6110 22.44
## 6 4673 17.16
## 7 3348 12.29
## 8 2311 8.48
## 9 1631 5.98
## 10 1174 4.30
## 11 803 2.94
## 12 505 1.85
## 13 255 0.93
## 14 93 0.34
## 15 19 0.07
# TOTAL ROW
# The TOTAL row holds text ("TOTAL", "", "-") in every column except ni and
# hi, so rbind() turns those columns into character. Format them explicitly
# first: the implicit conversion drops trailing zeros ("44.2", "95.7", "0")
# and may switch large counts to scientific notation.
cols_decimales <- c("Limite_Inferior", "Limite_Superior", "Marca_Clase",
                    "Hi_Asc", "Hi_Desc")
cols_enteras <- c("Ni_Asc", "Ni_Desc")
tabla_final[cols_decimales] <- lapply(
  tabla_final[cols_decimales],
  function(x) sprintf("%.2f", x)
)
tabla_final[cols_enteras] <- lapply(
  tabla_final[cols_enteras],
  function(x) sprintf("%.0f", x)
)
# ni and hi stay numeric: the TOTAL row supplies numbers for both.
fila_total <- data.frame(
  Limite_Inferior = "TOTAL",
  Limite_Superior = "",
  Marca_Clase = "",
  ni = suma_ni,
  hi = suma_hi,
  Ni_Asc = "-",
  Hi_Asc = "-",
  Ni_Desc = "-",
  Hi_Desc = "-",
  stringsAsFactors = FALSE
)
tabla_final <- rbind(tabla_final, fila_total)
tabla_final
## Limite_Inferior Limite_Superior Marca_Clase ni hi Ni_Asc Hi_Asc
## 1 0 6.31 3.15 14156 51.98 14156 51.98
## 2 6.31 12.62 9.46 2706 9.94 16862 61.92
## 3 12.63 18.94 15.79 2536 9.31 19398 71.23
## 4 18.94 25.25 22.09 1725 6.33 21123 77.56
## 5 25.26 31.57 28.41 1437 5.28 22560 82.84
## 6 31.57 37.88 34.73 1325 4.87 23885 87.71
## 7 37.88 44.19 41.03 1037 3.81 24922 91.52
## 8 44.2 50.51 47.36 680 2.50 25602 94.02
## 9 50.51 56.82 53.66 457 1.68 26059 95.7
## 10 56.83 63.14 59.98 371 1.36 26430 97.06
## 11 63.14 69.45 66.3 298 1.09 26728 98.15
## 12 69.45 75.76 72.61 250 0.92 26978 99.07
## 13 75.77 82.08 78.92 162 0.59 27140 99.66
## 14 82.08 88.39 85.24 74 0.27 27214 99.93
## 15 88.4 94.71 91.56 19 0.07 27233 100
## 16 TOTAL 27233 100.00 - -
## Ni_Desc Hi_Desc
## 1 27233 100
## 2 13077 48.02
## 3 10371 38.08
## 4 7835 28.77
## 5 6110 22.44
## 6 4673 17.16
## 7 3348 12.29
## 8 2311 8.48
## 9 1631 5.98
## 10 1174 4.3
## 11 803 2.94
## 12 505 1.85
## 13 255 0.93
## 14 93 0.34
## 15 19 0.07
## 16 - -
# FREQUENCY TABLE, PRESENTATION FORMAT
# Build the gt table step by step: header, source note, then border and
# striping options.
TablaArcilla <- gt(tabla_final)
TablaArcilla <- tab_header(
  TablaArcilla,
  title = md("**Tabla Nº1**"),
  subtitle = md("Distribución de frecuencias de la variable Arcilla (%) en sedimentos marinos")
)
TablaArcilla <- tab_source_note(
  TablaArcilla,
  source_note = md("Autor: Grupo 3")
)
TablaArcilla <- tab_options(
  TablaArcilla,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray"
)
TablaArcilla
| Tabla Nº1 |
| Distribución de frecuencias de la variable Arcilla (%) en sedimentos marinos |
| Limite_Inferior |
Limite_Superior |
Marca_Clase |
ni |
hi |
Ni_Asc |
Hi_Asc |
Ni_Desc |
Hi_Desc |
| 0 |
6.31 |
3.15 |
14156 |
51.98 |
14156 |
51.98 |
27233 |
100 |
| 6.31 |
12.62 |
9.46 |
2706 |
9.94 |
16862 |
61.92 |
13077 |
48.02 |
| 12.63 |
18.94 |
15.79 |
2536 |
9.31 |
19398 |
71.23 |
10371 |
38.08 |
| 18.94 |
25.25 |
22.09 |
1725 |
6.33 |
21123 |
77.56 |
7835 |
28.77 |
| 25.26 |
31.57 |
28.41 |
1437 |
5.28 |
22560 |
82.84 |
6110 |
22.44 |
| 31.57 |
37.88 |
34.73 |
1325 |
4.87 |
23885 |
87.71 |
4673 |
17.16 |
| 37.88 |
44.19 |
41.03 |
1037 |
3.81 |
24922 |
91.52 |
3348 |
12.29 |
| 44.2 |
50.51 |
47.36 |
680 |
2.50 |
25602 |
94.02 |
2311 |
8.48 |
| 50.51 |
56.82 |
53.66 |
457 |
1.68 |
26059 |
95.7 |
1631 |
5.98 |
| 56.83 |
63.14 |
59.98 |
371 |
1.36 |
26430 |
97.06 |
1174 |
4.3 |
| 63.14 |
69.45 |
66.3 |
298 |
1.09 |
26728 |
98.15 |
803 |
2.94 |
| 69.45 |
75.76 |
72.61 |
250 |
0.92 |
26978 |
99.07 |
505 |
1.85 |
| 75.77 |
82.08 |
78.92 |
162 |
0.59 |
27140 |
99.66 |
255 |
0.93 |
| 82.08 |
88.39 |
85.24 |
74 |
0.27 |
27214 |
99.93 |
93 |
0.34 |
| 88.4 |
94.71 |
91.56 |
19 |
0.07 |
27233 |
100 |
19 |
0.07 |
| TOTAL |
|
|
27233 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 3 |
GRÁFICAS DE DISTRIBUCIÓN DE FRECUENCIA
# Histograms of absolute frequency.
# A single number passed as `breaks` is only a suggestion to hist(), so the
# bars would not match the k classes of the frequency table. Pass the class
# edges explicitly, with the same convention as the table: classes closed on
# the left, last class also closed on the right.
cortes_hist <- seq(min(arcilla), max(arcilla), length.out = k + 1)

# Local scale: the y axis is fitted to the tallest bar.
hist(arcilla,
     breaks = cortes_hist,
     right = FALSE,
     include.lowest = TRUE,
     col = "gray",
     main = "Gráfica Nº1: Distribución de frecuencia absoluta local de Arcilla (%)",
     xlab = "Arcilla (%)",
     ylab = "Cantidad")

# Global scale: the y axis spans the whole sample (0 to the sample size),
# mirroring the 0-100 axis of the global relative-frequency plot.
hist(arcilla,
     breaks = cortes_hist,
     right = FALSE,
     include.lowest = TRUE,
     col = "gray",
     main = "Gráfica Nº2: Distribución de frecuencia absoluta global de Arcilla (%)",
     xlab = "Arcilla (%)",
     ylab = "Cantidad",
     ylim = c(0, length(arcilla)))

# Relative frequency (hi, in percent), one bar per class labelled with its
# class mark; space = 0 draws the bars touching each other.

# Local scale: y axis chosen by barplot() from the data.
barplot(hi, names.arg = MC, space = 0, col = "gray",
        main = "Gráfica Nº3: Distribución de frecuencia relativa local de Arcilla (%)",
        xlab = "Intervalos de Arcilla (%)", ylab = "Porcentaje")

# Global scale: y axis fixed to the full 0-100 percent range.
barplot(hi, names.arg = MC, space = 0, col = "gray",
        main = "Gráfica Nº4: Distribución de frecuencia relativa global de Arcilla (%)",
        xlab = "Intervalos de Arcilla (%)", ylab = "Porcentaje",
        ylim = c(0, 100))

# Combined ogives.
# Ni_Desc[i] / Hi_Desc[i] accumulate the observations from class i upward,
# i.e. values >= Li[i], so the descending curve is drawn against the LOWER
# limits. Ni_Asc[i] / Hi_Asc[i] accumulate up to class i, i.e. values below
# Ls[i], so the ascending curve is drawn against the UPPER limits.
# Axis limits cover both series so neither curve is clipped.

# Combined ogive, absolute cumulative frequency (Ni)
lim_sup <- Ls
plot(Li, Ni_Desc, type = "o",
     main = "Gráfica Nº5: Ojiva combinada de la arcilla (Ni)",
     ylab = "Cantidad acumulada",
     xlab = "Arcilla (%)",
     col = "blue",
     xlim = range(Li, lim_sup),
     ylim = c(0, max(Ni_Asc, Ni_Desc)))
lines(lim_sup, Ni_Asc,
      col = "red",
      type = "o")
legend("topleft",
       legend = c("Descendente", "Ascendente"),
       col = c("blue", "red"),
       lty = 1,
       pch = 1)

# Combined ogive, relative cumulative frequency (Hi, percent)
lim_sup <- Ls
plot(Li, Hi_Desc, type = "o",
     main = "Gráfica Nº6: Ojiva combinada de la arcilla (Hi)",
     ylab = "Porcentaje acumulado",
     xlab = "Arcilla (%)",
     col = "blue",
     xlim = range(Li, lim_sup),
     ylim = c(0, 100))
lines(lim_sup, Hi_Asc,
      col = "red",
      type = "o")
legend("topleft",
       legend = c("Descendente", "Ascendente"),
       col = c("blue", "red"),
       lty = 1,
       pch = 1)

# BOX PLOT
# Horizontal box plot of the clay percentages.
boxplot(
  arcilla,
  col = "lightblue",
  xlab = "Arcilla (%)",
  main = "Gráfica Nº7: Diagrama de caja de la variable Arcilla (%)",
  horizontal = TRUE
)

INDICADORES ESTADÍSTICOS
# Summary indicators for the clay variable.
media <- mean(arcilla)
mediana <- median(arcilla)
desv <- sd(arcilla)
# Coefficient of variation, in percent.
CV <- round((desv / media) * 100, 2)
# Both e1071 and moments are attached and both export skewness() and
# kurtosis(); an unqualified call resolves to whichever package was attached
# last. Qualify the calls so the result does not depend on library() order.
# NOTE(review): the two packages do not share the same default kurtosis
# convention (raw vs. excess) - confirm which one the report should state.
asimetria <- round(moments::skewness(arcilla), 2)
curtosis <- round(moments::kurtosis(arcilla), 2)
minimo <- min(arcilla)
maximo <- max(arcilla)
# One-row table with every indicator rounded to two decimals.
TablaIndicadores <- data.frame(
  Variable = "Arcilla (%)",
  Minimo = round(minimo, 2),
  Maximo = round(maximo, 2),
  Media = round(media, 2),
  Mediana = round(mediana, 2),
  Desv_Est = round(desv, 2),
  CV = CV,
  Asimetria = asimetria,
  Curtosis = curtosis
)
TablaIndicadores
## Variable Minimo Maximo Media Mediana Desv_Est CV Asimetria Curtosis
## 1 Arcilla (%) 0 94.71 14.21 5.37 18.39 129.46 1.55 4.89
# Presentation version of the indicators table: gt table with a header and
# a source note, printed directly.
tab_source_note(
  tab_header(
    gt(TablaIndicadores),
    title = md("**Tabla Nº2**"),
    subtitle = md("Indicadores estadísticos de la variable Arcilla (%)")
  ),
  source_note = md("Autor: Grupo 3")
)
| Tabla Nº2 |
| Indicadores estadísticos de la variable Arcilla (%) |
| Variable |
Minimo |
Maximo |
Media |
Mediana |
Desv_Est |
CV |
Asimetria |
Curtosis |
| Arcilla (%) |
0 |
94.71 |
14.21 |
5.37 |
18.39 |
129.46 |
1.55 |
4.89 |
| Autor: Grupo 3 |
OUTLIERS
# Outliers as reported by boxplot.stats() for the clay variable.
outliers <- boxplot.stats(arcilla)$out
num_outliers <- length(outliers)
# Smallest and largest outlier, rounded to two decimals. A plain if/else is
# used because the condition is a single value; the empty case yields a
# numeric NA so both columns keep a numeric type either way.
if (num_outliers > 0) {
  min_out <- round(min(outliers), 2)
  max_out <- round(max(outliers), 2)
} else {
  min_out <- NA_real_
  max_out <- NA_real_
}
# One-row summary table.
TablaOutliers <- data.frame(
  Cantidad_Outliers = num_outliers,
  Minimo = min_out,
  Maximo = max_out
)
TablaOutliers
## Cantidad_Outliers Minimo Maximo
## 1 1275 55.43 94.71
# Presentation version of the outliers table: gt table with a header and a
# source note, printed directly.
tab_source_note(
  tab_header(
    gt(TablaOutliers),
    title = md("**Tabla Nº3**"),
    subtitle = md("Valores atípicos de la variable Arcilla (%)")
  ),
  source_note = md("Autor: Grupo 3")
)
| Tabla Nº3 |
| Valores atípicos de la variable Arcilla (%) |
| Cantidad_Outliers |
Minimo |
Maximo |
| 1275 |
55.43 |
94.71 |
| Autor: Grupo 3 |
CONCLUSIONES