# Load the "Sedimentos Marinos" CSV export: fields are separated by ";",
# decimals use ".", and the first row holds the column names.
datos <- read.csv(
  file = "C:/Users/li/Downloads/Sedimentos Marinos (2).csv",
  header = TRUE, sep = ";", dec = "."
)
# Convert the collection-day column to numeric. Entries that cannot be parsed
# as numbers become NA, which is what the warning echoed below reports.
day_coll <- as.numeric(datos$DAY_COLL)
## Warning: NAs introducidos por coerción
# Keep only valid calendar days (1 to 31) using one NA-safe mask. This drops
# the NA entries and the -9999 invalid-value marker in a single step, and
# leaves a plain numeric vector (na.omit() would attach an "na.action"
# attribute to it).
day_coll <- day_coll[!is.na(day_coll) & day_coll >= 1 & day_coll <= 31]
# Sample size
n <- length(day_coll)
# The trailing newline keeps later console output from starting on this line
cat("Tamaño de la muestra (n):", n, "\n")
## Tamaño de la muestra (n): 2169
# Carga de librerías necesarias para el análisis
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(e1071)
library(moments)
##
## Adjuntando el paquete: 'moments'
## The following objects are masked from 'package:e1071':
##
## kurtosis, moment, skewness
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(gt)
TABLA DE DISTRIBUCIÓN DE FRECUENCIA
# Number of classes (Sturges' rule)
k <- floor(1 + 3.3 * log10(n))
# Real limits of the variable
minimo <- 1
maximo <- 31
# Class width, rounded up to a whole number of days
A <- ceiling((maximo - minimo) / k)
# Class limits that never go beyond 31: the last limit is pinned to the maximum
breaks <- seq(minimo, maximo, by = A)
breaks[length(breaks)] <- maximo
# Left-closed classes [a, b); include.lowest = TRUE also closes the last class
# on the right so that the maximum itself is counted.
clasificacion <- cut(
  day_coll,
  breaks = breaks,
  right = FALSE,
  include.lowest = TRUE
)
# Absolute frequencies
ni <- as.numeric(table(clasificacion))
total <- sum(ni)
# Relative frequencies (%), kept unrounded for the cumulative sums
hi <- ni / total * 100
# Ascending and descending cumulative frequencies
Ni_Asc <- cumsum(ni)
Hi_Asc <- cumsum(hi)
Ni_Desc <- rev(cumsum(rev(ni)))
Hi_Desc <- rev(cumsum(rev(hi)))
# Final table. The cumulative columns are stored as text on purpose: the TOTAL
# row below puts "-" in them, and rbind() would otherwise coerce the numbers to
# character on its own and drop trailing zeros ("4.7" next to "6.78").
# Formatting them here keeps a fixed number of decimals in every row.
tabla_final <- data.frame(
  Intervalo = levels(clasificacion),
  ni = ni,
  hi = round(hi, 2),
  Ni_Asc = sprintf("%.0f", Ni_Asc),
  Hi_Asc = sprintf("%.2f", Hi_Asc),
  Ni_Desc = sprintf("%.0f", Ni_Desc),
  Hi_Desc = sprintf("%.2f", Hi_Desc),
  stringsAsFactors = FALSE
)
# Force an exact 100% at the closing end of each cumulative percentage column
tabla_final$Hi_Asc[nrow(tabla_final)] <- "100.00"
tabla_final$Hi_Desc[1] <- "100.00"
# TOTAL row: cumulative columns are not meaningful there, so they show "-"
fila_total <- data.frame(
  Intervalo = "TOTAL",
  ni = sum(tabla_final$ni),
  hi = 100,
  Ni_Asc = "-",
  Hi_Asc = "-",
  Ni_Desc = "-",
  Hi_Desc = "-",
  stringsAsFactors = FALSE
)
tabla_final <- rbind(tabla_final, fila_total)
TABLA FINAL CON ESTILO
# Build the presentation table one step at a time: wrap the data frame in a gt
# object, add the title/subtitle, add the source note, and set the TOTAL row
# in bold. The final bare name renders the table.
TablaDisc <- gt(tabla_final)
TablaDisc <- tab_header(
  TablaDisc,
  title = md("Tabla Nº 1"),
  subtitle = md("*Tabla de distribución de frecuencias del día de colecta*")
)
TablaDisc <- tab_source_note(TablaDisc, md("*Autor: Grupo 3*"))
TablaDisc <- tab_style(
  TablaDisc,
  locations = cells_body(rows = Intervalo == "TOTAL"),
  style = cell_text(weight = "bold")
)
TablaDisc
| Tabla Nº 1 | ||||||
| Tabla de distribución de frecuencias del día de colecta | ||||||
| Intervalo | ni | hi | Ni_Asc | Hi_Asc | Ni_Desc | Hi_Desc |
|---|---|---|---|---|---|---|
| [1,4) | 102 | 4.70 | 102 | 4.7 | 2169 | 100 |
| [4,7) | 45 | 2.07 | 147 | 6.78 | 2067 | 95.3 |
| [7,10) | 181 | 8.34 | 328 | 15.12 | 2022 | 93.22 |
| [10,13) | 484 | 22.31 | 812 | 37.44 | 1841 | 84.88 |
| [13,16) | 209 | 9.64 | 1021 | 47.07 | 1357 | 62.56 |
| [16,19) | 139 | 6.41 | 1160 | 53.48 | 1148 | 52.93 |
| [19,22) | 151 | 6.96 | 1311 | 60.44 | 1009 | 46.52 |
| [22,25) | 404 | 18.63 | 1715 | 79.07 | 858 | 39.56 |
| [25,28) | 269 | 12.40 | 1984 | 91.47 | 454 | 20.93 |
| [28,31] | 185 | 8.53 | 2169 | 100 | 185 | 8.53 |
| TOTAL | 2169 | 100.00 | - | - | - | - |
| Autor: Grupo 3 | ||||||
# =========================================================
# FREQUENCY DISTRIBUTION PLOTS
# Variable: collection day
# =========================================================
# Class limits used in the frequency table
breaks_dia <- c(1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31)
# Base histogram, computed without drawing anything. right = FALSE makes the
# classes left-closed, [a, b).
h <- hist(day_coll,
          breaks = breaks_dia,
          right = FALSE,
          plot = FALSE)
# Absolute and relative (%) frequencies with their ascending and descending
# cumulative versions
fi <- h$counts
hi <- fi / sum(fi) * 100
Fi_Asc <- cumsum(fi)
Fi_Desc <- rev(cumsum(rev(fi)))
Hi_Asc <- cumsum(hi)
Hi_Desc <- rev(cumsum(rev(hi)))
# Class marks (class midpoints). They are read from the histogram object
# instead of being typed by hand, so they always agree with breaks_dia.
marcas <- h$mids
# =========================================================
# Plot No. 1: absolute frequency per class (histogram)
# =========================================================
plot(
  h,
  main = "Gráfica Nº1: Histograma de frecuencia absoluta local",
  xlab = "Día de colecta",
  ylab = "Frecuencia",
  col = "gray",
  border = "black"
)
# =========================================================
# Plot No. 2: relative frequency per class (histogram)
# =========================================================
# Copy the histogram object and swap its counts for the percentages, so the
# same plotting call draws relative instead of absolute frequencies.
h_rel <- h
h_rel[["counts"]] <- hi
plot(
  h_rel,
  main = "Gráfica Nº2: Histograma de frecuencia relativa local",
  xlab = "Día de colecta",
  ylab = "Porcentaje (%)",
  col = "gray",
  border = "black"
)
# =========================================================
# Plot No. 3: cumulative absolute frequency (combined ogive)
# =========================================================
# The y-axis is fixed from 0 to the largest cumulative count so that BOTH
# curves fit: by default the axis would be scaled from Fi_Asc alone and could
# clip the low end of the descending curve.
plot(marcas, Fi_Asc, type = "o", pch = 16,
     col = "blue",
     main = "Gráfica Nº3: Ojiva combinada de frecuencia absoluta",
     xlab = "Día de colecta",
     ylab = "Frecuencia acumulada",
     ylim = c(0, max(Fi_Asc, Fi_Desc)))
lines(marcas, Fi_Desc, type = "o", pch = 16, col = "red")
# The ascending curve runs from bottom-left to top-right and the descending
# one from top-left to bottom-right, so every corner holds a data point.
# The legend goes at the bottom centre, below where the curves cross.
legend("bottom",
       legend = c("Ascendente", "Descendente"),
       col = c("blue", "red"),
       lty = 1,
       pch = 16)
# =========================================================
# Plot No. 4: cumulative relative frequency (combined ogive)
# =========================================================
# Fixed 0-100 % axis so both cumulative percentage curves share one scale
plot(marcas, Hi_Asc, type = "o", pch = 16,
     col = "blue",
     main = "Gráfica Nº4: Ojiva combinada de frecuencia relativa",
     xlab = "Día de colecta",
     ylab = "Porcentaje acumulado (%)",
     ylim = c(0, 100))
lines(marcas, Hi_Desc, type = "o", pch = 16, col = "red")
# The descending curve ends in the bottom-right corner and the ascending one
# starts in the bottom-left, so a corner legend hides a data point. The bottom
# centre, below where the curves cross, is free.
legend("bottom",
       legend = c("Ascendente", "Descendente"),
       col = c("blue", "red"),
       lty = 1,
       pch = 16)
# =========================================================
# Plot No. 5: boxplot (outlier detection)
# =========================================================
boxplot(
  day_coll,
  main = "Gráfica Nº5: Boxplot del día de colecta",
  xlab = "Día de colecta",
  col = "lightgray",
  horizontal = TRUE
)
## Medidas estadísticas completas
Cálculo de indicadores de posición, dispersión y forma.
# =====================================
# STATISTICAL INDICATORS (FORMAL TABLE)
# =====================================
# moments supplies the skewness() and kurtosis() functions used for the shape
# measures in this section.
library(moments)
# Working copy of the cleaned collection-day values as a plain numeric vector
x <- as.numeric(day_coll)
# Mode of a vector: the most frequent non-NA value.
#   v: vector of observations (NA entries are ignored).
# Returns a single value of the same type as v. When several values tie for
# the highest count, the one that appears first in v is returned; an input
# with no non-NA values yields NA.
moda <- function(v) {
  validos <- v[!is.na(v)]
  distintos <- unique(validos)
  # Position of each observation within the distinct values, then how many
  # observations fall on each position
  posiciones <- match(validos, distintos)
  conteos <- tabulate(posiciones)
  # which.max() picks the first maximum, i.e. the earliest-seen value on ties
  distintos[which.max(conteos)]
}
# Extremes and range
minimo <- min(x)
maximo <- max(x)
rango <- maximo - minimo
# Measures of central tendency
media <- mean(x)
mediana <- median(x)
moda_x <- moda(x)
# Measures of dispersion. var() and sd() use the n - 1 (sample) denominator.
varianza <- var(x)
desviacion <- sd(x)
coef_var <- (desviacion / media) * 100
# Measures of shape. Both e1071 and moments export skewness() and kurtosis(),
# and which one a bare call reaches depends on the order the packages were
# attached. The calls are namespace-qualified so the result cannot change if
# the library() order does.
# NOTE(review): moments::kurtosis() reports Pearson's kurtosis (a normal
# distribution scores 3), not excess kurtosis. Confirm that this is the
# quantity the "γ₂" row of the indicator table is meant to show.
asimetria <- moments::skewness(x)
curtosis <- moments::kurtosis(x)
# Indicator table: one row per statistic, giving its symbol, its name, and its
# value rounded to two decimals. The three columns are listed in the same
# order, so position i of each vector describes the same indicator.
tabla_indicadores <- data.frame(
  Símbolo = c(
    "x_min", "x_max", "R",
    "x̄", "Me", "Mo",
    "σ²", "σ", "CV (%)",
    "γ₁", "γ₂"
  ),
  Indicador = c(
    "Mínimo", "Máximo", "Rango",
    "Media aritmética", "Mediana", "Moda",
    "Varianza", "Desviación estándar", "Coeficiente de variación",
    "Asimetría", "Curtosis"
  ),
  Valor = round(
    c(
      minimo, maximo, rango,
      media, mediana, moda_x,
      varianza, desviacion, coef_var,
      asimetria, curtosis
    ),
    2
  )
)
# Render the table with centred columns and a caption
knitr::kable(
  tabla_indicadores,
  caption = "Indicadores estadísticos del día de colecta",
  align = "c"
)
| Símbolo | Indicador | Valor |
|---|---|---|
| x_min | Mínimo | 1.00 |
| x_max | Máximo | 31.00 |
| R | Rango | 30.00 |
| x̄ | Media aritmética | 17.06 |
| Me | Mediana | 17.00 |
| Mo | Moda | 22.00 |
| σ² | Varianza | 59.05 |
| σ | Desviación estándar | 7.68 |
| CV (%) | Coeficiente de variación | 45.03 |
| γ₁ | Asimetría | -0.11 |
| γ₂ | Curtosis | 1.97 |
Se utiliza la regla del 1.5 × IQR para identificar outliers.
# Outlier fences by the 1.5 x IQR rule.
# names = FALSE matters: quantile() otherwise returns a value named "25%" /
# "75%", and that name is picked up by data.frame() as a row name, which then
# shows up as a stray "25%" label in the rendered summary table.
IQR_val <- IQR(day_coll)
lim_inf <- quantile(day_coll, 0.25, names = FALSE) - 1.5 * IQR_val
lim_sup <- quantile(day_coll, 0.75, names = FALSE) + 1.5 * IQR_val
# Observations lying outside the fences
outliers <- day_coll[day_coll < lim_inf | day_coll > lim_sup]
# One-row summary: how many outliers, both fences, and the outlying values
# themselves (or "Ninguno" when there are none)
Tabla_out <- data.frame(
  Cantidad = length(outliers),
  Limite_Inferior = round(lim_inf, 1),
  Limite_Superior = round(lim_sup, 1),
  Outliers = if (length(outliers) > 0) {
    paste(outliers, collapse = ", ")
  } else {
    "Ninguno"
  }
)
kable(Tabla_out, caption = "Resumen de Outliers (1.5 × IQR)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
| | Cantidad | Limite_Inferior | Limite_Superior | Outliers |
|---|---|---|---|---|
| 25% | 0 | -8.5 | 43.5 | Ninguno |