Se carga el conjunto de datos de arrendamientos de hidrocarburos del estado de Kansas, EE.UU., registrados por el Kansas Geological Survey.
Instrucción: cambia la ruta del CSV en la línea `ruta_csv` según donde tengas guardado el archivo `kansas.csv`.
# Location of the semicolon-delimited CSV; edit it to match where the file
# is stored on your machine.
ruta_csv <- "C:/Users/luisq/OneDrive/Desktop/ESTADISTICA/kansas.csv"

# Fail early with an actionable message when the path has not been adapted,
# instead of relying on the reader's generic error.
if (!file.exists(ruta_csv)) {
  stop(
    "No se encontró el archivo CSV en: ", ruta_csv,
    ". Actualiza 'ruta_csv' con la ubicación de kansas.csv.",
    call. = FALSE
  )
}

# Read the raw table; the column-type report is silenced to keep the output
# clean.
datos <- read_delim(ruta_csv, delim = ";", show_col_types = FALSE)
cat("Base de datos cargada correctamente.\n")
## Base de datos cargada correctamente.
# Report the number of rows in the loaded dataset.
cat("Total de registros evaluados (filas):", nrow(datos), "\n", sep = " ")
## Total de registros evaluados (filas): 104173
La variable TOWNSHIP representa la división norte-sur del sistema de agrimensura de Kansas (valores enteros 1–35).
# Keep a single column with the township coerced to integer. Coercion
# warnings are deliberately silenced: entries that end up as NA are dropped
# together with anything outside the 1-35 range.
township_data <- datos %>%
  transmute(TOWNSHIP_NUM = suppressWarnings(as.integer(TOWNSHIP))) %>%
  filter(!is.na(TOWNSHIP_NUM) & TOWNSHIP_NUM >= 1 & TOWNSHIP_NUM <= 35)

# Number of rows that passed the validity filter.
n_total <- nrow(township_data)
cat("Observaciones válidas de TOWNSHIP:", n_total, "\n")
## Observaciones válidas de TOWNSHIP: 97708
# Report how many distinct township numbers remain after filtering.
cat(
  "Valores únicos:",
  with(township_data, length(unique(TOWNSHIP_NUM))),
  "\n"
)
## Valores únicos: 35
| Criterio | Clasificación |
|---|---|
| Tipo | Cuantitativa Discreta |
| Escala | De razón |
| Variable | TOWNSHIP (división norte-sur del sistema de agrimensura) |
| Rango | 1 a 35 |
| Fuente | Kansas Geological Survey – Kansas, EE.UU. |
Justificación: El township toma valores enteros contables (1, 2, …, 35) dentro del sistema de agrimensura rectangular. Es una variable cuantitativa discreta de escala de razón, ya que el cero absoluto tiene significado y las diferencias son interpretables.
Se construye la tabla de distribución de frecuencias de la variable cuantitativa discreta Township, correspondiente a los arrendamientos de hidrocarburos registrados en Kansas, EE.UU., durante el período histórico disponible en la base de datos.
# Absolute frequency (ni) of each township value, sorted ascending.
freq_table <- township_data %>%
  count(Variable = TOWNSHIP_NUM, name = "ni") %>%
  arrange(Variable)

# Total number of observations across all classes.
n <- sum(freq_table$ni)

# Relative and cumulative frequencies. The descending accumulations are the
# complement of what was accumulated before each row, hence the one-row
# shift that starts at zero.
freq_table <- freq_table %>%
  mutate(
    hi_dec = ni / n,
    Ni_asc = cumsum(ni),
    Hi_asc = cumsum(hi_dec),
    Ni_desc = n - c(0L, head(Ni_asc, -1)),
    Hi_desc = 1 - c(0, head(Hi_asc, -1))
  )
# Display version of the frequency table: every derived column is turned
# into a pre-formatted string and only the columns to be shown are kept.
tabla_presentacion <- freq_table %>%
  transmute(
    Variable = as.character(Variable),
    ni,
    hi_pct = sprintf("%.2f%%", 100 * hi_dec),
    hi_real = sprintf("%.4f", hi_dec),
    Ni_asc_c = as.character(Ni_asc),
    Hi_asc_c = sprintf("%.4f", Hi_asc),
    Ni_dsc_c = as.character(Ni_desc),
    Hi_dsc_c = sprintf("%.4f", Hi_desc)
  )

# Closing row with the totals; cumulative columns do not apply, so they
# carry a dash.
total_row <- data.frame(
  Variable = "TOTAL",
  ni = n,
  hi_pct = "100.00%",
  hi_real = "1.0000",
  Ni_asc_c = "—",
  Hi_asc_c = "—",
  Ni_dsc_c = "—",
  Hi_dsc_c = "—",
  stringsAsFactors = FALSE
)

tabla_final_disp <- tabla_presentacion %>%
  bind_rows(total_row)
# Caption and header labels are prepared up front so the rendering pipeline
# below stays short.
titulo_tabla1 <- paste0(
  "Tabla N°1: Distribución de Frecuencias de la Variable Cuantitativa Discreta Township, ",
  "registrada en los arrendamientos de hidrocarburos del estado de Kansas, EE.UU., ",
  "período histórico disponible (n = ", format(n, big.mark = ","), " registros válidos)."
)
encabezados_tabla1 <- c(
  "Township (Xi)", "ni (FA)",
  "hi %", "hi (decimal)",
  "Ni ↑ (FAAa)", "Hi ↑ (FRAa)",
  "Ni ↓ (FAAd)", "Hi ↓ (FRAd)"
)
# Index of the last row (the TOTAL row), which gets its own highlight.
fila_total <- nrow(tabla_final_disp)

kable(
  tabla_final_disp,
  caption = titulo_tabla1,
  col.names = encabezados_tabla1,
  align = rep("c", length(encabezados_tabla1))
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    full_width = TRUE,
    font_size = 12
  ) %>%
  # Row 0 is the header row.
  row_spec(0, bold = TRUE, background = "#d3d3d3", color = "black") %>%
  row_spec(fila_total, bold = TRUE, background = "#a9a9a9", color = "black") %>%
  column_spec(1, bold = TRUE)
| Township (Xi) | ni (FA) | hi % | hi (decimal) | Ni ↑ (FAAa) | Hi ↑ (FRAa) | Ni ↓ (FAAd) | Hi ↓ (FRAd) |
|---|---|---|---|---|---|---|---|
| 1 | 768 | 0.79% | 0.0079 | 768 | 0.0079 | 97708 | 1.0000 |
| 2 | 632 | 0.65% | 0.0065 | 1400 | 0.0143 | 96940 | 0.9921 |
| 3 | 731 | 0.75% | 0.0075 | 2131 | 0.0218 | 96308 | 0.9857 |
| 4 | 699 | 0.72% | 0.0072 | 2830 | 0.0290 | 95577 | 0.9782 |
| 5 | 664 | 0.68% | 0.0068 | 3494 | 0.0358 | 94878 | 0.9710 |
| 6 | 568 | 0.58% | 0.0058 | 4062 | 0.0416 | 94214 | 0.9642 |
| 7 | 769 | 0.79% | 0.0079 | 4831 | 0.0494 | 93646 | 0.9584 |
| 8 | 1202 | 1.23% | 0.0123 | 6033 | 0.0617 | 92877 | 0.9506 |
| 9 | 1497 | 1.53% | 0.0153 | 7530 | 0.0771 | 91675 | 0.9383 |
| 10 | 1437 | 1.47% | 0.0147 | 8967 | 0.0918 | 90178 | 0.9229 |
| 11 | 1465 | 1.50% | 0.0150 | 10432 | 0.1068 | 88741 | 0.9082 |
| 12 | 1258 | 1.29% | 0.0129 | 11690 | 0.1196 | 87276 | 0.8932 |
| 13 | 1510 | 1.55% | 0.0155 | 13200 | 0.1351 | 86018 | 0.8804 |
| 14 | 3787 | 3.88% | 0.0388 | 16987 | 0.1739 | 84508 | 0.8649 |
| 15 | 2538 | 2.60% | 0.0260 | 19525 | 0.1998 | 80721 | 0.8261 |
| 16 | 2991 | 3.06% | 0.0306 | 22516 | 0.2304 | 78183 | 0.8002 |
| 17 | 3004 | 3.07% | 0.0307 | 25520 | 0.2612 | 75192 | 0.7696 |
| 18 | 2719 | 2.78% | 0.0278 | 28239 | 0.2890 | 72188 | 0.7388 |
| 19 | 3381 | 3.46% | 0.0346 | 31620 | 0.3236 | 69469 | 0.7110 |
| 20 | 2848 | 2.91% | 0.0291 | 34468 | 0.3528 | 66088 | 0.6764 |
| 21 | 2678 | 2.74% | 0.0274 | 37146 | 0.3802 | 63240 | 0.6472 |
| 22 | 2610 | 2.67% | 0.0267 | 39756 | 0.4069 | 60562 | 0.6198 |
| 23 | 3134 | 3.21% | 0.0321 | 42890 | 0.4390 | 57952 | 0.5931 |
| 24 | 3199 | 3.27% | 0.0327 | 46089 | 0.4717 | 54818 | 0.5610 |
| 25 | 3105 | 3.18% | 0.0318 | 49194 | 0.5035 | 51619 | 0.5283 |
| 26 | 3481 | 3.56% | 0.0356 | 52675 | 0.5391 | 48514 | 0.4965 |
| 27 | 3481 | 3.56% | 0.0356 | 56156 | 0.5747 | 45033 | 0.4609 |
| 28 | 4061 | 4.16% | 0.0416 | 60217 | 0.6163 | 41552 | 0.4253 |
| 29 | 4772 | 4.88% | 0.0488 | 64989 | 0.6651 | 37491 | 0.3837 |
| 30 | 5576 | 5.71% | 0.0571 | 70565 | 0.7222 | 32719 | 0.3349 |
| 31 | 5307 | 5.43% | 0.0543 | 75872 | 0.7765 | 27143 | 0.2778 |
| 32 | 5593 | 5.72% | 0.0572 | 81465 | 0.8338 | 21836 | 0.2235 |
| 33 | 6435 | 6.59% | 0.0659 | 87900 | 0.8996 | 16243 | 0.1662 |
| 34 | 6621 | 6.78% | 0.0678 | 94521 | 0.9674 | 9808 | 0.1004 |
| 35 | 3187 | 3.26% | 0.0326 | 97708 | 1.0000 | 3187 | 0.0326 |
| TOTAL | 97708 | 100.00% | 1.0000 | — | — | — | — |
# Raw vector of township numbers and its length.
x <- township_data$TOWNSHIP_NUM
n_x <- length(x)

# Central tendency. The mode is the value with the highest count; if several
# values tie, the first one in the table is kept.
media <- mean(x)
mediana <- median(x)
conteos_x <- table(x)
moda_val <- as.integer(names(conteos_x)[which.max(conteos_x)])

# Dispersion: sample variance, its square root, and the coefficient of
# variation expressed as a percentage.
varianza <- var(x)
desv_std <- sqrt(varianza)
cv <- 100 * (desv_std / media)
rango_val <- diff(range(x))

# Quartiles and interquartile range.
cuartiles_x <- quantile(x, probs = c(0.25, 0.75), names = FALSE)
q1 <- cuartiles_x[1]
q3 <- cuartiles_x[2]
iqr_val <- q3 - q1

# Shape: median-based Pearson skewness, and kurtosis as the fourth central
# moment (divided by n) over the fourth power of the sample standard
# deviation.
asimetria <- 3 * (media - mediana) / desv_std
momento_4 <- sum((x - media)^4) / n_x
curtosis_val <- momento_4 / (desv_std^4)
# Row labels of the summary table, in display order.
etiquetas_indicadores <- c(
  "Tamaño muestral (n)",
  "Mínimo",
  "Máximo",
  "Rango",
  "Media",
  "Mediana",
  "Moda",
  "Varianza (s²)",
  "Desviación estándar (s)",
  "Coef. de variación (CV%)",
  "Cuartil 1 (Q1)",
  "Cuartil 3 (Q3)",
  "Rango intercuartílico (IQR)",
  "Asimetría de Pearson",
  "Curtosis"
)

# Matching values, one per label. Mixing the pre-formatted strings with the
# numeric results makes c() coerce every entry to character, which is what
# the display table needs.
valores_indicadores <- c(
  format(n_x, big.mark = ","),
  min(x),
  max(x),
  rango_val,
  round(media, 4),
  mediana,
  moda_val,
  round(varianza, 4),
  round(desv_std, 4),
  paste0(round(cv, 2), "%"),
  q1,
  q3,
  iqr_val,
  round(asimetria, 4),
  round(curtosis_val, 4)
)

indicadores <- data.frame(
  Indicador = etiquetas_indicadores,
  Valor = valores_indicadores,
  stringsAsFactors = FALSE
)
# Render the indicator table; the caption is kept in its own variable so the
# pipeline below is easier to scan.
titulo_tabla2 <- "Tabla N°2: Indicadores Estadísticos de la Variable Township, arrendamientos de hidrocarburos, Kansas, EE.UU."

kable(
  indicadores,
  caption = titulo_tabla2,
  col.names = c("Indicador", "Valor"),
  align = c("l", "c")
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    full_width = FALSE,
    font_size = 12
  ) %>%
  # Row 0 is the header row.
  row_spec(0, bold = TRUE, background = "#d3d3d3", color = "black")
| Indicador | Valor |
|---|---|
| Tamaño muestral (n) | 97,708 |
| Mínimo | 1 |
| Máximo | 35 |
| Rango | 34 |
| Media | 23.5815 |
| Mediana | 25 |
| Moda | 34 |
| Varianza (s²) | 74.6058 |
| Desviación estándar (s) | 8.6375 |
| Coef. de variación (CV%) | 36.63% |
| Cuartil 1 (Q1) | 17 |
| Cuartil 3 (Q3) | 31 |
| Rango intercuartílico (IQR) | 14 |
| Asimetría de Pearson | -0.4927 |
| Curtosis | 2.4578 |
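La variable Township indica la posición norte-sur de cada arrendamiento petrolífero en Kansas (1 = norte, 35 = sur). Con base en 97,708 registros válidos, la media (23.58) es menor que la mediana (25) y que la moda (34), y la asimetría de Pearson es negativa (−0.4927); es decir, los arrendamientos se concentran en los townships de numeración alta, hacia el sur del estado, como muestran las gráficas siguientes.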
# One gray shade per bar, from dark (0.25) to light (0.80); this palette is
# also used by the relative-frequency bar chart.
n_barras <- nrow(freq_table)
grises <- gray(seq(from = 0.25, to = 0.80, length.out = n_barras))

# Bar chart of absolute frequencies. Axis labels and title are left blank in
# barplot() and added with mtext() so their distance from the axes can be
# set explicitly.
par(mar = c(5, 6, 6, 2))
frecuencias_ni <- freq_table$ni
barplot(
  frecuencias_ni,
  names.arg = freq_table$Variable,
  col = grises,
  border = "black",
  # Extra headroom above the tallest bar.
  ylim = c(0, max(frecuencias_ni) * 1.18),
  cex.names = 0.75,
  las = 1,
  main = "",
  xlab = "",
  ylab = ""
)
mtext("Frecuencia Absoluta (ni)", side = 2, line = 4.5, cex = 1)
mtext("Township", side = 1, line = 3.5, cex = 1)
mtext(
  "Gráfica N°1: Distribución de Frecuencias Absolutas por Township,\narrendamientos de hidrocarburos, Kansas, EE.UU.",
  side = 3, line = 3, cex = 0.9, font = 2
)
# Bar chart of relative frequencies in percent, with the value printed above
# each bar.
par(mar = c(5, 6, 6, 2))
hi_porcentaje <- freq_table$hi_dec * 100

# barplot() returns the bar midpoints, reused below to place the labels.
bp2 <- barplot(
  hi_porcentaje,
  names.arg = freq_table$Variable,
  col = grises,
  border = "black",
  # Extra headroom so the labels above the tallest bar fit.
  ylim = c(0, max(hi_porcentaje) * 1.20),
  cex.names = 0.75,
  las = 1,
  main = "",
  xlab = "",
  ylab = ""
)
mtext("Frecuencia Relativa hi (%)", side = 2, line = 4.5, cex = 1)
mtext("Township", side = 1, line = 3.5, cex = 1)
mtext(
  "Gráfica N°2: Distribución Porcentual por Township,\narrendamientos de hidrocarburos, Kansas, EE.UU.",
  side = 3, line = 3, cex = 0.9, font = 2
)
text(
  bp2, hi_porcentaje,
  labels = sprintf("%.1f%%", hi_porcentaje),
  pos = 3, cex = 0.6, col = "black"
)
# Vertical boxplot of the township values, annotated with Q1, the median and
# Q3.
par(mar = c(5, 5, 6, 2))
boxplot(
  x,
  col = "gray75",
  border = "black",
  horizontal = FALSE,
  outline = TRUE,
  pch = 16,
  cex = 0.6,
  main = "",
  ylab = "",
  xlab = ""
)
mtext("Township", side = 2, line = 3.5, cex = 1)
mtext(
  "Gráfica N°3: Boxplot de la Variable Township,\narrendamientos de hidrocarburos, Kansas, EE.UU.",
  side = 3, line = 3, cex = 0.9, font = 2
)

# All three annotations share the same x position and style, so they are
# drawn in a single vectorised call.
posiciones_caja <- c(q1, mediana, q3)
text(
  1.3, posiciones_caja,
  labels = paste0(c("Q1 = ", "Me = ", "Q3 = "), posiciones_caja),
  cex = 0.8, pos = 4
)
# Ascending ogive: cumulative absolute frequency plotted against township.
par(mar = c(5, 7, 6, 2))
plot(
  freq_table$Variable, freq_table$Ni_asc,
  type = "b", pch = 16, col = "black", lwd = 2,
  # Small headroom above the largest cumulative value.
  ylim = c(0, max(freq_table$Ni_asc) * 1.05),
  xlab = "", ylab = "", main = "", las = 1,
  # Draw the reference grid before the data so it stays in the background
  # instead of being painted over the points and the line.
  panel.first = grid(col = "gray85", lty = "dotted")
)
mtext("Frec. Absoluta Acumulada Creciente Ni ↑", side = 2, line = 5, cex = 0.9)
mtext("Township", side = 1, line = 3.5, cex = 1)
mtext(
  "Gráfica N°4: Ojiva Creciente de la Variable Township,\narrendamientos de hidrocarburos, Kansas, EE.UU.",
  side = 3, line = 3, cex = 0.9, font = 2
)
# Descending ogive: descending cumulative absolute frequency plotted against
# township.
par(mar = c(5, 7, 6, 2))
plot(
  freq_table$Variable, freq_table$Ni_desc,
  type = "b", pch = 16, col = "black", lwd = 2,
  # Small headroom above the largest cumulative value.
  ylim = c(0, max(freq_table$Ni_desc) * 1.05),
  xlab = "", ylab = "", main = "", las = 1,
  # Draw the reference grid before the data so it stays in the background
  # instead of being painted over the points and the line.
  panel.first = grid(col = "gray85", lty = "dotted")
)
mtext("Frec. Absoluta Acumulada Decreciente Ni ↓", side = 2, line = 5, cex = 0.9)
mtext("Township", side = 1, line = 3.5, cex = 1)
mtext(
  "Gráfica N°5: Ojiva Decreciente de la Variable Township,\narrendamientos de hidrocarburos, Kansas, EE.UU.",
  side = 3, line = 3, cex = 0.9, font = 2
)
Autor: Leslye Quinchiguango — Análisis Estadístico, Kansas Hydrocarbon Leases Dataset