Para iniciar el procesamiento estadístico, se verifica la estructura global del conjunto de datos correspondiente a los bloques contractuales y arrendamientos de hidrocarburos en el estado de Kansas.
# Manual file selection: the CSV path is chosen interactively and the file
# is read into `datos` without printing the column-type summary.
datos <- read_csv(
  file = file.choose(),
  show_col_types = FALSE
)

# Report that loading finished and how many rows were read.
cat("Base de datos cargada correctamente.\n")
## Base de datos cargada correctamente.
cat("Total de registros evaluados (filas):", nrow(datos), "\n")
## Total de registros evaluados (filas): 47757
Se extrae la variable cuantitativa discreta Township
(TOWNSHIP), que representa la coordenada norte-sur de la
cuadrícula de arrendamientos de hidrocarburos en Kansas. Sus valores son
enteros entre 1 y 35, tratados como variable continua para el análisis
por intervalos.
# Pull the TOWNSHIP column out of `datos`, keeping only non-missing values.
township <- datos$TOWNSHIP[!is.na(datos$TOWNSHIP)]

# Basic sanity report: number of usable records and observed extremes.
cat("Total de registros válidos:", length(township), "\n")
## Total de registros válidos: 47757
cat("Valor mínimo:", min(township), "\n")
## Valor mínimo: 1
cat("Valor máximo:", max(township), "\n")
## Valor máximo: 35
Dado el volumen de datos (n = 47 757), se agrupan los valores en 9 intervalos de igual amplitud.
# ── Parameters ────────────────────────────────────────────────────
# Number of class intervals used for the grouped analysis.
n_intervalos <- 9

# Lower limit of the first class and total spread of the data.
lim_inf <- min(township)
rango <- diff(range(township))

# Common class width, rounded up so that n_intervalos classes starting at
# lim_inf reach at least the maximum value.
amplitud <- ceiling(rango / n_intervalos)

# Upper limit of the last class.
lim_sup <- lim_inf + n_intervalos * amplitud

cat("Rango:", rango, "\n")
## Rango: 34
cat("Amplitud (c):", amplitud, "\n")
## Amplitud (c): 4
# ── Breaks and labels ─────────────────────────────────────────────
# Class limits from lim_inf to lim_sup in steps of one class width.
breaks <- seq(from = lim_inf, to = lim_sup, by = amplitud)

# Labels are left-closed / right-open, e.g. "[1 - 5)" ...
etiquetas <- paste0("[", head(breaks, -1), " - ", tail(breaks, -1), ")")

# ... except the last class, which is closed on both sides.
etiquetas[n_intervalos] <- paste0(
  "[", breaks[n_intervalos], " - ", breaks[n_intervalos + 1], "]"
)

# ── Cuts ──────────────────────────────────────────────────────────
# Assign each observation to its class; right = FALSE together with
# include.lowest = TRUE makes the last class include its upper limit,
# matching the labels built above.
cortes <- cut(
  township,
  breaks = breaks,
  labels = etiquetas,
  right = FALSE,
  include.lowest = TRUE
)
# ── Frequencies ───────────────────────────────────────────────────
# Absolute frequency per class (one count per level of `cortes`).
ni <- tabulate(cortes, nbins = nlevels(cortes))

# Total number of classified observations.
N <- sum(ni)

# Relative frequency, as a proportion and as a percentage.
hi <- ni / N
hi_p <- 100 * hi

# Ascending ("less than") cumulative frequencies.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi_p)

# Descending ("or more") cumulative frequencies: N minus everything that
# lies in the preceding classes.
Ni_desc <- N - c(0, head(Ni_asc, -1))
Hi_desc <- Ni_desc / N * 100

# Class marks: midpoint of each pair of consecutive limits.
mc <- (head(breaks, -1) + tail(breaks, -1)) / 2
# ── Base table ────────────────────────────────────────────────────
# One row per class interval: class mark, absolute and relative frequency,
# and ascending / descending cumulative frequencies.
tabla_freq <- data.frame(
  Intervalo = etiquetas,
  `Marca de Clase` = mc,
  `ni` = ni,
  `hi (%)` = round(hi_p, 2),
  `Ni (Asc)` = Ni_asc,
  `Hi (Asc)` = round(Hi_asc, 2),
  `Ni (Desc)` = Ni_desc,
  `Hi (Desc)` = round(Hi_desc, 2),
  check.names = FALSE
)

# TOTAL row. Only the absolute and relative frequencies have a meaningful
# total (N and 100 %). Class marks and cumulative columns are not additive,
# so they are left missing instead of being summed (summing them produced
# figures such as 171 or 378.35 that have no statistical meaning).
total_fila <- data.frame(
  Intervalo = "TOTAL",
  `Marca de Clase` = NA_real_,
  `ni` = N,
  `hi (%)` = round(sum(hi_p), 2),
  `Ni (Asc)` = NA_integer_,
  `Hi (Asc)` = NA_real_,
  `Ni (Desc)` = NA_real_,
  `Hi (Desc)` = NA_real_,
  check.names = FALSE
)

# Class rows followed by the TOTAL row.
tabla_completa <- bind_rows(tabla_freq, total_fila)
# ── GT table ──────────────────────────────────────────────────────
# Renders `tabla_completa` (class rows plus the TOTAL row) as a styled gt
# table: dark header, striped body, highlighted TOTAL row.
gt(tabla_completa) %>%
  tab_header(
    title = md("**Tabla N°1: Distribución de Frecuencias por Intervalos**"),
    subtitle = md("*Variable Cuantitativa Discreta tratada como Continua: Township*")
  ) %>%
  cols_label(
    Intervalo = md("**Intervalo**"),
    `Marca de Clase` = md("**Marca de Clase**"),
    `ni` = md("**ni**"),
    `hi (%)` = md("**hi (%)**"),
    `Ni (Asc)` = md("**Ni (Asc)**"),
    `Hi (Asc)` = md("**Hi (Asc) %**"),
    `Ni (Desc)` = md("**Ni (Desc)**"),
    `Hi (Desc)` = md("**Hi (Desc) %**")
  ) %>%
  # Column labels: white bold text on a dark background.
  tab_style(
    locations = cells_column_labels(),
    style = list(
      cell_fill(color = "#2C2C2C"),
      cell_text(color = "white", weight = "bold")
    )
  ) %>%
  # Light fill on odd-numbered body rows (zebra striping).
  tab_style(
    locations = cells_body(
      rows = seq(from = 1, to = nrow(tabla_completa), by = 2)
    ),
    style = cell_fill(color = "#F5F5F5")
  ) %>%
  # TOTAL row: darker fill and bold text; applied after the striping.
  tab_style(
    locations = cells_body(
      columns = everything(),
      rows = Intervalo == "TOTAL"
    ),
    style = list(
      cell_fill(color = "#D6D6D6"),
      cell_text(weight = "bold")
    )
  ) %>%
  # Any missing cell is shown as an em dash.
  fmt_missing(columns = everything(), missing_text = "\u2014") %>%
  tab_source_note(source_note = md("*Autor: Fernando Almeida*")) %>%
  tab_options(
    table.width = pct(80),
    table.font.size = px(13),
    heading.title.font.size = px(16),
    heading.subtitle.font.size = px(12),
    data_row.padding = px(6)
  )
| Tabla N°1: Distribución de Frecuencias por Intervalos | |||||||
| Variable Cuantitativa Discreta tratada como Continua: Township | |||||||
| Intervalo | Marca de Clase | ni | hi (%) | Ni (Asc) | Hi (Asc) % | Ni (Desc) | Hi (Desc) % |
|---|---|---|---|---|---|---|---|
| [1 - 5) | 3 | 1299 | 2.72 | 1299 | 2.72 | 47757 | 100.00 |
| [5 - 9) | 7 | 1700 | 3.56 | 2999 | 6.28 | 46458 | 97.28 |
| [9 - 13) | 11 | 3292 | 6.89 | 6291 | 13.17 | 44758 | 93.72 |
| [13 - 17) | 15 | 5190 | 10.87 | 11481 | 24.04 | 41466 | 86.83 |
| [17 - 21) | 19 | 6084 | 12.74 | 17565 | 36.78 | 36276 | 75.96 |
| [21 - 25) | 23 | 5826 | 12.20 | 23391 | 48.98 | 30192 | 63.22 |
| [25 - 29) | 27 | 6449 | 13.50 | 29840 | 62.48 | 24366 | 51.02 |
| [29 - 33) | 31 | 10227 | 21.41 | 40067 | 83.90 | 17917 | 37.52 |
| [33 - 37] | 35 | 7690 | 16.10 | 47757 | 100.00 | 7690 | 16.10 |
| TOTAL | 171 | 47757 | 100.00 | 47757 | 378.35 | 296880 | 621.65 |
| Autor: Fernando Almeida | |||||||
Se presentan cinco representaciones visuales en escala de grises para analizar la distribución de la variable Township.
# Chart 1: bar chart of absolute frequencies per class interval.
# Wide bottom margin leaves room for the rotated interval labels.
par(mar = c(9, 6, 5, 2))

bp1 <- barplot(
  height = tabla_freq$`ni`,
  names.arg = tabla_freq$Intervalo,
  col = rep_len(c("gray30", "gray60", "gray85"), n_intervalos),
  ylim = c(0, 1.15 * max(tabla_freq$`ni`)),
  las = 2,
  cex.names = 0.75,
  main = "",
  xlab = "",
  ylab = ""
)

# Frequency value printed just above each bar (bp1 holds the bar midpoints).
text(
  x = bp1,
  y = tabla_freq$`ni`,
  labels = formatC(tabla_freq$`ni`, format = "d", big.mark = ","),
  pos = 3,
  cex = 0.75,
  col = "black",
  font = 2
)

# Axis titles and chart title are placed in the margins by hand.
mtext("Frecuencia (ni)", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Township", side = 1, line = 7, cex = 1)
mtext(
  "Gráfica N\u00b01: Distribución de Frecuencias Absolutas por Township",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)
# Chart 2: histogram (drawn with barplot so it uses the same class limits)
# with the frequency polygon on top.
par(mar = c(5, 6, 6, 2))

# Frequency polygon coordinates. With space = 0 and unit-width bars, bar i
# spans [i - 1, i], so the class marks sit at 0.5, 1.5, ... The polygon is
# closed on the axis one class width before the first mark and one after
# the last.
x_mc <- seq(0.5, n_intervalos - 0.5, by = 1)
x_poly <- c(x_mc[1] - 1, x_mc, x_mc[length(x_mc)] + 1)
y_poly <- c(0, ni, 0)

# Histogram drawn with barplot so it respects the same breaks.
# xlim is widened to the polygon's extent: barplot's default x-range only
# covers the bars, which left the two closing vertices of the polygon
# outside the plot region (clipped).
bp_hist <- barplot(
  ni,
  space = 0,
  col = rep(c("gray30", "gray60", "gray85"), length.out = n_intervalos),
  border = "white",
  xlim = range(x_poly),
  ylim = c(0, max(ni) * 1.2),
  axes = FALSE,
  names.arg = rep("", n_intervalos)
)

# X axis labelled with the class limits at the bar edges.
axis(1, at = seq(0, n_intervalos, by = 1), labels = breaks, cex.axis = 0.85)
# Y axis
axis(2, las = 1, cex.axis = 0.85)

# Frequency polygon (class marks at the centre of each bar).
lines(x_poly, y_poly, type = "o",
      pch = 15, cex = 0.8,
      col = "gray10", lty = 1, lwd = 1.6)

# Legend
legend("topleft",
       inset = c(0.52, 0),
       legend = c("Histograma", "Pol\u00edgono de frecuencias"),
       fill = c("gray60", NA),
       border = c("gray40", NA),
       lty = c(NA, 1),
       pch = c(NA, 15),
       lwd = c(NA, 1.6),
       col = c(NA, "gray10"),
       cex = 0.82,
       bty = "n",
       x.intersp = 0.5)

# Axis titles and two-line chart title.
mtext("Frecuencia Absoluta (ni)", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Township", side = 1, line = 3, cex = 1)
mtext("Gr\u00e1fica N\u00b02: Pol\u00edgono de Frecuencias de la Variable Township,",
      side = 3, line = 3, adj = 0.5, cex = 0.9, font = 2)
mtext("arrendamientos de hidrocarburos, Kansas, EE.UU.",
      side = 3, line = 1.7, adj = 0.5, cex = 0.9, font = 2)
# Chart 3: bar chart of the relative frequency (percentage) per interval.
par(mar = c(9, 6, 5, 2))

bp3 <- barplot(
  height = tabla_freq$`hi (%)`,
  names.arg = tabla_freq$Intervalo,
  col = rep_len(c("gray30", "gray60", "gray85"), n_intervalos),
  ylim = c(0, 1.2 * max(tabla_freq$`hi (%)`)),
  las = 2,
  cex.names = 0.75,
  main = "",
  xlab = "",
  ylab = "Porcentaje %"
)

# Percentage printed above each bar (bp3 holds the bar midpoints).
text(
  x = bp3,
  y = tabla_freq$`hi (%)`,
  labels = paste0(tabla_freq$`hi (%)`, "%"),
  pos = 3,
  cex = 0.8,
  col = "black"
)

mtext("Township", side = 1, line = 7, cex = 1)
mtext(
  "Gr\u00e1fica N\u00b03: Distribuci\u00f3n Porcentual por Township",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)
# Chart 4: horizontal box plot of the raw Township values.
par(mar = c(6, 3, 5, 3))

boxplot(
  township,
  horizontal = TRUE,
  frame = FALSE,
  ylim = c(min(township) - 1, max(township) + 1),
  col = "gray60",
  border = "gray20",
  whisklty = 2,
  staplelty = 1,
  outpch = 1,
  outcex = 0.6,
  outcol = "gray30"
)

mtext("Township", side = 1, line = 3, cex = 1, font = 1)
mtext(
  "Gráfica N\u00b04: Diagrama de Cajas \u2014 Township",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Labels are drawn above the box so they do not cover it.
# boxplot.stats()$stats holds, in order: lower whisker end, lower hinge,
# median, upper hinge, upper whisker end.
# NOTE(review): the whisker ends are labelled "Min"/"Max"; they equal the
# data extremes only when no point is flagged as an outlier.
stats_bp <- boxplot.stats(township)$stats
etiq_bp <- paste0(c("Min: ", "Q1: ", "Me: ", "Q3: ", "Max: "), stats_bp)

text(
  x = stats_bp,
  y = 1.38,
  labels = etiq_bp,
  cex = 0.72,
  col = "gray10",
  font = 2
)

# Dotted reference lines joining each label to its position on the box.
segments(
  x0 = stats_bp, y0 = 1.30,
  x1 = stats_bp, y1 = 1.12,
  col = "gray50", lty = 3, lwd = 0.8
)
# Chart 5: ascending and descending ogives (cumulative frequency curves).
par(mar = c(6, 6, 5, 2))

# The ogives are plotted at the class limits: the ascending curve starts at
# 0 on the first limit, the descending curve starts at N there.
mc_ext <- breaks
Ni_asc_ext <- c(0, cumsum(ni))
Ni_desc_ext <- c(N, N - cumsum(ni))

# Ascending ogive; a light grid is drawn first so it stays underneath.
plot(
  x = mc_ext,
  y = Ni_asc_ext,
  type = "o",
  pch = 19,
  cex = 0.8,
  lty = 1,
  lwd = 1.5,
  col = "gray20",
  xlab = "",
  ylab = "",
  xlim = c(head(breaks, 1) - amplitud, tail(breaks, 1)),
  ylim = c(0, N * 1.05),
  axes = TRUE,
  panel.first = grid(col = "gray85", lty = 1)
)

# Descending ogive, dashed and lighter.
lines(
  x = mc_ext,
  y = Ni_desc_ext,
  type = "o",
  pch = 19,
  cex = 0.8,
  lty = 2,
  lwd = 1.5,
  col = "gray60"
)

mtext("Frecuencia Acumulada", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Township", side = 1, line = 3.5, cex = 1)
mtext(
  "Gráfica N\u00b05: Ojivas Ascendente y Descendente \u2014 Township",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

legend(
  "right",
  legend = c("Ascendente", "Descendente"),
  col = c("gray20", "gray60"),
  lty = c(1, 2),
  lwd = 1.5,
  pch = 19,
  cex = 0.85,
  bty = "n"
)
Se calculan todos los indicadores estadísticos de la variable Township, que al ser cuantitativa discreta tratada como continua admite medidas de tendencia central, dispersión y forma.
# ── Central tendency ──────────────────────────────────────────────
# Mean from the grouped table (class marks weighted by frequency) and
# mean / median from the raw values.
media_agrupada <- sum(mc * ni) / N
media_directa <- mean(township)
mediana_val <- median(township)

# Mode by interpolation inside the modal class:
#   Mo = L + d1 / (d1 + d2) * c
# where L is the lower limit of the modal class, d1 / d2 are the frequency
# differences with the previous / next class (a missing neighbour counts
# as 0) and c is the class width.
idx_modal <- which.max(ni)
L_mo <- breaks[idx_modal]
d1 <- ni[idx_modal] - if (idx_modal > 1) ni[idx_modal - 1] else 0
d2 <- ni[idx_modal] - if (idx_modal < n_intervalos) ni[idx_modal + 1] else 0
moda_val <- L_mo + amplitud * (d1 / (d1 + d2))

# ── Dispersion ────────────────────────────────────────────────────
varianza_val <- var(township)
desv_val <- sd(township)
rango_val <- diff(range(township))
Q1_val <- as.numeric(quantile(township, probs = 0.25))
Q3_val <- as.numeric(quantile(township, probs = 0.75))
RIC_val <- Q3_val - Q1_val
# Coefficient of variation, as a percentage of the mean.
CV_val <- 100 * (desv_val / media_directa)

# ── Shape ─────────────────────────────────────────────────────────
asim_val <- skewness(township)
kurt_val <- kurtosis(township)
# ── Indicators table ──────────────────────────────────────────────
# Two-column summary (indicator name, formatted value) of the statistics
# computed above.
#
# Formatting notes:
#  * The mean and the interpolated mode are generally not integers, so they
#    are shown with two decimals (like the variance and the standard
#    deviation). Formatting them with "%.0f" displayed the mean as 23 even
#    though its value is about 23.31.
#  * Median, quartiles and IQR are rounded to two decimals and printed
#    without padding: whole numbers still appear as e.g. "25", while a
#    fractional quantile such as 24.5 is no longer rounded away.
tabla_indicadores <- data.frame(
  Indicador = c(
    "Variable",
    "Tipo de variable",
    "Rango de valores",
    "Media (\u03bc)",
    "Mediana (Me)",
    "Moda (Mo)",
    "Varianza (S\u00b2)",
    "Desv. Est\u00e1ndar (S)",
    "Rango (R)",
    "Q1",
    "Q3",
    "Rango Intercuart\u00edlico (RIC)",
    "Coef. de Variaci\u00f3n (CV%)",
    "Asimetr\u00eda (As)",
    "Curtosis (K)"
  ),
  Valor = c(
    "Township",
    "Cuantitativa Discreta (tratada como Continua)",
    paste0(min(township), " \u2014 ", max(township)),
    sprintf("%.2f", media_directa),
    format(round(mediana_val, 2)),
    sprintf("%.2f [intervalo modal: %s]", moda_val, etiquetas[idx_modal]),
    sprintf("%.2f", varianza_val),
    sprintf("%.2f", desv_val),
    as.character(rango_val),
    format(round(Q1_val, 2)),
    format(round(Q3_val, 2)),
    format(round(RIC_val, 2)),
    sprintf("%.2f%%", CV_val),
    sprintf("%.2f", asim_val),
    sprintf("%.2f", kurt_val)
  )
)
# Renders `tabla_indicadores` as a styled gt table: dark header, striped
# body, and the three central-tendency rows highlighted.
gt(tabla_indicadores) %>%
  tab_header(
    title = md("**Tabla N°2: Indicadores Estad\u00edsticos**"),
    subtitle = md("*Variable Cuantitativa Discreta: Township*")
  ) %>%
  cols_label(
    Indicador = md("**Indicador**"),
    Valor = md("**Valor**")
  ) %>%
  # Column labels: white bold text on a dark background.
  tab_style(
    locations = cells_column_labels(),
    style = list(
      cell_fill(color = "#2C2C2C"),
      cell_text(color = "white", weight = "bold")
    )
  ) %>%
  # Light fill on odd-numbered body rows (zebra striping).
  tab_style(
    locations = cells_body(
      rows = seq(from = 1, to = nrow(tabla_indicadores), by = 2)
    ),
    style = cell_fill(color = "#F5F5F5")
  ) %>%
  # Mean, median and mode rows: darker fill and bold text; applied after
  # the striping.
  tab_style(
    locations = cells_body(
      columns = everything(),
      rows = Indicador %in% c("Media (\u03bc)", "Mediana (Me)", "Moda (Mo)")
    ),
    style = list(
      cell_fill(color = "#D6D6D6"),
      cell_text(weight = "bold")
    )
  ) %>%
  tab_source_note(source_note = md("*Autor: Fernando Almeida*")) %>%
  tab_options(
    table.width = pct(70),
    table.font.size = px(13),
    heading.title.font.size = px(16),
    heading.subtitle.font.size = px(12),
    data_row.padding = px(6)
  )
| Tabla N°2: Indicadores Estadísticos | |
| Variable Cuantitativa Discreta: Township | |
| Indicador | Valor |
|---|---|
| Variable | Township |
| Tipo de variable | Cuantitativa Discreta (tratada como Continua) |
| Rango de valores | 1 — 35 |
| Media (μ) | 23 |
| Mediana (Me) | 25 |
| Moda (Mo) | 31 [intervalo modal: [29 - 33)] |
| Varianza (S²) | 75.07 |
| Desv. Estándar (S) | 8.66 |
| Rango (R) | 34 |
| Q1 | 17 |
| Q3 | 31 |
| Rango Intercuartílico (RIC) | 14 |
| Coef. de Variación (CV%) | 37.17% |
| Asimetría (As) | -0.56 |
| Curtosis (K) | -0.66 |
| Autor: Fernando Almeida | |
La variable Township presenta 47,757 registros válidos, con valores enteros entre 1 y 35. La media se ubica en 23.31 y la mediana en 25, lo que indica una distribución con asimetría negativa (cola izquierda más larga). El intervalo con mayor concentración de datos es [29 - 33), con 10,227 registros (21.41% del total). En cuanto a la forma, la distribución es platicúrtica (más aplanada que la normal). El coeficiente de variación de 37.17% indica una dispersión moderada respecto a la media.
Autor: Fernando Almeida