Nombre Variable: LON
Tipo: Cuantitativa Continua
Escala: Intervalo (el meridiano 0° es un origen convencional, no un cero absoluto)
La longitud geográfica permite identificar la distribución espacial este-oeste de las instalaciones mineras en EE.UU. Su análisis revela en qué regiones geográficas se concentra la actividad minera, complementando el análisis de correlación con las emisiones de CO2, NOx y CH4 por estado.
library(dplyr)
library(gt)
library(e1071)
library(MASS)
# Read the raw dataset: semicolon-separated, text columns kept as character.
datos <- read.csv(
  file = "~/Estudio/TERCER SEMESTRE/Estadistica/Dataset.csv",
  sep = ";",
  stringsAsFactors = FALSE
)

# The x column arrives as text with a decimal comma: swap the comma for a dot
# and parse the result as numeric.
LON <- as.numeric(gsub(",", ".", datos$x))

# Keep only longitudes inside the closed range [-170, -60]. which() discards
# the positions where the comparison is NA, so missing values are dropped too.
LON <- LON[which(LON >= -170 & LON <= -60)]
n <- length(LON)
cat("n validos:", n, "\n")
## n validos: 2996
## Min: -165.393
## Max: -65.948
# ---- Class limits for the frequency table (Sturges' rule) ----
# Keep only finite values BEFORE deriving n, k and the class width, so that
# every quantity below is computed from the same vector.
LON <- LON[is.finite(LON)]
n <- length(LON)
cat("n despues de limpiar:", n, "\n")
## n despues de limpiar: 2996
## Min: -165.393
## Max: -65.948

# Number of classes: k = ceiling(1 + 3.322 * log10(n)).
k <- ceiling(1 + 3.322 * log10(n))
cat("k (Sturges) :", k, "\n")
## k (Sturges) : 13

# Class width (amplitude): data range divided by k, rounded to one decimal.
am <- round((max(LON) - min(LON)) / k, 1)
cat("k:", k, "| am:", am, "\n")
## k: 13 | am: 7.6
## Amplitud ajustada: 7.6

# k + 1 limits starting at the minimum. Because am is rounded, k * am need not
# equal the data range, so the last limit is pinned just above the maximum to
# keep every observation inside a class.
# NOTE(review): in the recorded run (range 99.445, am = 7.6) this makes the
# last class wider than the others (8.246 vs 7.6), i.e. the classes are not
# equidistant -- confirm this is intended.
breaks <- seq(min(LON), min(LON) + k * am, length.out = k + 1)
breaks[length(breaks)] <- max(LON) + 0.001
cat("Breaks OK:", length(breaks), "limites\n")
## Breaks OK: 14 limites

# Print the limits announced by the header (the recorded output below shows
# them rounded to three decimals).
cat("Límites de clases:\n")
print(round(breaks, 3))
## Límites de clases:
## [1] -165.393 -157.793 -150.193 -142.593 -134.993 -127.393 -119.793 -112.193
## [9] -104.593 -96.993 -89.393 -81.793 -74.193 -65.947
# ---- Frequency distribution table ----
# Assign every longitude to its class. Intervals are closed on the left,
# [a, b); with right = FALSE, include.lowest = TRUE also admits a value equal
# to the highest limit into the last class.
cortes <- cut(LON, breaks = breaks, include.lowest = TRUE, right = FALSE)

fi <- as.vector(table(cortes))   # absolute frequency per class
fri <- round(fi / n * 100, 2)    # relative frequency (%)
Ni <- cumsum(fi)                 # ascending cumulative frequency
Nd <- rev(cumsum(rev(fi)))       # descending cumulative frequency
Hi <- round(Ni / n * 100, 2)     # ascending cumulative percentage
Hd <- round(Nd / n * 100, 2)     # descending cumulative percentage

# Class mark: midpoint between the lower and upper limit of each class.
mc <- round((breaks[-length(breaks)] + breaks[-1]) / 2, 3)

# One row per class: limits, class mark and the six frequency columns.
TDF <- data.frame(
  Desde = round(breaks[-length(breaks)], 3),
  Hasta = round(breaks[-1], 3),
  MC = mc,
  fi = fi, fri = fri,
  Ni = Ni, Nd = Nd,
  Hi = Hi, Hd = Hd
)
print(TDF)
## Desde Hasta MC fi fri Ni Nd Hi Hd
## 1 -165.393 -157.793 -161.593 4 0.13 4 2996 0.13 100.00
## 2 -157.793 -150.193 -153.993 3 0.10 7 2992 0.23 99.87
## 3 -150.193 -142.593 -146.393 13 0.43 20 2989 0.67 99.77
## 4 -142.593 -134.993 -138.793 1 0.03 21 2976 0.70 99.33
## 5 -134.993 -127.393 -131.193 1 0.03 22 2975 0.73 99.30
## 6 -127.393 -119.793 -123.593 42 1.40 64 2974 2.14 99.27
## 7 -119.793 -112.193 -115.993 175 5.84 239 2932 7.98 97.86
## 8 -112.193 -104.593 -108.393 222 7.41 461 2757 15.39 92.02
## 9 -104.593 -96.993 -100.793 115 3.84 576 2535 19.23 84.61
## 10 -96.993 -89.393 -93.193 219 7.31 795 2420 26.54 80.77
## 11 -89.393 -81.793 -85.593 1200 40.05 1995 2201 66.59 73.46
## 12 -81.793 -74.193 -77.993 904 30.17 2899 1001 96.76 33.41
## 13 -74.193 -65.947 -70.070 97 3.24 2996 97 100.00 3.24
# ---- Tabla 1: formatted frequency table (gt) ----
# Render TDF with a title, readable column labels and a source note.
TDF %>%
  gt() %>%
  tab_header(
    title = md("**Tabla 1**"),
    subtitle = md("Distribución de frecuencias — Longitud (x)")
  ) %>%
  cols_label(
    Desde = "Desde (°)", Hasta = "Hasta (°)", MC = "Marca Clase",
    fi = "fi", fri = "fri (%)",
    Ni = "Ni Asc.", Nd = "Ni Desc.",
    Hi = "Hi Asc. %", Hd = "Hi Desc. %"
  ) %>%
  tab_source_note(md("Fuente: Dataset MSHA — Instalaciones Mineras EE.UU."))
# Rendered table as recorded in the report (kept as comments so the script
# still parses):
## | Tabla 1 | ||||||||
## | Distribución de frecuencias — Longitud (x) | ||||||||
## | Desde (°) | Hasta (°) | Marca Clase | fi | fri (%) | Ni Asc. | Ni Desc. | Hi Asc. % | Hi Desc. % |
## |---|---|---|---|---|---|---|---|---|
## | -165.393 | -157.793 | -161.593 | 4 | 0.13 | 4 | 2996 | 0.13 | 100.00 |
## | -157.793 | -150.193 | -153.993 | 3 | 0.10 | 7 | 2992 | 0.23 | 99.87 |
## | -150.193 | -142.593 | -146.393 | 13 | 0.43 | 20 | 2989 | 0.67 | 99.77 |
## | -142.593 | -134.993 | -138.793 | 1 | 0.03 | 21 | 2976 | 0.70 | 99.33 |
## | -134.993 | -127.393 | -131.193 | 1 | 0.03 | 22 | 2975 | 0.73 | 99.30 |
## | -127.393 | -119.793 | -123.593 | 42 | 1.40 | 64 | 2974 | 2.14 | 99.27 |
## | -119.793 | -112.193 | -115.993 | 175 | 5.84 | 239 | 2932 | 7.98 | 97.86 |
## | -112.193 | -104.593 | -108.393 | 222 | 7.41 | 461 | 2757 | 15.39 | 92.02 |
## | -104.593 | -96.993 | -100.793 | 115 | 3.84 | 576 | 2535 | 19.23 | 84.61 |
## | -96.993 | -89.393 | -93.193 | 219 | 7.31 | 795 | 2420 | 26.54 | 80.77 |
## | -89.393 | -81.793 | -85.593 | 1200 | 40.05 | 1995 | 2201 | 66.59 | 73.46 |
## | -81.793 | -74.193 | -77.993 | 904 | 30.17 | 2899 | 1001 | 96.76 | 33.41 |
## | -74.193 | -65.947 | -70.070 | 97 | 3.24 | 2996 | 97 | 100.00 | 3.24 |
## | Fuente: Dataset MSHA — Instalaciones Mineras EE.UU. | ||||||||
# ---- Gráfica 1: histogram over the full range ----
par(mar = c(5, 5, 4, 2))
# freq = TRUE: when the breaks are not equidistant hist() plots densities by
# default, which would contradict the "Frecuencia absoluta" axis label; forcing
# counts keeps the bars equal to the fi column. R may warn that bar areas are
# not comparable if the class widths differ.
# right = FALSE: same interval closure, [a, b), as the cut() call that builds
# the frequency table, so both count identical observations per class.
hist(LON, breaks = breaks, freq = TRUE, right = FALSE,
     col = "#C00000", border = "white",
     main = "Gráfica 1: Frecuencia Absoluta Global — LON",
     xlab = "Longitud (°)", ylab = "Frecuencia absoluta")

# ---- Gráfica 2: zoom on the range -108° to -60° ----
LON_z <- LON[LON >= -108 & LON <= -60]
if (length(LON_z) > 1) {
  # Sturges' rule on the subset, with equally spaced limits from its minimum
  # to its maximum.
  k_z <- ceiling(1 + 3.322 * log10(length(LON_z)))
  br_z <- seq(min(LON_z), max(LON_z), length.out = k_z + 1)
  par(mar = c(5, 5, 4, 2))
  hist(LON_z, breaks = br_z, col = "#C00000", border = "white",
       main = "Gráfica 2: Rango Principal — LON (-108° a -60°)",
       xlab = "Longitud (°)", ylab = "Frecuencia absoluta")
}

# Data for the ogive plot: class marks with both cumulative frequencies.
ojiva <- data.frame(mc = mc, Ni = Ni, Nd = Nd)
# ---- Gráfica 5: ascending and descending ogives ----
# ylim spans both cumulative series so the descending curve, drawn afterwards
# with lines(), can never fall outside the plotting region.
plot(ojiva$mc, ojiva$Ni, type = "b", col = "black", pch = 16, lwd = 1.5,
     ylim = range(ojiva$Ni, ojiva$Nd),
     main = "Gráfica 5: Ojivas Combinadas — LON",
     xlab = "Longitud (°)", ylab = "Frecuencia acumulada")
lines(ojiva$mc, ojiva$Nd, type = "b", col = "#C00000", pch = 16, lwd = 1.5)
legend("right", legend = c("Ascendente","Descendente"),
       col = c("black","#C00000"), lwd = 2, pch = 16, bty = "n")

# ---- Gráfica 6: horizontal box plot ----
boxplot(LON, horizontal = TRUE, col = "#F4CCCC", border = "#7B0000",
        main = "Gráfica 6: Diagrama de Caja — LON",
        xlab = "Longitud (°)")

# Western group: longitudes in [-165, -100).
# NOTE(review): the lower bound -165 leaves out anything west of -165°. The
# recorded minimum is -165.393 and the recorded group sizes
# (488 + 1953 + 554 = 2995) fall one short of n = 2996 -- confirm whether the
# bound should be dropped.
g1 <- LON[LON >= -165 & LON < -100]
# ---- Gráfica 7: exponential model for the western group (g1) ----
# Degrees west of -100°, shifted by 0.001 so that every value is strictly
# positive.
g1_pos <- abs(g1) - 100 + 0.001
# Rate of the exponential model: reciprocal of the sample mean.
lam <- 1 / mean(g1_pos)
# Sturges' rule; max(..., 2) keeps log10() away from 0 and 1 observations.
k1 <- ceiling(1 + 3.322 * log10(max(length(g1), 2)))
br1 <- seq(min(g1), max(g1), length.out = k1 + 1)
cat("n =", length(g1), "| lambda =", round(lam, 4), "\n")
## n = 488 | lambda = 0.0736

par(mar = c(5, 5, 4, 2))
hist(g1, breaks = br1, col = "#FFC7CE", border = "white", freq = FALSE,
     main = "Gráfica 7: Modelo Exponencial — LON (-165° a -100°)",
     xlab = "Longitud (°)", ylab = "Densidad")
# The curve applies the same transform to the x axis as g1_pos, so the fitted
# density is drawn on the original longitude scale.
curve(dexp(abs(x) - 100 + 0.001, rate = lam), add = TRUE, col = "red", lwd = 2)
legend("topright", legend = c("Histograma","Modelo exponencial"),
       fill = c("#FFC7CE", NA), lty = c(NA,1),
       col = c("black","red"), lwd = c(NA,2), border = c("black",NA), bty = "n")

# Central group: longitudes in [-100, -80).
g2 <- LON[LON >= -100 & LON < -80]
# ---- Gráfica 8: normal model for the central group (g2) ----
# Parameters of the normal model: sample mean and sample standard deviation.
mu2 <- mean(g2)
sig2 <- sd(g2)
# Sturges' rule and equally spaced limits over the group's own range.
k2 <- ceiling(1 + 3.322 * log10(length(g2)))
br2 <- seq(min(g2), max(g2), length.out = k2 + 1)
cat("n =", length(g2), "| mu =", round(mu2,3), "| sigma =", round(sig2,3), "\n")
## n = 1953 | mu = -84.994 | sigma = 4.865

par(mar = c(5, 5, 4, 2))
hist(g2, breaks = br2, col = "#C6EFCE", border = "white", freq = FALSE,
     main = "Gráfica 8: Modelo Normal — LON (-100° a -80°)",
     xlab = "Longitud (°)", ylab = "Densidad")
curve(dnorm(x, mean = mu2, sd = sig2), add = TRUE, col = "darkgreen", lwd = 2)
legend("topright", legend = c("Histograma","Modelo normal"),
       fill = c("#C6EFCE", NA), lty = c(NA,1),
       col = c("black","darkgreen"), lwd = c(NA,2), border = c("black",NA), bty = "n")

# Eastern group: longitudes in [-80, -65].
g3 <- LON[LON >= -80 & LON <= -65]
# ---- Gráfica 9: gamma model for the eastern group (g3) ----
# Degrees west of -65°, shifted by 0.001 so that every value is strictly
# positive.
# NOTE(review): g3_pos grows towards the west, so the long right tail of the
# fitted gamma points west, while the narrative describes a tail towards the
# east -- confirm the orientation of this transform (e.g. g3 + 80 instead).
g3_pos <- abs(g3) - 65 + 0.001
k3 <- ceiling(1 + 3.322 * log10(length(g3)))
br3 <- seq(min(g3), max(g3), length.out = k3 + 1)
# Maximum-likelihood gamma fit (MASS::fitdistr).
fg <- fitdistr(g3_pos, "gamma")
sh <- fg$estimate["shape"]
rt <- fg$estimate["rate"]
cat("n =", length(g3), "| shape =", round(sh,4), "| rate =", round(rt,4), "\n")
## n = 554 | shape = 15.6911 | rate = 1.3313

par(mar = c(5, 5, 4, 2))
hist(g3, breaks = br3, col = "#DDEBF7", border = "white", freq = FALSE,
     main = "Gráfica 9: Modelo Gamma — LON (-80° a -65°)",
     xlab = "Longitud (°)", ylab = "Densidad")
x_g3 <- seq(0.001, max(g3_pos), length.out = 200)
# Map the model variable back to longitude with the exact inverse of the
# g3_pos transform, including the 0.001 shift.
lines(-(x_g3 + 65 - 0.001), dgamma(x_g3, shape = sh, rate = rt),
      col = "blue", lwd = 2)
legend("topright", legend = c("Histograma","Modelo gamma"),
       fill = c("#DDEBF7", NA), lty = c(NA,1),
       col = c("black","blue"), lwd = c(NA,2), border = c("black",NA), bty = "n")

# ---- Tabla 2: summary of the three fitted models ----
# One row per longitude group: size, share of n, model and fitted parameters.
data.frame(
  Agrupacion = c("-165° a -100°","-100° a -80°","-80° a -65°"),
  n = c(length(g1), length(g2), length(g3)),
  Pct = c(round(length(g1)/n*100,1),
          round(length(g2)/n*100,1),
          round(length(g3)/n*100,1)),
  Modelo = c("Exponencial","Normal","Gamma"),
  Parametros = c(paste0("lambda=",round(lam,4)),
                 paste0("mu=",round(mu2,3)," | sigma=",round(sig2,3)),
                 paste0("shape=",round(sh,3)," | rate=",round(rt,3))),
  Region = c("Alaska y oeste lejano",
             "Centro y Appalachia",
             "Costa atlántica este")
) %>%
  gt() %>%
  tab_header(
    title = md("**Tabla 2**"),
    subtitle = md("Resumen de modelos probabilísticos — Longitud (x)")
  ) %>%
  cols_label(
    Agrupacion="Agrupación", n="n", Pct="% total",
    Modelo="Modelo", Parametros="Parámetros", Region="Región"
  ) %>%
  tab_source_note(md("Fuente: Dataset MSHA — Instalaciones Mineras EE.UU."))
# Rendered table as recorded in the report (kept as comments so the script
# still parses):
## | Tabla 2 | |||||
## | Resumen de modelos probabilísticos — Longitud (x) | |||||
## | Agrupación | n | % total | Modelo | Parámetros | Región |
## |---|---|---|---|---|---|
## | -165° a -100° | 488 | 16.3 | Exponencial | lambda=0.0736 | Alaska y oeste lejano |
## | -100° a -80° | 1953 | 65.2 | Normal | mu=-84.994 | sigma=4.865 | Centro y Appalachia |
## | -80° a -65° | 554 | 18.5 | Gamma | shape=15.691 | rate=1.331 | Costa atlántica este |
## | Fuente: Dataset MSHA — Instalaciones Mineras EE.UU. | |||||
# ---- Tabla 3: summary indicators for LON ----
media <- mean(LON)
mediana <- median(LON)
desv <- sd(LON)
# Coefficient of variation in percent, relative to the absolute mean.
cv <- desv / abs(media) * 100
# skewness() and kurtosis() come from e1071.
asim <- skewness(LON)
kurt <- kurtosis(LON)
# 95% confidence interval for the mean, using Student's t with n - 1 degrees
# of freedom.
ic_inf <- media - qt(0.975, n-1) * desv / sqrt(n)
ic_sup <- media + qt(0.975, n-1) * desv / sqrt(n)

# Resultado mixes a text cell with numbers, so c() turns the whole column into
# character.
data.frame(
  Indicador = c("Media (IC 95%)","Mediana","Desv. S",
                "CV (%)","Asimetria","Curtosis"),
  Resultado = c(
    paste0(round(media,3)," [",round(ic_inf,3)," - ",round(ic_sup,3),"]"),
    round(mediana,3), round(desv,3),
    round(cv,2), round(asim,4), round(kurt,4)
  )
) %>%
  gt() %>%
  tab_header(
    title = md("**Tabla 3**"),
    subtitle = md("Indicadores estadísticos — LON")
  ) %>%
  cols_label(Indicador="Indicador", Resultado="Resultado") %>%
  tab_source_note(md("Fuente: Dataset MSHA — Instalaciones Mineras EE.UU."))
# Rendered table as recorded in the report (kept as comments so the script
# still parses):
## | Tabla 3 | |
## | Indicadores estadísticos — LON | |
## | Indicador | Resultado |
## |---|---|
## | Media (IC 95%) | -88.16 [-88.624 - -87.695] |
## | Mediana | -82.804 |
## | Desv. S | 12.973 |
## | CV (%) | 14.71 |
## | Asimetria | -1.7628 |
## | Curtosis | 3.5791 |
## | Fuente: Dataset MSHA — Instalaciones Mineras EE.UU. | |
# ---- Outliers by the 1.5 * IQR rule ----
# First and third quartiles of LON.
q1 <- quantile(LON, 0.25)
q3 <- quantile(LON, 0.75)

# A value is atypical when it lies below q1 - 1.5 * IQR or above
# q3 + 1.5 * IQR.
outs <- LON[LON < q1 - 1.5 * (q3 - q1) | LON > q3 + 1.5 * (q3 - q1)]
cat("Número de valores atípicos:", length(outs), "\n")
## Número de valores atípicos: 372

# Report the extremes only when at least one outlier exists.
if (length(outs) > 0) {
  cat("Mínimo outlier:", round(min(outs), 3), "grados\n")
  cat("Máximo outlier:", round(max(outs), 3), "grados\n")
}
## Mínimo outlier: -165.393 grados
## Máximo outlier: -107.536 grados
# ---- Conclusions ----
# Print the narrative summary, filling in the range, mean, median and the
# share of each longitude group. The two blank lines inside the string separate
# the paragraphs, as shown in the recorded output below.
cat(sprintf(
"La variable LON registra la longitud geografica de cada instalacion
minera. Los valores oscilan entre %.3f° y %.3f°, con media de %.3f°
y mediana de %.3f°.

El analisis por agrupaciones muestra tres zonas geograficas:
- Alaska y oeste (-165° a -100°): %.1f%% de las minas, distribucion
exponencial (baja densidad, datos dispersos).
- Centro y Appalachia (-100° a -80°): %.1f%% de las minas, distribucion
normal (zona de mayor concentracion minera).
- Costa atlantica este (-80° a -65°): %.1f%% de las minas, distribucion
gamma (asimetrica, cola hacia el este).

La mayor concentracion en la zona central confirma que Appalachia es
el nucleo minero de EE.UU., consistente con los estados que reportan
mayores emisiones de CO2, NOx y CH4 en 2018.\n",
  min(LON), max(LON), media, mediana,
  round(length(g1)/n*100,1),
  round(length(g2)/n*100,1),
  round(length(g3)/n*100,1)
))
## La variable LON registra la longitud geografica de cada instalacion
## minera. Los valores oscilan entre -165.393° y -65.948°, con media de -88.160°
## y mediana de -82.804°.
##
## El analisis por agrupaciones muestra tres zonas geograficas:
## - Alaska y oeste (-165° a -100°): 16.3% de las minas, distribucion
## exponencial (baja densidad, datos dispersos).
## - Centro y Appalachia (-100° a -80°): 65.2% de las minas, distribucion
## normal (zona de mayor concentracion minera).
## - Costa atlantica este (-80° a -65°): 18.5% de las minas, distribucion
## gamma (asimetrica, cola hacia el este).
##
## La mayor concentracion en la zona central confirma que Appalachia es
## el nucleo minero de EE.UU., consistente con los estados que reportan
## mayores emisiones de CO2, NOx y CH4 en 2018.