La variable STATE representa el estado administrativo responsable de cada instalación minera. Esta variable es importante porque las emisiones de CO₂, NOx y CH₄ se registran en función del estado al que pertenece administrativamente cada mina.
library(dplyr)
library(gt)
# Load the mining-facility dataset. read.csv2() uses ";" as the field
# separator and "," as the decimal mark by default.
datos <- read.csv2(file = "Dataset.csv")

# Show the available column names so the variables of interest can be located.
colnames(datos)
## [1] "index" "FID"
## [3] "NGAID" "SRCLNKID"
## [5] "METLNKID" "FEATTYPE"
## [7] "SECCLASS" "NAME"
## [9] "AREA_" "PHONE"
## [11] "ADDRESS" "ADDRESS2"
## [13] "CITY" "STATE"
## [15] "ZIP" "ZIPP4"
## [17] "COUNTY" "FIPS"
## [19] "DIRECTIONS" "EMERGTITLE"
## [21] "EMERGPHONE" "EMERGEXT"
## [23] "CONTDATE" "CONTHOW"
## [25] "GEODATE" "GEOHOW"
## [27] "HSIPTHEMES" "NAICSCODE"
## [29] "NAICSDESCR" "GEOLINKID"
## [31] "SOURCE" "x"
## [33] "y" "ST_VENDOR"
## [35] "ST_VERSION" "GEOPREC"
## [37] "PHONELOC" "QC_QA"
## [39] "MINE_ID" "INSPEC_OFF"
## [41] "SIC" "CANVASS"
## [43] "MINE_TYPE" "STAT_CODE"
## [45] "STAT_DATE" "COMPNAME"
## [47] "MINE_PLANT" "MADDRESS"
## [49] "MCITY" "MSTATE"
## [51] "MZIP" "MCOUNTY"
## [53] "SUBUNIT_NU" "SUBUNIT_1"
## [55] "SUBUNIT_2" "SUBUNIT_3"
## [57] "SUBUNIT_4" "CO2.total.emissions..tons..by.state"
## [59] "NOx.total.emissions..tons..by.state" "CH4.total.emissions..tons..by.state"
# Extract the STATE column as text, stripping surrounding whitespace.
estado_bruto <- trimws(as.character(datos$STATE))

# A record is valid when it is neither missing nor an empty string.
es_invalido <- is.na(estado_bruto) | estado_bruto == ""
ESTADO <- estado_bruto[!es_invalido]

# Number of valid STATE records; used below as the denominator for
# relative frequencies.
n <- length(ESTADO)

# One-row summary of how many valid records were kept.
data.frame(Variable = "STATE", Registros_Validos = n)
## Variable Registros_Validos
## 1 STATE 2996
# Map every state code in ESTADO to a reporting region.
# The grouping is a lookup table (region label -> state codes); any code that
# does not appear in the table is assigned to "Otros".
estados_por_region <- list(
  "Appalachia" = c("KY", "WV", "VA", "PA", "TN"),
  "Sur" = c("AL", "GA", "MS", "AR", "LA", "FL", "SC", "NC", "TX", "OK"),
  "Noreste" = c(
    "OH", "IN", "MI", "NY", "MD", "NJ", "CT", "MA",
    "VT", "NH", "ME", "RI", "DE"
  ),
  "Centro" = c("IL", "MO", "KS", "IA", "MN", "WI", "ND", "SD", "NE"),
  "Suroeste" = c("AZ", "NM", "CO", "UT", "NV", "CA"),
  "Noroeste" = c("WA", "OR", "ID", "MT", "WY"),
  "Alaska/Hawaii" = c("AK", "HI")
)

# Flatten the table into two parallel vectors: state code and its region label.
codigos_estado <- unlist(estados_por_region, use.names = FALSE)
etiquetas_region <- rep(
  names(estados_por_region),
  times = lengths(estados_por_region)
)

# Vectorized lookup: match() returns NA for codes absent from the table,
# which are then labelled "Otros" (same fallback as the final else branch of
# the former if/else chain).
region <- etiquetas_region[match(ESTADO, codigos_estado)]
region[is.na(region)] <- "Otros"

# Absolute frequency of facilities per region.
table(region)
## region
## Alaska/Hawaii Appalachia Centro Noreste Noroeste
## 22 1509 148 359 123
## Otros Sur Suroeste
## 3 507 325
# Frequency distribution table (TDF) of facilities per region, ordered from
# the most to the least frequent region. `n` is the number of valid STATE
# records computed earlier in the script.
#   fi      absolute frequency
#   fri     relative frequency (%), rounded to 2 decimals
#   Ni_asc  ascending cumulative absolute frequency
#   Ni_des  descending cumulative absolute frequency
#   Hi_asc  ascending cumulative relative frequency (%)
#   Hi_des  descending cumulative relative frequency (%)
TDF <- as.data.frame(table(region)) %>%
  rename(
    Region = region,
    fi = Freq
  ) %>%
  arrange(desc(fi)) %>%
  mutate(
    fri = round(fi / n * 100, 2),
    Ni_asc = cumsum(fi),
    Ni_des = rev(cumsum(rev(fi))),
    # Cumulative percentages are derived from the exact cumulative counts.
    # Accumulating the already-rounded `fri` values would compound rounding
    # error and could yield totals such as 99.99 or 100.01.
    Hi_asc = round(Ni_asc / n * 100, 2),
    Hi_des = round(Ni_des / n * 100, 2)
  )

# Append a "Total" summary row.
# NOTE(review): for the descending cumulative columns the Total row takes the
# minimum (i.e. the value of the last class), mirroring the original layout;
# confirm whether the grand total or a blank cell is preferred there.
TDF_total <- TDF %>%
  add_row(
    Region = "Total",
    fi = sum(TDF$fi),
    # Computed from the counts rather than from the rounded percentages.
    fri = round(sum(TDF$fi) / n * 100, 2),
    Ni_asc = max(TDF$Ni_asc),
    Ni_des = min(TDF$Ni_des),
    Hi_asc = max(TDF$Hi_asc),
    Hi_des = min(TDF$Hi_des)
  )
TDF_total
## Region fi fri Ni_asc Ni_des Hi_asc Hi_des
## 1 Appalachia 1509 50.37 1509 2996 50.37 100.00
## 2 Sur 507 16.92 2016 1487 67.29 49.63
## 3 Noreste 359 11.98 2375 980 79.27 32.71
## 4 Suroeste 325 10.85 2700 621 90.12 20.73
## 5 Centro 148 4.94 2848 296 95.06 9.88
## 6 Noroeste 123 4.11 2971 148 99.17 4.94
## 7 Alaska/Hawaii 22 0.73 2993 25 99.90 0.83
## 8 Otros 3 0.10 2996 3 100.00 0.10
## 9 Total 2996 100.00 2996 3 100.00 0.10
# Render the frequency table as a formatted gt table ("Tabla 1").
tabla_1 <- gt(TDF_total)

# Title and subtitle.
tabla_1 <- tab_header(
  tabla_1,
  title = md("**Tabla 1**"),
  subtitle = md("Distribución de frecuencias por región geográfica")
)

# Human-readable column headers.
tabla_1 <- cols_label(
  tabla_1,
  Region = "Región",
  fi = "fi",
  fri = "fri (%)",
  Ni_asc = "Ni Asc.",
  Ni_des = "Ni Desc.",
  Hi_asc = "Hi Asc. %",
  Hi_des = "Hi Desc. %"
)

# Emphasise the summary row in bold; the resulting table is the value of the
# last expression, so it is displayed.
tab_style(
  tabla_1,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Region == "Total")
)
| Tabla 1 | ||||||
| Distribución de frecuencias por región geográfica | ||||||
| Región | fi | fri (%) | Ni Asc. | Ni Desc. | Hi Asc. % | Hi Desc. % |
|---|---|---|---|---|---|---|
| Appalachia | 1509 | 50.37 | 1509 | 2996 | 50.37 | 100.00 |
| Sur | 507 | 16.92 | 2016 | 1487 | 67.29 | 49.63 |
| Noreste | 359 | 11.98 | 2375 | 980 | 79.27 | 32.71 |
| Suroeste | 325 | 10.85 | 2700 | 621 | 90.12 | 20.73 |
| Centro | 148 | 4.94 | 2848 | 296 | 95.06 | 9.88 |
| Noroeste | 123 | 4.11 | 2971 | 148 | 99.17 | 4.94 |
| Alaska/Hawaii | 22 | 0.73 | 2993 | 25 | 99.90 | 0.83 |
| Otros | 3 | 0.10 | 2996 | 3 | 100.00 | 0.10 |
| Total | 2996 | 100.00 | 2996 | 3 | 100.00 | 0.10 |
# Plotting data: the frequency table without its "Total" summary row.
TDF_graf <- filter(TDF_total, Region != "Total")

# Bar chart of absolute frequencies (fi) per region.
# las = 2 draws the axis labels perpendicular to the axis.
barplot(
  height = TDF_graf$fi,
  names.arg = TDF_graf$Region,
  main = "Frecuencia Absoluta por Región",
  ylab = "fi",
  col = "#C00000",
  las = 2
)

# Bar chart of relative frequencies (fri, in %) per region.
# The y axis is extended 5 points above the tallest bar.
limite_superior <- max(TDF_graf$fri) + 5
barplot(
  height = TDF_graf$fri,
  names.arg = TDF_graf$Region,
  main = "Frecuencia Relativa por Región",
  ylab = "Porcentaje (%)",
  ylim = c(0, limite_superior),
  col = "#FF9999",
  las = 2
)
# Red-toned palette used for the pie chart slices (eight colours available).
colores <- c(
  "#C00000", "#FF9999", "#7B0000", "#FF6666",
  "#FF3333", "#CC0000", "#FF0000", "#FFB3B3"
)

# One colour per plotted region. seq_len() is used instead of 1:nrow() so an
# empty table yields an empty selection rather than the index vector c(1, 0).
# The subset is computed once and shared by the pie and its legend so both
# always agree.
colores_region <- colores[seq_len(nrow(TDF_graf))]

# Pie chart of the percentage share of each region. Slice labels are
# suppressed (labels = NA); the legend carries region names and percentages.
pie(
  TDF_graf$fri,
  labels = NA,
  col = colores_region,
  main = "Distribución porcentual por región"
)
legend(
  "topright",
  legend = paste0(TDF_graf$Region, " (", TDF_graf$fri, "%)"),
  fill = colores_region,
  cex = 0.8
)
# Mode: the region with the highest absolute frequency. which.max() returns
# the position of the first maximum, so ties resolve to the earliest row.
moda_idx <- which.max(TDF_graf[["fi"]])
moda <- TDF_graf[["Region"]][moda_idx]
moda_fi <- TDF_graf[["fi"]][moda_idx]
moda_fri <- TDF_graf[["fri"]][moda_idx]

# Text summary of the modal region: name, facility count and percentage.
resultado_moda <- paste0(
  moda, " (", moda_fi, " instalaciones, ", moda_fri, "%)"
)

# One-row indicator table, rendered with gt as "Tabla 2".
tabla_indicadores <- data.frame(
  Variable = "Estado administrativo (STATE)",
  Indicador = "Moda",
  Resultado = resultado_moda
)
tab_header(
  gt(tabla_indicadores),
  title = md("**Tabla 2**"),
  subtitle = md("Indicadores estadísticos")
)
| Tabla 2 | ||
| Indicadores estadísticos | ||
| Variable | Indicador | Resultado |
|---|---|---|
| Estado administrativo (STATE) | Moda | Appalachia (1509 instalaciones, 50.37%) |
La variable STATE representa el estado administrativo responsable de cada instalación minera. Se analizaron 2996 registros válidos, agrupados en ocho categorías regionales (siete regiones geográficas y la categoría «Otros», que reúne los códigos no asignados a ninguna región). La región con mayor concentración de instalaciones, es decir, la moda, es Appalachia, con 1509 instalaciones que representan el 50.37 % del total. Esta distribución resulta fundamental para estudiar la relación entre la actividad minera y las emisiones de CO₂, NOx y CH₄.