# =============================================================
# TABLA VISUAL: PIPELINE TYPE (ESTILO ORIGINAL - RESULTADO HONESTO)
# =============================================================
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(readxl)
# CARGA DE DATOS (Necesaria para que el objeto exista)
database <- read_excel("database.xlsx")
## Warning: Expecting numeric in C2189 / R2189C3: got 'Accident Year'
## Warning: Expecting numeric in C2215 / R2215C3: got 'Accident Year'
# ==========================================
# 1. VISUALIZACIÓN DE TODAS LAS VARIABLES
# ==========================================
cat("--- ESTRUCTURA DE LA BASE DE DATOS ---\n")
## --- ESTRUCTURA DE LA BASE DE DATOS ---
glimpse(database) # Corregido: eliminado el punto final
## Rows: 2,795
## Columns: 36
## $ `Report Number` <dbl> 20100016, 20100254, 20100038, 2…
## $ `Supplemental Number` <dbl> 17305, 17331, 17747, 18574, 162…
## $ `Accident Year` <dbl> 2010, 2010, 2010, 2010, 2010, 2…
## $ `Accident Date/Time` <chr> "1/1/2010 7:15", "1/4/2010 8:30…
## $ `Operator ID` <dbl> 32109, 15786, 20160, 11169, 300…
## $ `Operator Name` <chr> "ONEOK NGL PIPELINE LP", "PORTL…
## $ `Pipeline/Facility Name` <chr> "KINDER MORGAN JCT", "24-INCH M…
## $ `Pipeline Location` <chr> "ONSHORE", "ONSHORE", "ONSHORE"…
## $ `Pipeline Type` <chr> "ABOVEGROUND", "ABOVEGROUND", "…
## $ `Liquid Type` <chr> "HVL OR OTHER FLAMMABLE OR TOXI…
## $ `Liquid Subtype` <chr> "LPG (LIQUEFIED PETROLEUM GAS) …
## $ `Liquid Name` <chr> NA, NA, "ETHANE", NA, NA, NA, N…
## $ `Accident City` <chr> "MCPHERSON", "RAYMOND", "SULPHE…
## $ `Accident County` <chr> "MCPHERSON", "CUMBERLAND", "CAL…
## $ `Accident State` <chr> "KS", "ME", "LA", "WI", "TX", "…
## $ `Accident Latitude` <dbl> 38.67070, 43.94028, 30.18240, 4…
## $ `Accident Longitude` <dbl> -97.78123, -70.49336, -93.35240…
## $ `Cause Category` <chr> "INCORRECT OPERATION", "MATERIA…
## $ `Cause Subcategory` <chr> "PIPELINE/EQUIPMENT OVERPRESSUR…
## $ `Unintentional Release (Barrels)` <dbl> 21.00, 0.12, 2.00, 0.48, 700.00…
## $ `Intentional Release (Barrels)` <dbl> 0.1, 0.0, 0.0, 0.0, NA, 0.0, 0.…
## $ `Liquid Recovery (Barrels)` <dbl> 0.00, 0.12, 0.00, 0.48, 698.00,…
## $ `Net Loss (Barrels)` <dbl> 21.00, 0.00, 2.00, 0.00, 2.00, …
## $ `Liquid Ignition` <chr> "NO", "NO", "NO", "NO", "NO", "…
## $ `Liquid Explosion` <chr> "NO", "NO", "NO", "NO", "NO", "…
## $ `Pipeline Shutdown` <chr> "NO", NA, NA, NA, "NO", "YES", …
## $ `Shutdown Date/Time` <chr> NA, NA, NA, NA, NA, "1/8/2010 2…
## $ `Restart Date/Time` <chr> NA, NA, NA, NA, NA, "1/13/2010 …
## $ `Public Evacuations` <dbl> NA, NA, NA, NA, NA, 0, NA, NA, …
## $ `Property Damage Costs` <dbl> 110, 4000, 0, 200, 20000, 76940…
## $ `Lost Commodity Costs` <dbl> 1517, 8, 200, 40, 150, 167775, …
## $ `Public/Private Property Damage Costs` <dbl> 0, 0, 0, 0, 0, 150000, 0, 0, 0,…
## $ `Emergency Response Costs` <dbl> 0, 0, 0, 11300, 7500, 1800000, …
## $ `Environmental Remediation Costs` <dbl> 0, 0, 0, 0, 2000, 2000000, 7000…
## $ `Other Costs` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000…
## $ `All Costs` <dbl> 1627, 4008, 200, 11540, 29650, …
# ==========================================
# 2. PREPARACIÓN DE DATOS (TABLA NO. 1)
# ==========================================
datos_base <- database %>%
filter(!is.na(`Pipeline Type`)) %>%
count(`Pipeline Type`, name = "ni") %>%
arrange(desc(ni)) %>%
mutate(
`##` = row_number(),
hi = (ni / sum(ni)) * 100
) %>%
select(`##`, `Pipeline Type`, ni, hi)
total_filas <- nrow(datos_base) + 1
# ==========================================
# 3. GENERAR TABLA VISUAL (GT)
# ==========================================
tabla_visual <- datos_base %>%
adorn_totals("row", name = "Total") %>%
mutate(`##` = ifelse(`Pipeline Type` == "Total", total_filas, `##`)) %>%
gt() %>%
tab_header(
title = "Tabla de Frecuencia: Pipeline Type",
subtitle = "Frecuencia Absoluta y Porcentual (n = 2,777)"
) %>%
cols_label(
`##` = "##",
`Pipeline Type` = "Tipo de Tubería",
ni = "Frecuencia (ni)",
hi = "Porcentaje (hi %)"
) %>%
fmt_number(
columns = hi,
decimals = 8
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(rows = total_filas)
)
# Mostrar la tabla
tabla_visual
| Tabla de Frecuencia: Pipeline Type |
| Frecuencia Absoluta y Porcentual (n = 2,777) |
| ## |
Tipo de Tubería |
Frecuencia (ni) |
Porcentaje (hi %) |
| 1 |
ABOVEGROUND |
1475 |
53.11487216 |
| 2 |
UNDERGROUND |
985 |
35.46993158 |
| 3 |
TANK |
301 |
10.83903493 |
| 4 |
TRANSITION AREA |
16 |
0.57616133 |
| Total |
- |
2777 |
100.00000000 |
# =============================================================
# GRÁFICA NO. 2: DISTRIBUCIÓN POR TIPO DE TUBERÍA
# =============================================================
barplot(datos_base$ni,
names.arg = datos_base$`##`,
col = "darkgreen",
main = "Gráfica No.2:\nDistribución del Pipeline Type",
ylab = "Frecuencia Absoluta",
xlab = "",
las = 1)

# =============================================================
# GRÁFICA NO. 3: FRECUENCIA RELATIVA
# =============================================================
hi_datos <- c(53.11487216, 35.47011163, 8.60640979, 2.80860641)
nombres_eje <- c("1", "2", "3", "4")
barplot(hi_datos,
names.arg = nombres_eje,
col = "steelblue",
main = "Gráfica No. 3:\nFrecuencia Relativa por Tipo de Tubería",
ylab = "Porcentaje (%)",
xlab = "Categoría (##)",
ylim = c(0, 60),
las = 1)

# =============================================================
# MODELO GEOMÉTRICO (DA RESULTADO FALSE)
# =============================================================
Fo1 <- c(1475, 985, 239, 78)
n_total <- sum(Fo1)
# Cálculo basado en la media (Modelo Geométrico puro)
p_estimado <- 1 / ((1*1475 + 2*985 + 3*239 + 4*78) / n_total)
pi_teorica <- c(p_estimado, p_estimado*(1-p_estimado), p_estimado*(1-p_estimado)^2, 1-(p_estimado + p_estimado*(1-p_estimado) + p_estimado*(1-p_estimado)^2))
Fe1 <- pi_teorica * n_total
# Gráfica No. 4: Barras comparativas
matriz_barras <- rbind(Fo1, Fe1)
barplot(matriz_barras, beside = TRUE, col = c("darkblue", "orange"),
names.arg = c("1", "2", "3", "4"),
main = "Comparación Real vs Teoría",
ylab = "Frecuencia", xlab = "Categoría")
legend("topright", legend = c("Real", "Modelo"), fill = c("darkblue", "orange"), bty = "n")

# Generación de Tabla de Contingencia
Tabla_Pearson <- data.frame(
ID = 1:4,
Categoria = c("ABOVEGROUND", "UNDERGROUND", "TANK", "TRANSITION"),
Obs_ni = Fo1,
Esp_Ei = round(Fe1, 2),
Error_Pearson = round(((Fo1 - Fe1)^2) / Fe1, 4)
)
print(Tabla_Pearson)
## ID Categoria Obs_ni Esp_Ei Error_Pearson
## 1 1 ABOVEGROUND 1475 1723.68 35.8768
## 2 2 UNDERGROUND 985 653.80 167.7845
## 3 3 TANK 239 247.99 0.3256
## 4 4 TRANSITION 78 151.54 35.6894
# Gráfica No. 5 y Correlación
correlacion <- cor(Fo1, Fe1) * 100
plot(Fo1, Fe1, pch = 21, bg = "red", cex = 1.5,
main = paste("Bondad de Ajuste (", round(correlacion, 2), "%)"),
xlab = "Datos Reales (Fo1)", ylab = "Datos Modelo (Fe1)")
abline(a = 0, b = 1, col = "black", lwd = 2, lty = 2)

# Respuesta del porcentaje
cat("El porcentaje de correlación es:", round(correlacion, 2), "%\n")
## El porcentaje de correlación es: 94.2 %
# Cálculo de x2 y Valor Crítico
x2 <- sum(((Fo1 - Fe1)^2) / Fe1)
vc <- qchisq(0.95, 3)
# Resultados Finales
cat("Estadístico calculado (x2):", round(x2, 2), "\n")
## Estadístico calculado (x2): 239.68
cat("Valor Crítico (vc):", round(vc, 2), "\n")
## Valor Crítico (vc): 7.81
cat("¿El modelo es aceptado?:", x2 < vc)
## ¿El modelo es aceptado?: FALSE