#UNIVERSIDAD CENTRAL DEL ECUADOR
#Tema: Estadística inferencial de variables cualitativas
#grupo 2
#2025-2026
# Load the dataset from its CSV file.
# The file is addressed by its full path instead of calling setwd(), so the
# working directory of the R session is left untouched.
data_dir <- "C:/Users/ronal/OneDrive/Desktop"
data_file <- file.path(data_dir, "database (1).csv")
# Stop early with an explicit message when the CSV is not at that location.
if (!file.exists(data_file)) {
  stop("Data file not found: ", data_file, call. = FALSE)
}
# Comma-separated values, first row holds the column names, "." is the
# decimal mark.
datos <- read.csv(data_file, header = TRUE, sep = ",", dec = ".")
# Print the structure of the data frame (columns, types, first values).
str(datos)
## 'data.frame': 2795 obs. of 36 variables:
## $ Report.Number : int 20100016 20100254 20100038 20100260 20100030 20100021 20110036 20100255 20100261 20100024 ...
## $ Supplemental.Number : int 17305 17331 17747 18574 16276 17161 18052 18584 18050 18390 ...
## $ Accident.Year : chr "2010" "2010" "2010" "2010" ...
## $ Accident.Date.Time : chr "1/1/2010 7:15" "1/4/2010 8:30" "1/5/2010 10:30" "1/6/2010 19:30" ...
## $ Operator.ID : int 32109 15786 20160 11169 300 11169 26041 12624 26041 31684 ...
## $ Operator.Name : chr "ONEOK NGL PIPELINE LP" "PORTLAND PIPELINE CORP" "PETROLOGISTICS OLEFINS, LLC" "ENBRIDGE ENERGY, LIMITED PARTNERSHIP" ...
## $ Pipeline.Facility.Name : chr "KINDER MORGAN JCT" "24-INCH MAIN LINE" "" "SUPERIOR TERMINAL" ...
## $ Pipeline.Location : chr "ONSHORE" "ONSHORE" "ONSHORE" "ONSHORE" ...
## $ Pipeline.Type : chr "ABOVEGROUND" "ABOVEGROUND" "ABOVEGROUND" "UNDERGROUND" ...
## $ Liquid.Type : chr "HVL OR OTHER FLAMMABLE OR TOXIC FLUID, GAS" "CRUDE OIL" "HVL OR OTHER FLAMMABLE OR TOXIC FLUID, GAS" "CRUDE OIL" ...
## $ Liquid.Subtype : chr "LPG (LIQUEFIED PETROLEUM GAS) / NGL (NATURAL GAS LIQUID)" "" "OTHER HVL" "" ...
## $ Liquid.Name : chr "" "" "ETHANE" "" ...
## $ Accident.City : chr "MCPHERSON" "RAYMOND" "SULPHER" "SUPERIOR" ...
## $ Accident.County : chr "MCPHERSON" "CUMBERLAND" "CALCASIEU" "DOUGLAS" ...
## $ Accident.State : chr "KS" "ME" "LA" "WI" ...
## $ Accident.Latitude : num 38.7 43.9 30.2 46.7 33.6 ...
## $ Accident.Longitude : num -97.8 -70.5 -93.4 -92.1 -96.6 ...
## $ Cause.Category : chr "INCORRECT OPERATION" "MATERIAL/WELD/EQUIP FAILURE" "MATERIAL/WELD/EQUIP FAILURE" "NATURAL FORCE DAMAGE" ...
## $ Cause.Subcategory : chr "PIPELINE/EQUIPMENT OVERPRESSURED" "PUMP OR PUMP-RELATED EQUIPMENT" "DEFECTIVE OR LOOSE TUBING/FITTING" "TEMPERATURE" ...
## $ Unintentional.Release..Barrels. : num 21 0.12 2 0.48 700 ...
## $ Intentional.Release..Barrels. : num 0.1 0 0 0 NA 0 0 0 0 0 ...
## $ Liquid.Recovery..Barrels. : num 0 0.12 0 0.48 698 ...
## $ Net.Loss..Barrels. : num 21 0 2 0 2 ...
## $ Liquid.Ignition : chr "NO" "NO" "NO" "NO" ...
## $ Liquid.Explosion : chr "NO" "NO" "NO" "NO" ...
## $ Pipeline.Shutdown : chr "NO" "" "" "" ...
## $ Shutdown.Date.Time : chr "" "" "" "" ...
## $ Restart.Date.Time : chr "" "" "" "" ...
## $ Public.Evacuations : int NA NA NA NA NA 0 NA NA NA NA ...
## $ Property.Damage.Costs : int 110 4000 0 200 20000 76940 0 400 0 0 ...
## $ Lost.Commodity.Costs : int 1517 8 200 40 150 167775 400 13 336 50 ...
## $ Public.Private.Property.Damage.Costs: int 0 0 0 0 0 150000 0 0 0 0 ...
## $ Emergency.Response.Costs : int 0 0 0 11300 7500 1800000 0 0 0 10000 ...
## $ Environmental.Remediation.Costs : int 0 0 0 0 2000 2000000 70000 0 40000 10000 ...
## $ Other.Costs : int 0 0 0 0 0 0 0 0 0 10000 ...
## $ All.Costs : int 1627 4008 200 11540 29650 4194715 70400 413 40336 30050 ...
# Variable under study: the Cause.Category column of the dataset.
cause_Category <- datos$Cause.Category
# Absolute frequency of each category.
freq_abs <- table(cause_Category)
# Frequency table as a data frame: x = category, ni = absolute frequency.
Tabla <- setNames(as.data.frame(freq_abs), c("x", "ni"))
# hi = relative frequency (share of the total count), rounded to 4 decimals.
Tabla$hi <- round(prop.table(Tabla$ni), 4)
# Summary row: total count; its relative frequency is fixed at 1 (100%).
fila_total <- data.frame(x = "TOTAL", ni = sum(Tabla$ni), hi = 1)
# Append the summary row below the per-category rows.
TablaFinal <- rbind(Tabla, fila_total)
# Show the finished table.
TablaFinal
## x ni hi
## 1 ALL OTHER CAUSES 118 0.0422
## 2 CORROSION 592 0.2118
## 3 EXCAVATION DAMAGE 97 0.0347
## 4 INCORRECT OPERATION 378 0.1352
## 5 MATERIAL/WELD/EQUIP FAILURE 1435 0.5134
## 6 NATURAL FORCE DAMAGE 118 0.0422
## 7 OTHER OUTSIDE FORCE DAMAGE 57 0.0204
## 8 TOTAL 2795 1.0000
## Charts No. 1 to No. 3: bar charts of the frequency table

# Draw a bar chart whose bar labels are written diagonally below the bars.
#
#   values  numeric vector with the bar heights
#   labels  one label per bar, drawn under the corresponding bar
#   main    plot title
#   ylab    y-axis label
#
# Returns (invisibly) the bar midpoints given by barplot().
plot_bars_slanted <- function(values, labels, main, ylab) {
  # Enlarged bottom margin makes room for the slanted labels; the margins in
  # force before the call are put back when the function exits.
  old_par <- par(mar = c(8, 4, 4, 2))
  on.exit(par(old_par), add = TRUE)

  top <- max(values)
  mids <- barplot(values,
                  col = "#4ECDC4",
                  main = main,
                  ylab = ylab,
                  ylim = c(0, top * 1.3))  # y axis runs to 1.3x the tallest bar
  text(x = mids,
       y = -top * 0.08,  # slightly below zero, i.e. under the bars
       labels = labels,
       srt = 45,         # rotate labels by 45 degrees
       adj = 1,
       xpd = TRUE,       # allow drawing outside the plot region
       cex = 0.7)
  invisible(mids)
}

## Chart No. 1: absolute frequencies
bp <- plot_bars_slanted(Tabla$ni,
                        labels = Tabla$x,
                        main = "Gráfica No.1: Distribución por Categoría de Causa",
                        ylab = "Cantidad")

## Chart No. 2: relative frequencies
bp <- plot_bars_slanted(Tabla$hi,
                        labels = Tabla$x,
                        main = "Gráfica No.2:\nDistribución de Cause Category (Frecuencia Relativa)",
                        ylab = "Frecuencia Relativa")

## Chart No. 3: percentages
bp <- plot_bars_slanted(Tabla$hi * 100,
                        labels = Tabla$x,
                        main = "Gráfica No.3:\nDistribución de Cause Category (Porcentaje)",
                        ylab = "Porcentaje (%)")
## Chart No. 4 (pie chart)
# A very wide right margin separates the legend from the pie. The previous
# graphical parameters are saved here and put back after the legend is drawn,
# so the 22-line margin does not carry over to later plots.
par_before_pie <- par(mar = c(4, 4, 4, 22))
# One colour per table row, interpolated between the two blue tones.
azules <- colorRampPalette(c("#1f77b4", "#d4f1f9"))(nrow(Tabla))
# Pie chart of the relative frequencies; slice labels are suppressed because
# the legend carries the category names.
pie(
  Tabla$hi,
  labels = NA,
  col = azules,
  main = "Gráfica Nº4:\nDistribución de Cause Category (Porcentaje)",
  cex = 1.0,
  radius = 0.8
)
# Legend entry per category: name, percentage (1 decimal) and number of cases.
legend_labels <- paste0(
  Tabla$x, " - ",
  round(Tabla$hi * 100, 1), "% (", Tabla$ni, " casos)"
)
# Legend placed at fixed user coordinates (x = 1.5, y = 0.5); xpd = TRUE
# permits drawing outside the plot region.
legend(
  x = 1.5,
  y = 0.5,
  legend = legend_labels,
  fill = azules,
  cex = 0.85,
  bty = "n",
  xpd = TRUE
)
# Put back the graphical parameters that were active before this chart.
par(par_before_pie)