# Cargar datos

# Load the accident records from the CSV export.
# The directory is kept in one variable and joined with file.path(), so the
# script reads the file directly instead of changing the session's working
# directory with setwd().
# NOTE(review): the directory is still machine-specific; adjust data_dir when
# running elsewhere.
data_dir <- "C:/Users/ronal/OneDrive/Desktop"
data_file <- file.path(data_dir, "database (1).csv")

# Fail early with a readable message if the export is not where expected.
if (!file.exists(data_file)) {
  stop("Data file not found: ", data_file, call. = FALSE)
}

# Comma-separated file with a header row and "." as the decimal mark.
datos <- read.csv(data_file, header = TRUE, sep = ",", dec = ".")

# Print the structure (column names, types, first values) of the loaded data.
str(datos)
## 'data.frame':    2795 obs. of  36 variables:
##  $ Report.Number                       : int  20100016 20100254 20100038 20100260 20100030 20100021 20110036 20100255 20100261 20100024 ...
##  $ Supplemental.Number                 : int  17305 17331 17747 18574 16276 17161 18052 18584 18050 18390 ...
##  $ Accident.Year                       : chr  "2010" "2010" "2010" "2010" ...
##  $ Accident.Date.Time                  : chr  "1/1/2010 7:15" "1/4/2010 8:30" "1/5/2010 10:30" "1/6/2010 19:30" ...
##  $ Operator.ID                         : int  32109 15786 20160 11169 300 11169 26041 12624 26041 31684 ...
##  $ Operator.Name                       : chr  "ONEOK NGL PIPELINE LP" "PORTLAND PIPELINE CORP" "PETROLOGISTICS OLEFINS, LLC" "ENBRIDGE ENERGY, LIMITED PARTNERSHIP" ...
##  $ Pipeline.Facility.Name              : chr  "KINDER MORGAN JCT" "24-INCH MAIN LINE" "" "SUPERIOR TERMINAL" ...
##  $ Pipeline.Location                   : chr  "ONSHORE" "ONSHORE" "ONSHORE" "ONSHORE" ...
##  $ Pipeline.Type                       : chr  "ABOVEGROUND" "ABOVEGROUND" "ABOVEGROUND" "UNDERGROUND" ...
##  $ Liquid.Type                         : chr  "HVL OR OTHER FLAMMABLE OR TOXIC FLUID, GAS" "CRUDE OIL" "HVL OR OTHER FLAMMABLE OR TOXIC FLUID, GAS" "CRUDE OIL" ...
##  $ Liquid.Subtype                      : chr  "LPG (LIQUEFIED PETROLEUM GAS) / NGL (NATURAL GAS LIQUID)" "" "OTHER HVL" "" ...
##  $ Liquid.Name                         : chr  "" "" "ETHANE" "" ...
##  $ Accident.City                       : chr  "MCPHERSON" "RAYMOND" "SULPHER" "SUPERIOR" ...
##  $ Accident.County                     : chr  "MCPHERSON" "CUMBERLAND" "CALCASIEU" "DOUGLAS" ...
##  $ Accident.State                      : chr  "KS" "ME" "LA" "WI" ...
##  $ Accident.Latitude                   : num  38.7 43.9 30.2 46.7 33.6 ...
##  $ Accident.Longitude                  : num  -97.8 -70.5 -93.4 -92.1 -96.6 ...
##  $ Cause.Category                      : chr  "INCORRECT OPERATION" "MATERIAL/WELD/EQUIP FAILURE" "MATERIAL/WELD/EQUIP FAILURE" "NATURAL FORCE DAMAGE" ...
##  $ Cause.Subcategory                   : chr  "PIPELINE/EQUIPMENT OVERPRESSURED" "PUMP OR PUMP-RELATED EQUIPMENT" "DEFECTIVE OR LOOSE TUBING/FITTING" "TEMPERATURE" ...
##  $ Unintentional.Release..Barrels.     : num  21 0.12 2 0.48 700 ...
##  $ Intentional.Release..Barrels.       : num  0.1 0 0 0 NA 0 0 0 0 0 ...
##  $ Liquid.Recovery..Barrels.           : num  0 0.12 0 0.48 698 ...
##  $ Net.Loss..Barrels.                  : num  21 0 2 0 2 ...
##  $ Liquid.Ignition                     : chr  "NO" "NO" "NO" "NO" ...
##  $ Liquid.Explosion                    : chr  "NO" "NO" "NO" "NO" ...
##  $ Pipeline.Shutdown                   : chr  "NO" "" "" "" ...
##  $ Shutdown.Date.Time                  : chr  "" "" "" "" ...
##  $ Restart.Date.Time                   : chr  "" "" "" "" ...
##  $ Public.Evacuations                  : int  NA NA NA NA NA 0 NA NA NA NA ...
##  $ Property.Damage.Costs               : int  110 4000 0 200 20000 76940 0 400 0 0 ...
##  $ Lost.Commodity.Costs                : int  1517 8 200 40 150 167775 400 13 336 50 ...
##  $ Public.Private.Property.Damage.Costs: int  0 0 0 0 0 150000 0 0 0 0 ...
##  $ Emergency.Response.Costs            : int  0 0 0 11300 7500 1800000 0 0 0 10000 ...
##  $ Environmental.Remediation.Costs     : int  0 0 0 0 2000 2000000 70000 0 40000 10000 ...
##  $ Other.Costs                         : int  0 0 0 0 0 0 0 0 0 10000 ...
##  $ All.Costs                           : int  1627 4008 200 11540 29650 4194715 70400 413 40336 30050 ...
# Frequency table of the accident cause categories.
cause_Category <- datos$Cause.Category

# Absolute frequencies: number of records per category.
freq_abs <- table(cause_Category)

# One row per category, with columns x (category) and ni (count).
Tabla <- setNames(as.data.frame(freq_abs), c("x", "ni"))

# Relative frequency of each category, rounded to 4 decimals.
n_total <- sum(Tabla[["ni"]])
Tabla[["hi"]] <- round(Tabla[["ni"]] / n_total, 4)

# Summary row: total count, with the relative frequency fixed at 1 (100%).
fila_total <- data.frame(x = "TOTAL", ni = n_total, hi = 1.00)

# Append the summary row below the per-category rows.
TablaFinal <- rbind(Tabla, fila_total)

# Display the finished table.
TablaFinal
##                             x   ni     hi
## 1            ALL OTHER CAUSES  118 0.0422
## 2                   CORROSION  592 0.2118
## 3           EXCAVATION DAMAGE   97 0.0347
## 4         INCORRECT OPERATION  378 0.1352
## 5 MATERIAL/WELD/EQUIP FAILURE 1435 0.5134
## 6        NATURAL FORCE DAMAGE  118 0.0422
## 7  OTHER OUTSIDE FORCE DAMAGE   57 0.0204
## 8                       TOTAL 2795 1.0000
# Charts 1-3: bar charts of the cause-category distribution, shown as
# absolute counts, relative frequencies, and percentages.

# Draw one bar chart with 45-degree category labels under the bars.
#   values: numeric bar heights.
#   labels: one label per bar, drawn below the baseline.
#   main:   chart title.
#   ylab:   y-axis label.
# Returns (invisibly) the bar midpoints computed by barplot().
plot_cause_bars <- function(values, labels, main, ylab) {
  # Enlarge the bottom margin for the slanted labels and restore the previous
  # setting on exit so it does not leak into later plots.
  old_par <- par(mar = c(8, 4, 4, 2))
  on.exit(par(old_par), add = TRUE)

  top <- max(values)

  mids <- barplot(values,
                  col = "#4ECDC4",
                  main = main,
                  ylab = ylab,
                  ylim = c(0, top * 1.3))  # headroom above the tallest bar

  # Slanted labels, right-aligned at each bar's midpoint
  text(x = mids,
       y = -top * 0.08,  # slightly below the baseline
       labels = labels,
       srt = 45,         # 45-degree rotation
       adj = 1,
       xpd = TRUE,       # allow drawing outside the plot region
       cex = 0.7)

  invisible(mids)
}

# Chart 1: absolute frequencies
bp <- plot_cause_bars(
  Tabla$ni, Tabla$x,
  main = "Gráfica No.1: Distribución por Categoría de Causa",
  ylab = "Cantidad"
)

# Chart 2: relative frequencies
bp <- plot_cause_bars(
  Tabla$hi, Tabla$x,
  main = "Gráfica No.2:\nDistribución de Cause Category (Frecuencia Relativa)",
  ylab = "Frecuencia Relativa"
)

# Chart 3: percentages
bp <- plot_cause_bars(
  Tabla$hi * 100, Tabla$x,
  main = "Gráfica No.3:\nDistribución de Cause Category (Porcentaje)",
  ylab = "Porcentaje (%)"
)

# Chart 4: pie chart of the cause-category distribution.

# A very wide right margin leaves room for the legend beside the pie.
# The previous settings are saved so they can be restored once the chart is
# finished; otherwise the 22-line margin would carry into any later plot.
old_par <- par(mar = c(4, 4, 4, 22))

# One colour per category, interpolated between two shades of blue
azules <- colorRampPalette(c("#1f77b4", "#d4f1f9"))(nrow(Tabla))

# Pie without slice labels; the legend below identifies the slices
pie(
  Tabla$hi,
  labels = NA,
  col = azules,
  main = "Gráfica Nº4:\nDistribución de Cause Category (Porcentaje)",
  cex = 1.0,
  radius = 0.8
)

# Legend placed well to the right of the pie, in the enlarged margin.
# Each entry reads: category - percentage (number of cases).
legend(
  x = 1.5,
  y = 0.5,
  legend = paste0(
    Tabla$x, " - ",
    round(Tabla$hi * 100, 1), "% (", Tabla$ni, " casos)"
  ),
  fill = azules,
  cex = 0.85,
  bty = "n",      # no box around the legend
  xpd = TRUE      # allow drawing outside the plot region
)

# Restore the graphical parameters that were in effect before this chart.
par(old_par)