###UNIVERSIDAD CENTRAL DEL ECUADOR ###
##Petróleos##
#Tema: Estadística inferencial de variables cualitativas
#grupo 2
#2025-2026
##Cargar Librería
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
## Load data
# Directory holding the input CSV. It defaults to the folder the script was
# originally written for; set the DATOS_DIR environment variable to run the
# script on another machine without editing the code.
data_dir <- Sys.getenv("DATOS_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "C:/Users/ronal/OneDrive/Desktop"
}
# Fail early with a clear message instead of an opaque setwd()/read.csv() error
if (!dir.exists(data_dir)) {
  stop("Data directory not found: ", data_dir, call. = FALSE)
}
# NOTE(review): setwd() is kept so that any relative paths used elsewhere in
# the script keep resolving against this folder.
setwd(data_dir)
# Comma-separated file with a header row and "." as the decimal mark
datos <- read.csv("database (1).csv", header = TRUE, sep = ",", dec = ".")
# Variable under study: Cause Subcategory
cause_sub <- datos$Cause.Subcategory
# Absolute frequency of every subcategory
freq_abs2 <- table(cause_sub)
# Frequency table as a data frame: x = subcategory, ni = absolute frequency
Tabla2 <- setNames(as.data.frame(freq_abs2), c("x", "ni"))
# Relative frequency (share of the total), rounded to 4 decimals
Tabla2[["hi"]] <- round(prop.table(Tabla2[["ni"]]), 4)
# TOTAL row; hi is fixed at 1 rather than summed from the rounded shares
fila_total2 <- data.frame(x = "TOTAL", ni = sum(Tabla2[["ni"]]), hi = 1.00)
# Final table: one row per subcategory followed by the TOTAL row
TablaFinal2 <- rbind(Tabla2, fila_total2)
TablaFinal2
## x ni hi
## 1 CONSTRUCTION, INSTALLATION OR FABRICATION-RELATED 112 0.0401
## 2 DAMAGE BY OPERATOR OR OPERATOR'S CONTRACTOR 20 0.0072
## 3 DEFECTIVE OR LOOSE TUBING/FITTING 62 0.0222
## 4 EARTH MOVEMENT 8 0.0029
## 5 ELECTRICAL ARCING FROM OTHER EQUIPMENT/FACILITY 11 0.0039
## 6 ENVIRONMENTAL CRACKING-RELATED 16 0.0057
## 7 EXTERNAL 230 0.0823
## 8 FAILURE OF EQUIPMENT BODY 67 0.0240
## 9 FIRE/EXPLOSION AS PRIMARY CAUSE 4 0.0014
## 10 FISHING OR MARITIME ACTIVITY 1 0.0004
## 11 HEAVY RAINS/FLOODS 25 0.0089
## 12 HIGH WINDS 1 0.0004
## 13 INCORRECT EQUIPMENT 10 0.0036
## 14 INCORRECT INSTALLATION 72 0.0258
## 15 INCORRECT VALVE POSITION 84 0.0301
## 16 INTENTIONAL DAMAGE 3 0.0011
## 17 INTERNAL 362 0.1295
## 18 LIGHTNING 19 0.0068
## 19 MALFUNCTION OF CONTROL/RELIEF EQUIPMENT 171 0.0612
## 20 MANUFACTURING-RELATED 70 0.0250
## 21 MARITIME EQUIPMENT OR VESSEL ADRIFT 1 0.0004
## 22 MISCELLANEOUS 76 0.0272
## 23 NON-THREADED CONNECTION FAILURE 286 0.1023
## 24 OPERATOR/CONTRACTOR EXCAVATION DAMAGE 28 0.0100
## 25 OTHER EQUIPMENT FAILURE 204 0.0730
## 26 OTHER INCORRECT OPERATION 86 0.0308
## 27 OTHER NATURAL FORCE DAMAGE 8 0.0029
## 28 OTHER OUTSIDE FORCE DAMAGE 15 0.0054
## 29 OVERFILL/OVERFLOW OF TANK/VESSEL/SUMP 69 0.0247
## 30 PIPELINE/EQUIPMENT OVERPRESSURED 37 0.0132
## 31 PREVIOUS DAMAGE DUE TO EXCAVATION 12 0.0043
## 32 PREVIOUS MECHANICAL DAMAGE 1 0.0004
## 33 PUMP OR PUMP-RELATED EQUIPMENT 296 0.1059
## 34 TEMPERATURE 57 0.0204
## 35 THIRD PARTY EXCAVATION DAMAGE 57 0.0204
## 36 THREADED CONNECTION/COUPLING FAILURE 151 0.0540
## 37 UNKNOWN 42 0.0150
## 38 VEHICLE NOT ENGAGED IN EXCAVATION 21 0.0075
## 39 TOTAL 2795 1.0000
## Group the less frequent subcategories under "OTRAS"
# Number of most frequent subcategories that keep their own row
top_n <- 7
# Subcategories sorted from most to least frequent
orden <- sort(table(cause_sub), decreasing = TRUE)
# head() instead of [1:top_n]: with fewer than top_n subcategories the index
# form would pad the result with NA names
principales <- head(names(orden), top_n)
# Compare and recode on character values: ifelse() applied to a factor would
# return its internal integer codes instead of the labels
cause_sub_chr <- as.character(cause_sub)
# Missing values stay NA (and are dropped by table(), as in the ungrouped
# table) instead of being counted as "OTRAS"
cause_sub_grouped <- ifelse(
  is.na(cause_sub_chr) | cause_sub_chr %in% principales,
  cause_sub_chr,
  "OTRAS"
)
freq_abs2 <- table(cause_sub_grouped)
Tabla2 <- as.data.frame(freq_abs2)
colnames(Tabla2) <- c("x", "ni")
# Reorder: keep the existing order but move the "OTRAS" row to the end
Tabla2 <- Tabla2[order(Tabla2$x == "OTRAS"), ]
# Relative frequency, rounded to 4 decimals
Tabla2$hi <- round(Tabla2$ni / sum(Tabla2$ni), 4)
# TOTAL row; hi is fixed at 1 rather than summed from the rounded shares
fila_total2 <- data.frame(
  x = "TOTAL",
  ni = sum(Tabla2$ni),
  hi = 1.00
)
TablaFinal2 <- rbind(Tabla2, fila_total2)
TablaFinal2
## x ni hi
## 1 EXTERNAL 230 0.0823
## 2 INTERNAL 362 0.1295
## 3 MALFUNCTION OF CONTROL/RELIEF EQUIPMENT 171 0.0612
## 4 NON-THREADED CONNECTION FAILURE 286 0.1023
## 5 OTHER EQUIPMENT FAILURE 204 0.0730
## 7 PUMP OR PUMP-RELATED EQUIPMENT 296 0.1059
## 8 THREADED CONNECTION/COUPLING FAILURE 151 0.0540
## 6 OTRAS 1095 0.3918
## 11 TOTAL 2795 1.0000
## Plot No. 1: absolute frequency per cause subcategory
# Plot data: the final table without its TOTAL row (built once; the original
# repeated this assignment twice)
Tabla2_graf <- subset(TablaFinal2, x != "TOTAL")
# Widen the bottom margin for the rotated labels. par() returns the previous
# settings, which are restored once the plot is finished.
old_par <- par(mar = c(10, 4, 4, 2))
# barplot() returns the bar midpoints; keep them in bp to position the labels
bp <- barplot(
  Tabla2_graf$ni,
  names.arg = rep("", nrow(Tabla2_graf)),
  col = "#4ECDC4",
  # Title names the subcategory variable, consistent with plots 2-4
  main = "Grafica No.1: Distribucion por SubCause Category",
  ylab = "Cantidad",
  las = 1,
  cex.axis = 0.9,
  width = 1.2,
  space = 0.4,
  ylim = c(0, max(Tabla2_graf$ni) * 1.25)
)
# Category labels at 45 degrees just below the axis; xpd = TRUE allows
# drawing outside the plot region
text(
  x = bp,
  y = -max(Tabla2_graf$ni) * 0.07,
  labels = Tabla2_graf$x,
  srt = 45,
  adj = 1,
  xpd = TRUE,
  cex = 0.7,
  col = "black"
)
# Put the margins back so they do not leak into later plots
par(old_par)
## Plot No. 2: relative frequency per cause subcategory
# Widen the bottom margin for the rotated labels. par() returns the previous
# settings, which are restored once the plot is finished.
old_par <- par(mar = c(10, 4, 4, 2))
# barplot() returns the bar midpoints; keep them in bp to position the labels
bp <- barplot(
  Tabla2_graf$hi,
  col = "#4ECDC4",
  main = "Grafica No.2: Frecuencia Relativa SubCause Category",
  ylab = "Frecuencia Relativa",
  # 30% headroom above the tallest bar
  ylim = c(0, max(Tabla2_graf$hi) * 1.3),
  las = 1
)
# Category labels at 45 degrees just below the axis; xpd = TRUE allows
# drawing outside the plot region
text(
  x = bp,
  y = -max(Tabla2_graf$hi) * 0.07,
  labels = Tabla2_graf$x,
  srt = 45,
  adj = 1,
  xpd = TRUE,
  cex = 0.7
)
# Put the margins back so they do not leak into later plots
par(old_par)
## Plot No. 3: percentage per cause subcategory
# Widen the bottom margin for the rotated labels. par() returns the previous
# settings, which are restored once the plot is finished.
old_par <- par(mar = c(10, 4, 4, 2))
# Relative frequencies are scaled by 100 to plot percentages.
# barplot() returns the bar midpoints; keep them in bp to position the labels.
bp <- barplot(
  Tabla2_graf$hi * 100,
  col = "#4ECDC4",
  main = "Grafica No.3: Porcentaje SubCause Category",
  ylab = "Porcentaje (%)",
  # 30% headroom above the tallest bar
  ylim = c(0, max(Tabla2_graf$hi * 100) * 1.3),
  las = 1
)
# Category labels at 45 degrees just below the axis; xpd = TRUE allows
# drawing outside the plot region
text(
  x = bp,
  y = -max(Tabla2_graf$hi * 100) * 0.08,
  labels = Tabla2_graf$x,
  srt = 45,
  adj = 1,
  xpd = TRUE,
  cex = 0.7
)
# Put the margins back so they do not leak into later plots
par(old_par)
## Plot No. 4: pie chart of the cause subcategories
# A very wide right margin (22 lines) makes room for the legend. par() returns
# the previous settings, which are restored at the end so this margin does not
# squeeze every plot drawn afterwards.
old_par <- par(mar = c(4, 4, 4, 22))
# One shade of blue per slice, interpolated between the two end colours
azules <- colorRampPalette(c("#1f77b4", "#d4f1f9"))(nrow(Tabla2_graf))
# Slice labels are suppressed (labels = NA); the legend carries them instead
pie(
  Tabla2_graf$hi,
  labels = NA,
  col = azules,
  main = "Grafica No.4: Distribucion de SubCause Category (Porcentaje)",
  cex = 1.0,
  radius = 1.0
)
# Legend entries read "<subcategory> - <pct>% (<count> casos)". It is anchored
# at x = 1.5, to the right of the pie, and xpd = TRUE lets it be drawn outside
# the plot region.
legend(
  x = 1.5,
  y = 0.5,
  legend = paste0(
    Tabla2_graf$x, " - ",
    round(Tabla2_graf$hi * 100, 1), "% (", Tabla2_graf$ni, " casos)"
  ),
  fill = azules,
  cex = 0.85,
  bty = "n",
  xpd = TRUE
)
# Put the margins back to what they were before this plot
par(old_par)