UNIVERSIDAD CENTRAL DEL ECUADOR
Petróleos
Tema: Estadística inferencial de variables cualitativas
Grupo 2
2025-2026
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
# Load the data ----
# NOTE(review): the working directory is a hard-coded, user-specific path, so
# the relative file name below only resolves on a machine that has it.
setwd("C:/Users/ronal/OneDrive/Desktop")
datos <- read.csv(
  file = "database (1).csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

# Pull out the Pipeline.Shutdown column and keep only the entries that are
# neither missing (NA) nor empty strings.
pipe_sd <- datos$Pipeline.Shutdown
pipe_sd <- pipe_sd[!(is.na(pipe_sd) | pipe_sd == "")]
# Frequency table for Pipeline Shutdown ----
# Absolute frequency: number of observations per category.
freq_SD <- table(pipe_sd)

# Data frame version with columns x (category) and ni (absolute frequency).
Tabla_SD <- setNames(as.data.frame(freq_SD), c("x", "ni"))

# Relative frequency: share of each category, rounded to four decimals.
Tabla_SD$hi <- round(prop.table(Tabla_SD$ni), digits = 4)

# Summary row: total count, with the relative frequency fixed at 1.
fila_total_SD <- data.frame(x = "TOTAL", ni = sum(Tabla_SD$ni), hi = 1)

# Final table: one row per category followed by the TOTAL row; printed below.
TablaFinal_SD <- rbind(Tabla_SD, fila_total_SD)
TablaFinal_SD
## x ni hi
## 1 NO 1188 0.4599
## 2 YES 1395 0.5401
## 3 TOTAL 2583 1.0000
# Chart No. 1: absolute frequency per Pipeline Shutdown category ----
# Leave out the TOTAL row so that only the real categories are drawn.
Tabla_SD_graf <- TablaFinal_SD[
  which(TablaFinal_SD$x != "TOTAL"), ,
  drop = FALSE
]

# Enlarged bottom margin: the category labels are drawn below the axis.
par(mar = c(8, 5, 5, 2))

# Bars are drawn with blank names; the value returned by barplot() (bp) is
# reused as the x positions of the labels added with text().
bp <- barplot(
  height = Tabla_SD_graf$ni,
  names.arg = character(nrow(Tabla_SD_graf)),
  main = "Grafica No.1: Frecuencia Absoluta - Pipeline Shutdown",
  ylab = "Cantidad",
  col = "#4ECDC4",
  space = 1,
  width = 1.5,
  ylim = c(0, 1.3 * max(Tabla_SD_graf$ni)),  # headroom above the tallest bar
  cex.axis = 1.2
)

# Category labels under each bar; xpd = TRUE allows drawing outside the plot
# region, which is needed because y is negative.
text(
  x = bp,
  y = -0.10 * max(Tabla_SD_graf$ni),
  labels = Tabla_SD_graf$x,
  xpd = TRUE,
  cex = 1.2
)
# Chart No. 2: relative frequency per Pipeline Shutdown category ----
# Leave out the TOTAL row so that only the real categories are drawn.
Tabla_SD_graf <- subset(TablaFinal_SD, x != "TOTAL")

# Enlarged bottom margin: the category labels are drawn below the axis.
par(mar = c(8, 5, 5, 2))

# Bars are drawn with blank names; the value returned by barplot() (bp) is
# reused as the x positions of the labels added with text().
bp <- barplot(
  Tabla_SD_graf$hi,
  names.arg = rep("", nrow(Tabla_SD_graf)),
  col = "#4ECDC4",
  main = "Grafica No.2: Frecuencia Relativa - Pipeline Shutdown",
  # The bars show relative frequencies (hi), not counts, so the axis is
  # labelled accordingly instead of reusing the "Cantidad" label of Chart 1.
  ylab = "Frecuencia relativa",
  width = 1.5,
  space = 1,
  cex.axis = 1.2,
  ylim = c(0, max(Tabla_SD_graf$hi) * 1.3)  # headroom above the tallest bar
)

# Category labels under each bar; xpd = TRUE allows drawing outside the plot
# region, which is needed because y is negative.
text(
  x = bp,
  y = -max(Tabla_SD_graf$hi) * 0.08,
  labels = Tabla_SD_graf$x,
  cex = 1.2,
  xpd = TRUE
)
# Chart No. 3: percentage per Pipeline Shutdown category ----
# Leave out the TOTAL row so that only the real categories are drawn.
Tabla_SD_graf <- TablaFinal_SD[
  which(TablaFinal_SD$x != "TOTAL"), ,
  drop = FALSE
]

# Enlarged bottom margin: the category labels are drawn below the axis.
par(mar = c(8, 5, 5, 2))

# Relative frequencies are scaled by 100 to plot percentages. The value
# returned by barplot() (bp) is reused as the x positions of the labels.
bp <- barplot(
  height = Tabla_SD_graf$hi * 100,
  names.arg = character(nrow(Tabla_SD_graf)),
  main = "Grafica No.3: Porcentaje - Pipeline Shutdown",
  ylab = "Porcentaje (%)",
  col = "#4ECDC4",
  space = 1,
  width = 1.5,
  ylim = c(0, 1.3 * max(Tabla_SD_graf$hi * 100)),  # headroom above the bars
  cex.axis = 1.2
)

# Category labels under each bar; xpd = TRUE allows drawing outside the plot
# region, which is needed because y is negative.
text(
  x = bp,
  y = -0.08 * max(Tabla_SD_graf$hi * 100),
  labels = Tabla_SD_graf$x,
  xpd = TRUE,
  cex = 1.2
)
# Chart No. 4: pie chart of the Pipeline Shutdown distribution ----
# Leave out the TOTAL row so that only the real categories are drawn.
Tabla_SD_graf <- TablaFinal_SD[
  which(TablaFinal_SD$x != "TOTAL"), ,
  drop = FALSE
]

# Wide right margin: the legend is placed to the right of the pie.
par(mar = c(4, 4, 4, 18))

# One colour per category, interpolated between the two blue tones.
colores <- colorRampPalette(c("#1f77b4", "#d4f1f9"))(nrow(Tabla_SD_graf))

# Slices carry no labels (labels = NA); the legend describes them instead.
pie(
  x = Tabla_SD_graf$hi,
  labels = NA,
  radius = 1.0,
  col = colores,
  main = "Grafica No.4: Distribucion - Pipeline Shutdown",
  cex = 1.2
)

# Legend entries combine category, percentage (one decimal) and case count.
# xpd = TRUE lets the legend be drawn outside the plot region.
legend(
  x = 1.3,
  y = 0.6,
  legend = with(
    Tabla_SD_graf,
    paste0(x, " - ", round(hi * 100, 1), "% (", ni, " casos)")
  ),
  fill = colores,
  bty = "n",
  xpd = TRUE,
  cex = 1.1
)