library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Frequency table for the Pipeline.Shutdown variable.
# The raw answers are upper-cased and trimmed, "YES" is relabelled as "SI",
# and only the "SI" / "NO" categories are counted.
df_parcial <- datos %>%
  mutate(Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown))) %>%
  mutate(
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown == "SI" | Pipeline.Shutdown == "NO") %>%
  count(Pipeline.Shutdown, name = "ni")

# Whatever is missing to reach the target total is reported as a separate
# "NO IDENTIFICADOS" row so the table adds up to that total.
# NOTE(review): 2795 is presumably the number of rows in `datos` - confirm;
# if so, nrow(datos) would avoid the magic number.
total_objetivo <- 2795
suma_actual <- sum(df_parcial[["ni"]])
n_restante <- total_objetivo - suma_actual
fila_restante <- data.frame(
  Pipeline.Shutdown = "NO IDENTIFICADOS",
  ni = n_restante
)

# Relative frequency (in percent, two decimals), largest category first.
df_completo <- rbind(df_parcial, fila_restante)
df_completo <- df_completo %>%
  mutate(hi_pct = round(ni / sum(ni) * 100, 2)) %>%
  arrange(desc(ni))

# Closing "TOTAL" row appended below the sorted categories.
fila_total <- data.frame(
  Pipeline.Shutdown = "TOTAL",
  ni = sum(df_completo[["ni"]]),
  hi_pct = 100.00
)
tabla_final <- rbind(df_completo, fila_total)

# Render the table: the last row (TOTAL) is bold on a grey background and the
# "NO IDENTIFICADOS" row is shown in grey italics.
tabla_final %>%
  kable(
    digits = 2,
    col.names = c("Estado", "Frecuencia (ni)", "Porcentaje (%)"),
    align = "c",
    caption = "Tabla: Distribución Total de Cierres"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "bordered"),
    full_width = FALSE,
    position = "center"
  ) %>%
  row_spec(nrow(tabla_final), bold = TRUE, background = "#f2f2f2") %>%
  row_spec(
    which(tabla_final$Pipeline.Shutdown == "NO IDENTIFICADOS"),
    color = "gray",
    italic = TRUE
  )
## Tabla: Distribución Total de Cierres
##
## |      Estado      | Frecuencia (ni) | Porcentaje (%) |
## |:----------------:|:---------------:|:--------------:|
## |        SI        |      1395       |     49.91      |
## |        NO        |      1188       |     42.50      |
## | NO IDENTIFICADOS |       212       |      7.58      |
## |      TOTAL       |      2795       |     100.00     |
# Bar chart (local scale): absolute frequency (ni) of SI / NO shutdowns.
library(ggplot2)
library(dplyr)

# Normalise the answers ("YES" becomes "SI") and count the two categories.
datos_grafico <- datos %>%
  mutate(Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown))) %>%
  mutate(
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown == "SI" | Pipeline.Shutdown == "NO") %>%
  count(Pipeline.Shutdown, name = "ni")

# Both bars share one colour and the legend is hidden; the y axis is left
# to ggplot's default range.
ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = ni, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "skyblue")) +
  labs(
    title = "Gráfica 1: Cierres de oleoductos SI/NO",
    x = "Cierre de oleoductos",
    y = "Número de Accidentes"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )

# Bar chart (general scale): SI / NO counts drawn on a y axis that runs from
# 0 to 2795, the same target total used in the frequency table.
library(ggplot2)
library(dplyr)

# Normalise the answers ("YES" becomes "SI") and count the two categories.
datos_grafico <- datos %>%
  mutate(Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown))) %>%
  mutate(
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown == "SI" | Pipeline.Shutdown == "NO") %>%
  count(Pipeline.Shutdown, name = "ni")

ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = ni, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "skyblue")) +
  # Fixed axis: upper limit and last break are both 2795.
  scale_y_continuous(
    limits = c(0, 2795),
    breaks = c(0, 1000, 2000, 2795)
  ) +
  labs(
    title = "Gráfica 2: Cantidad de Cierres de oleoductos en general",
    x = "Cierre de oleoductos",
    y = "Número de Accidentes"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )

# Bar chart (general scale): percentage of SI / NO shutdowns on a 0-100 axis.
library(ggplot2)
library(dplyr)

# Normalise the answers ("YES" becomes "SI"), count the two categories and
# express each count as a percentage of the SI + NO total.
datos_grafico <- datos %>%
  mutate(
    Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown)),
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni") %>%
  mutate(porcentaje = (ni / sum(ni)) * 100)

ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = porcentaje, fill = Pipeline.Shutdown)
) +
  geom_bar(stat = "identity", width = 0.6) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "skyblue")) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 10)) +
  labs(
    # Numbered 3: "Gráfica 2" is already used by the previous chart and the
    # next one is "Gráfica 4".
    title = "Gráfica 3: Porcentaje de Cierres de oleoductos",
    x = "Cierre de oleoductos",
    y = "Porcentaje (%)"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )

# Bar chart (local scale): percentage (hi) of SI / NO shutdowns on a 0-60 axis.
library(ggplot2)
library(dplyr)

# Normalise the answers ("YES" becomes "SI"), count the two categories and
# express each count as a percentage of the SI + NO total.
datos_grafico <- datos %>%
  mutate(Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown))) %>%
  mutate(
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown == "SI" | Pipeline.Shutdown == "NO") %>%
  count(Pipeline.Shutdown, name = "ni") %>%
  mutate(hi_pct = (ni / sum(ni)) * 100)

ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = hi_pct, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "skyblue")) +
  # Axis capped at 60 with a break every 10 points.
  scale_y_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10)) +
  labs(
    title = "Gráfica 4: Cantidad de porcentaje",
    x = "Cierre de oleoductos",
    y = "Porcentaje (%)"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )

# Pie chart: share of SI / NO shutdowns, labelled with the percentage.
library(ggplot2)
library(dplyr)

# Normalise the answers ("YES" becomes "SI"), count the two categories and
# compute each share as a percentage rounded to one decimal.
df_clean <- datos %>%
  mutate(
    Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown)),
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni") %>%
  mutate(hi_pct = round((ni / sum(ni)) * 100, 1))

# A single stacked bar bent into polar coordinates gives the pie.
ggplot(df_clean, aes(x = "", y = hi_pct, fill = Pipeline.Shutdown)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(hi_pct, "%")),
    position = position_stack(vjust = 0.5),
    color = "white", fontface = "bold", size = 5
  ) +
  # The legend is the only thing that identifies each slice, so the two
  # categories need different colours (both were "skyblue" before).
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "steelblue")) +
  labs(
    title = "Gráfico 5: Distribución de Cierres de oleoductos",
    fill = "Estado"
  ) +
  theme_void()

# Mode of Pipeline.Shutdown among the valid (non-missing, non-blank) answers.
# Lines starting with "##" are the captured console output of the cat() call
# directly above them and are kept verbatim.
variable_limpia <- datos$Pipeline.Shutdown
variable_limpia <- variable_limpia[!is.na(variable_limpia)]
variable_limpia <- trimws(toupper(variable_limpia))
variable_limpia <- variable_limpia[nzchar(variable_limpia)]

# Frequency of each distinct answer; the mode is the label with the top count.
tabla_frecuencia <- table(variable_limpia)
mas_repetido <- names(which.max(tabla_frecuencia))
cantidad <- max(tabla_frecuencia)
total_validos <- length(variable_limpia)
porcentaje <- round((cantidad / total_validos) * 100, 2)

cat("El valor que más se repite es:", mas_repetido, "\n")
## El valor que más se repite es: YES
cat("Cantidad:", cantidad, "\n")
## Cantidad: 1395
cat("Representa el:", porcentaje, "% de los casos válidos\n")
## Representa el: 54.01 % de los casos válidos
#CONCLUSIONES
# El valor más frecuente de la variable Pipeline Shutdown es 'YES', con un total de 1,395 registros. Esto representa aproximadamente el 54.01% de los casos reportados (excluyendo valores nulos), lo que indica que en la mayoría de los incidentes registrados la severidad fue suficiente para requerir el cierre operativo de la tubería.