library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
## 
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Frequency table of Pipeline.Shutdown: SI / NO / NO IDENTIFICADOS / TOTAL.
#
# Raw values are upper-cased and trimmed, "YES" is recoded to "SI", and only
# the records that end up as "SI" or "NO" are counted.
df_parcial <- datos %>%
  mutate(
    Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown)),
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni")

# Reference total used as the denominator of the whole table.
# NOTE(review): presumably the number of records in `datos` -- confirm.
total_objetivo <- 2795
suma_actual <- sum(df_parcial$ni)
n_restante <- total_objetivo - suma_actual

# The remainder is reported as a frequency, so it must not be negative.
# Without this check a stale `total_objetivo` would silently produce a
# negative count and distorted percentages.
if (n_restante < 0) {
  stop(
    "SI/NO count (", suma_actual, ") exceeds total_objetivo (",
    total_objetivo, "); update total_objetivo.",
    call. = FALSE
  )
}

# Everything that is neither "SI" nor "NO" is grouped in a single row.
fila_restante <- data.frame(
  Pipeline.Shutdown = "NO IDENTIFICADOS",
  ni = n_restante
)

# Percentages are computed over the full total (SI + NO + remainder) and the
# rows are sorted from most to least frequent.
df_completo <- rbind(df_parcial, fila_restante) %>%
  mutate(hi_pct = round((ni / sum(ni)) * 100, 2)) %>%
  arrange(desc(ni))

# Closing row; the percentage is fixed at 100 rather than summed so that
# rounding of the individual rows does not show up in the total.
fila_total <- data.frame(
  Pipeline.Shutdown = "TOTAL",
  ni = sum(df_completo$ni),
  hi_pct = 100.00
)
tabla_final <- rbind(df_completo, fila_total)

# Render the table: TOTAL row (last row) in bold on a grey background, the
# "NO IDENTIFICADOS" row in grey italics.
kable(
  tabla_final,
  digits = 2,
  col.names = c("Estado", "Frecuencia (ni)", "Porcentaje (%)"),
  align = "c",
  caption = "Tabla: Distribución Total de Cierres"
) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped", "bordered")
  ) %>%
  row_spec(nrow(tabla_final), bold = TRUE, background = "#f2f2f2") %>%
  row_spec(
    which(tabla_final$Pipeline.Shutdown == "NO IDENTIFICADOS"),
    color = "gray",
    italic = TRUE
  )
## Tabla: Distribución Total de Cierres
## Estado Frecuencia (ni) Porcentaje (%)
## SI 1395 49.91
## NO 1188 42.50
## NO IDENTIFICADOS 212 7.58
## TOTAL 2795 100.00
# Bar chart of absolute frequencies (ni), default y-axis range
library(ggplot2)
library(dplyr)

# Count the records whose normalised Pipeline.Shutdown value is "SI" or "NO"
# (values are upper-cased and trimmed first; "YES" is recoded to "SI").
datos_grafico <- datos %>%
  mutate(Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown))) %>%
  mutate(
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni")

# One bar per category. Both categories use the same colour and the legend
# is hidden, so the fill mapping has no visible effect beyond the bar colour.
ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = ni, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "skyblue")) +
  labs(
    title = "Gráfica 1: Cierres de oleoductos SI/NO",
    x = "Cierre de oleoductos",
    y = "Número de Accidentes"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(size = 12, face = "bold"),
    legend.position = "none"
  )

# Bar chart of absolute frequencies on an axis that runs up to 2795
library(ggplot2)
library(dplyr)

# Same SI/NO counts as the table: normalise case and whitespace, recode
# "YES" to "SI", keep only "SI"/"NO" and count them.
datos_grafico <- datos %>%
  mutate(
    Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown)),
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES",
      "SI",
      Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni")

# The y-axis is fixed to 0..2795 (the total used in the frequency table), so
# bar heights read against that total rather than the tallest bar.
ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = ni, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c(SI = "skyblue", NO = "skyblue")) +
  scale_y_continuous(
    limits = c(0, 2795),
    breaks = c(0, 1000, 2000, 2795)
  ) +
  labs(
    title = "Gráfica 2: Cantidad de Cierres de oleoductos en general",
    x = "Cierre de oleoductos",
    y = "Número de Accidentes"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )

# Bar chart of percentages on a full 0-100 % axis
library(ggplot2)
library(dplyr)

# SI/NO counts plus each category's share of the SI + NO total (records that
# are neither "SI" nor "NO" are excluded from the denominator).
datos_grafico <- datos %>%
  mutate(
    Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown)),
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni") %>%
  mutate(porcentaje = (ni / sum(ni)) * 100)

# Title numbered "Gráfica 3": the previous chart is already "Gráfica 2" and
# the following one is "Gráfica 4".
ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = porcentaje, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "skyblue")) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 10)) +
  labs(
    title = "Gráfica 3: Porcentaje de Cierres de oleoductos",
    x = "Cierre de oleoductos",
    y = "Porcentaje (%)"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )

# Bar chart of relative frequencies (hi, in %) on a 0-60 % axis
library(ggplot2)
library(dplyr)

# SI/NO counts and their percentage over the SI + NO total.
datos_grafico <- datos %>%
  mutate(Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown))) %>%
  mutate(
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni") %>%
  mutate(hi_pct = (ni / sum(ni)) * 100)

# The y-axis is limited to 0..60 with a tick every 10 points.
ggplot(
  datos_grafico,
  aes(x = Pipeline.Shutdown, y = hi_pct, fill = Pipeline.Shutdown)
) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c(SI = "skyblue", NO = "skyblue")) +
  scale_y_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10)) +
  labs(
    title = "Gráfica 4: Cantidad de porcentaje",
    x = "Cierre de oleoductos",
    y = "Porcentaje (%)"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(size = 12, face = "bold"),
    legend.position = "none"
  )

# Pie chart of the SI/NO distribution
library(ggplot2)
library(dplyr)

# SI/NO counts and their percentage over the SI + NO total, rounded to one
# decimal because the value is printed as the slice label.
df_clean <- datos %>%
  mutate(
    Pipeline.Shutdown = trimws(toupper(Pipeline.Shutdown)),
    Pipeline.Shutdown = ifelse(
      Pipeline.Shutdown == "YES", "SI", Pipeline.Shutdown
    )
  ) %>%
  filter(Pipeline.Shutdown %in% c("SI", "NO")) %>%
  count(Pipeline.Shutdown, name = "ni") %>%
  mutate(hi_pct = round((ni / sum(ni)) * 100, 1))

# A single stacked bar drawn in polar coordinates gives the pie. Each slice
# gets its own colour: this chart shows a legend ("Estado"), and with one
# shared colour the slices and legend keys could not be told apart.
ggplot(df_clean, aes(x = "", y = hi_pct, fill = Pipeline.Shutdown)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  # Percentage label centred inside each slice.
  geom_text(
    aes(label = paste0(hi_pct, "%")),
    position = position_stack(vjust = 0.5),
    color = "white", fontface = "bold", size = 5
  ) +
  scale_fill_manual(values = c("SI" = "skyblue", "NO" = "steelblue")) +
  labs(
    title = "Gráfico 5: Distribución de Cierres de oleoductos",
    fill = "Estado"
  ) +
  theme_void()

# Mode of Pipeline.Shutdown over the valid (non-missing, non-empty) values.
# Values are upper-cased and trimmed but NOT recoded, so "YES" stays "YES".
variable_limpia <- na.omit(datos$Pipeline.Shutdown)
variable_limpia <- toupper(variable_limpia)
variable_limpia <- trimws(variable_limpia)
variable_limpia <- variable_limpia[variable_limpia != ""]

tabla_frecuencia <- table(variable_limpia)

# which.max() returns the first maximum, so ties resolve to the first level
# in the table's ordering.
mas_repetido <- names(tabla_frecuencia)[which.max(tabla_frecuencia)]
cantidad <- max(tabla_frecuencia)
total_validos <- sum(tabla_frecuencia)
porcentaje <- round(cantidad / total_validos * 100, digits = 2)

cat("El valor que más se repite es:", mas_repetido, "\n", sep = " ")
## El valor que más se repite es: YES
cat("Cantidad:", cantidad, "\n", sep = " ")
## Cantidad: 1395
cat("Representa el:", porcentaje, "% de los casos válidos\n", sep = " ")
## Representa el: 54.01 % de los casos válidos
# CONCLUSIONES
# El valor más frecuente de la variable Pipeline.Shutdown es 'YES', con un total de 1,395 registros. Esto representa aproximadamente el 54.01 % de los casos válidos (excluyendo valores nulos y vacíos), lo que indica que en la mayoría de los incidentes registrados la severidad fue suficiente para requerir el cierre operativo de la tubería.