###### Universidad Central del Ecuador######
###### Carrera de Petróleos  #####

library(readxl)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the pipeline-accident dataset ----
library(readr)

# Read the CSV from the current working directory.
# A recorded run of this script reported 2795 rows x 36 columns
# (18 character, 18 double) together with a parsing warning, so it is worth
# inspecting problems(datasetf) before trusting individual cells.
datasetf <- read_csv("datasetf.csv")

# View() opens a data-viewer window; only call it in an interactive session
# so the script still runs unattended (Rscript, knitr, headless servers).
if (interactive()) {
  View(datasetf)
}


# Frequency table of accidents by `Pipeline Location` ----

# One row per location with:
#   ni            absolute frequency (row count), sorted descending
#   hi            relative frequency as a proportion of all rows
#   hi_porcentaje relative frequency as a percentage, rounded to 3 decimals
Tabla_location <- datasetf %>%
  count(Location = `Pipeline Location`, name = "ni") %>%
  arrange(desc(ni)) %>%
  mutate(
    hi = ni / sum(ni),
    hi_porcentaje = round(hi * 100, 3)
  )

# Summary row appended at the bottom of the table.
total_fila_location <- data.frame(
  Location = "TOTAL",
  ni = sum(Tabla_location$ni),
  hi = 1,
  hi_porcentaje = 100
)

Tabla_completa_location <- rbind(Tabla_location, total_fila_location)

# Presentation table: keep only Location, ni and the percentage column.
# NOTE: the percentage column is renamed to `hi`, so from here on
# `Tabla_final_location$hi` holds PERCENT values (0-100), not proportions.
Tabla_final_location <- Tabla_completa_location %>%
  select(Location, ni, hi = hi_porcentaje)

# View() opens a data-viewer window; only call it in an interactive session
# so the script still runs unattended (Rscript, knitr, headless servers).
if (interactive()) {
  View(Tabla_final_location)
}
print(Tabla_final_location)
# Output recorded from a previous run:
## # A tibble: 3 × 3
##   Location    ni      hi
##   <chr>    <int>   <dbl>
## 1 ONSHORE   2777  99.4  
## 2 OFFSHORE    18   0.644
## 3 TOTAL     2795 100
# Bar charts by pipeline location ----

# Drop the TOTAL summary row before plotting. `%in%` never returns NA, so a
# missing Location does not turn into an all-NA row the way `!=` would.
datos_grafica_location <- Tabla_final_location[
  !(Tabla_final_location$Location %in% "TOTAL"),
]

# `hi` in Tabla_final_location is the renamed percentage column (0-100).
# Derive the proportion explicitly instead of multiplying by 100 again.
porcentaje_location <- datos_grafica_location$hi
proporcion_location <- porcentaje_location / 100

# Chart 1: absolute frequency (ni)
barplot(datos_grafica_location$ni,
        names.arg = datos_grafica_location$Location,
        col = "#4ECDC4",
        main = "Gráfica No.1: Distribución por Ubicación de Pipeline",
        ylab = "Cantidad",
        las = 2,
        cex.names = 0.7,
        ylim = c(0, max(datos_grafica_location$ni) * 1.1))

# Chart 2: relative frequency as a proportion (0-1)
barplot(proporcion_location,
        names.arg = datos_grafica_location$Location,
        col = "#4ECDC4",
        main = "Gráfica No.2: Distribución por Ubicación de Pipeline",
        ylab = "Frecuencia relativa",
        las = 2,
        cex.names = 0.7,
        ylim = c(0, max(proporcion_location) * 1.1))

# Chart 3: relative frequency as a percentage (0-100)
barplot(porcentaje_location,
        names.arg = datos_grafica_location$Location,
        col = "#4ECDC4",
        main = "Gráfica Nº3: Distribución por Ubicación de Pipeline",
        las = 2,
        cex.names = 0.7,
        ylab = "Porcentaje (%)")

# Chart 4: pie chart of the share of accidents per pipeline location ----

# Widen the right margin so the external legend fits, keeping the previous
# graphical parameters so they can be restored exactly afterwards.
par_previo <- par(mar = c(2, 2, 2, 12))

azules_degradado <- c("#1f77b4", "#279ece", "#4fb4d8", "#7bcde8", "#a6e3f7", "#d4f1f9")
# One colour per slice, shared by the pie and its legend.
colores_location <- rep_len(azules_degradado, nrow(datos_grafica_location))

# `hi` already holds percentages (it is the renamed hi_porcentaje column),
# so it is used as-is; pie() only needs the relative sizes.
pie(datos_grafica_location$hi,
    labels = NA,  # labels are shown in the external legend instead
    col = colores_location,
    main = "Distribución por Ubicación de Pipeline",
    radius = 1)

# External legend: "<location> - <percent>% (<count> casos)".
# xpd = TRUE allows drawing in the margin, outside the plot region.
legend(x = 1.2, y = 1,
       legend = paste0(datos_grafica_location$Location, " - ",
                       round(datos_grafica_location$hi, 1), "%",
                       " (", datos_grafica_location$ni, " casos)"),
       fill = colores_location,
       bty = "o",  # documented values are "o" (box) and "n" (no box)
       cex = 0.6,
       xpd = TRUE)

# Restore the margins that were active before this chart.
par(par_previo)