###### Universidad Central del Ecuador######
###### Carrera de Petróleos  #####
library(readxl)
library(ggplot2)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#   VARIABLE 3: LIQUID NAME


# Load the accident database from the local Downloads folder.
# The full path is built with file.path() and passed straight to
# read_excel(), so the session's working directory is never changed
# (no setwd() is needed to resolve a single input file).
ruta_datos <- file.path("C:/Users/Usuario/Downloads", "database.xlsx")
datos <- read_excel(ruta_datos)
# NOTE(review): a captured run of this import printed
#   Warning: Expecting numeric in C2189 / R2189C3: got 'Accident Year'
#   Warning: Expecting numeric in C2215 / R2215C3: got 'Accident Year'
# i.e. sheet rows 2189 and 2215 hold the text 'Accident Year' in column 3.
# Presumably the header row is repeated inside the data -- confirm against
# the workbook.
# Frequency table for the `Liquid Name` column of `datos`.
# Produces, per distinct liquid name: absolute count, relative frequency
# (4 decimals) and percentage (2 decimals), gathered in tabla_completa_name.

# Fail early with a clear message if the expected column is absent.
stopifnot("Liquid Name" %in% names(datos))

# Extract the variable
liquid_name <- datos$`Liquid Name`

# Entries equal to the column's own header text are not real observations.
# NOTE(review): the import warnings report header text ('Accident Year')
# inside data rows, so repeated header rows are presumably present and would
# otherwise be counted as a liquid called "Liquid Name" -- confirm against
# the workbook. %in% (unlike !=) never yields NA, so missing values are kept
# here and then dropped by table() as before.
es_encabezado <- liquid_name %in% "Liquid Name"

# Tables: counts, proportions (sum to 1) and percentages (sum to 100).
# The argument name keeps "liquid_name" as the table's dimension name.
tabla_freq_name <- table(liquid_name = liquid_name[!es_encabezado])
tabla_rel_name <- prop.table(tabla_freq_name)
tabla_porcent_name <- tabla_rel_name * 100

# Combine everything into a single data frame (one row per liquid name)
tabla_completa_name <- data.frame(
  Liquid_Name   = names(tabla_freq_name),
  Frecuencia    = as.vector(tabla_freq_name),
  Frec_Relativa = round(as.vector(tabla_rel_name), 4),
  Porcentaje    = round(as.vector(tabla_porcent_name), 2)
)

# View() opens a data viewer; only do so in an interactive session so that
# batch runs (Rscript, knitting) do not fail on it.
if (interactive()) {
  View(tabla_completa_name)
}

#==========================================
#     G R Á F I C A S
#==========================================

# Chart 1 -- absolute frequency of the ten most frequent liquid names.
# Rows are sorted by Frecuencia (highest first) and the first ten are kept.
df_freq <- slice_head(
  arrange(tabla_completa_name, desc(Frecuencia)),
  n = 10
)

# Horizontal bar chart. reorder() orders the bars by Frecuencia, so after
# coord_flip() the largest bar is drawn at the top.
ggplot(
  data = df_freq,
  mapping = aes(x = reorder(Liquid_Name, Frecuencia), y = Frecuencia)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Grafica No.1 : Distribucion de Frecuencia  Liquid Name ",
    x = "Nombre del Liquido",
    y = "Frecuencia"
  ) +
  theme_minimal()

# Chart 2 -- relative frequency (proportion) of the ten most frequent
# liquid names.
# The top ten are ranked by the exact count (Frecuencia) rather than by
# Frec_Relativa: the latter is rounded, so near-equal proportions can tie
# and let the original row order decide which names make the cut.
df_rel <- tabla_completa_name %>%
  arrange(desc(Frecuencia)) %>%
  slice_head(n = 10)

# Horizontal bars showing the proportion; bars are ordered by the exact
# count so that equal rounded proportions still appear in rank order.
# Axis labels use the same unaccented spelling as the other charts (the
# previous labels contained mis-encoded characters).
ggplot(df_rel,
       aes(x = reorder(Liquid_Name, Frecuencia), y = Frec_Relativa)) +
  geom_col(fill = "darkgreen") +
  labs(title = "Grafica No.2:Distribucion de Proporcion  Liquid Name ",
       x = "Nombre del Liquido",
       y = "Proporcion") +
  coord_flip() +
  theme_minimal()

# Chart 3 -- percentage of records for the ten most frequent liquid names.
# The top ten are ranked by the exact count (Frecuencia) rather than by
# Porcentaje: the latter is rounded, so near-equal percentages can tie and
# let the original row order decide which names make the cut.
df_pct <- tabla_completa_name %>%
  arrange(desc(Frecuencia)) %>%
  slice_head(n = 10)

# Horizontal bars showing the percentage; bars are ordered by the exact
# count so that equal rounded percentages still appear in rank order.
# The title restores the missing space in "de Porcentaje".
ggplot(df_pct,
       aes(x = reorder(Liquid_Name, Frecuencia), y = Porcentaje)) +
  geom_col(fill = "orange") +
  labs(title = " Grafica No.3 : Distribucion de Porcentaje  Liquid Name ",
       x = "Nombre del Liquido",
       y = "Porcentaje (%)") +
  coord_flip() +
  theme_minimal()

# ===============================
# GRAFICA No.4: PASTEL
# ===============================

# Chart 4 -- pie chart of the seven most frequent liquid names (only seven
# slices are drawn to keep the chart from becoming cluttered).
# Porcentaje is recomputed here over the seven retained rows, so the legend
# percentages are shares of the top-7 subtotal, not of the full dataset.
df_pastel <- mutate(
  slice_head(arrange(tabla_completa_name, desc(Frecuencia)), n = 7),
  Porcentaje = round(Frecuencia / sum(Frecuencia) * 100, 1),
  # Legend entry: "<name> (<percentage>%)"
  Leyenda = paste0(Liquid_Name, " (", Porcentaje, "%)")
)

# A single stacked bar of width 1 wrapped around the y axis yields the pie;
# white borders separate the slices.
ggplot(
  data = df_pastel,
  mapping = aes(x = "", y = Frecuencia, fill = Leyenda)
) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar(theta = "y") +
  labs(
    fill = "Liquid Name",
    title = "Grafica No.4: Distribucion de Liquid Name"
  ) +
  # theme_void() removes axes and grid; the theme() call then styles the
  # centered bold title and the legend on the right.
  theme_void() +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )