library(readxl)
setwd("C:/Users/Usuario/Downloads")
datos <- read_excel("database.xlsx")
## Warning: Expecting numeric in C2189 / R2189C3: got 'Accident Year'
## Warning: Expecting numeric in C2215 / R2215C3: got 'Accident Year'
#   VARIABLE 2: LIQUID SUBTYPE
# ===============================

# Extraer la variable correctamente
liquid_subtype <- datos$`Liquid Subtype`

# Tablas
tabla_freq_sub <- table(liquid_subtype)
View(tabla_freq_sub)

tabla_rel_sub <- prop.table(tabla_freq_sub)
View(tabla_rel_sub)

tabla_porcent_sub <- prop.table(tabla_freq_sub) * 100
View(tabla_porcent_sub)

# Unir en un solo data frame (CORREGIDO)
tabla_completa_sub <- data.frame(
  Liquid_Subtype = names(tabla_freq_sub),
  Frecuencia = as.vector(tabla_freq_sub),
  Frec_Relativa = round(as.vector(tabla_rel_sub), 4),
  Porcentaje = round(as.vector(tabla_porcent_sub), 2)
)

View(tabla_completa_sub)
tabla_completa_sub
##                                             Liquid_Subtype Frecuencia
## 1                                        ANHYDROUS AMMONIA         55
## 2                                                BIODIESEL          2
## 3                     DIESEL, FUEL OIL, KEROSENE, JET FUEL        408
## 4                                   GASOLINE (NON-ETHANOL)        376
## 5 LPG (LIQUEFIED PETROLEUM GAS) / NGL (NATURAL GAS LIQUID)        188
## 6  MIXTURE OF REFINED PRODUCTS (TRANSMIX OR OTHER MIXTURE)         98
## 7                                                    OTHER         51
## 8                                                OTHER HVL        171
##   Frec_Relativa Porcentaje
## 1        0.0408       4.08
## 2        0.0015       0.15
## 3        0.3024      30.24
## 4        0.2787      27.87
## 5        0.1394      13.94
## 6        0.0726       7.26
## 7        0.0378       3.78
## 8        0.1268      12.68
library(ggplot2)

ggplot(data.frame(
  Liquid_Subtype = names(tabla_freq_sub),
  Frecuencia = as.vector(tabla_freq_sub)),
  aes(x = Liquid_Subtype, y = Frecuencia)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Frecuencia de Liquid Subtype",
       x = "Subtype",
       y = "Frecuencia") +
  theme_minimal()

tabla_segunda_sub <- data.frame(
  Liquid_Subtype = names(tabla_rel_sub),
  Frec_Relativa = as.vector(tabla_rel_sub)
)

ggplot(tabla_segunda_sub,
       aes(x = Liquid_Subtype, y = Frec_Relativa)) +
  geom_bar(stat = "identity", fill = "lightgreen") +
  labs(title = "Frecuencia Relativa de Liquid Subtype",
       x = "Subtype",
       y = "Frecuencia Relativa") +
  theme_minimal()

tabla_por_sub <- data.frame(
  Liquid_Subtype = names(tabla_porcent_sub),
  Porcentaje = as.vector(tabla_porcent_sub)
)

ggplot(tabla_por_sub,
       aes(x = Liquid_Subtype, y = Porcentaje)) +
  geom_bar(stat = "identity", fill = "orange") +
  labs(title = "Porcentaje de Liquid Subtype",
       x = "Subtype",
       y = "Porcentaje (%)") +
  theme_minimal()