library(readxl)
setwd("C:/Users/Usuario/Downloads")
datos <- read_excel("database.xlsx")
## Warning: Expecting numeric in C2189 / R2189C3: got 'Accident Year'
## Warning: Expecting numeric in C2215 / R2215C3: got 'Accident Year'
# VARIABLE 2: LIQUID SUBTYPE
# ===============================
# Extraer la variable correctamente
liquid_subtype <- datos$`Liquid Subtype`
# Tablas
tabla_freq_sub <- table(liquid_subtype)
View(tabla_freq_sub)
tabla_rel_sub <- prop.table(tabla_freq_sub)
View(tabla_rel_sub)
tabla_porcent_sub <- prop.table(tabla_freq_sub) * 100
View(tabla_porcent_sub)
# Unir en un solo data frame (CORREGIDO)
tabla_completa_sub <- data.frame(
Liquid_Subtype = names(tabla_freq_sub),
Frecuencia = as.vector(tabla_freq_sub),
Frec_Relativa = round(as.vector(tabla_rel_sub), 4),
Porcentaje = round(as.vector(tabla_porcent_sub), 2)
)
View(tabla_completa_sub)
tabla_completa_sub
## Liquid_Subtype Frecuencia
## 1 ANHYDROUS AMMONIA 55
## 2 BIODIESEL 2
## 3 DIESEL, FUEL OIL, KEROSENE, JET FUEL 408
## 4 GASOLINE (NON-ETHANOL) 376
## 5 LPG (LIQUEFIED PETROLEUM GAS) / NGL (NATURAL GAS LIQUID) 188
## 6 MIXTURE OF REFINED PRODUCTS (TRANSMIX OR OTHER MIXTURE) 98
## 7 OTHER 51
## 8 OTHER HVL 171
## Frec_Relativa Porcentaje
## 1 0.0408 4.08
## 2 0.0015 0.15
## 3 0.3024 30.24
## 4 0.2787 27.87
## 5 0.1394 13.94
## 6 0.0726 7.26
## 7 0.0378 3.78
## 8 0.1268 12.68
library(ggplot2)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
tabla_freq_sub_df <- data.frame(
Liquid_Subtype = names(tabla_freq_sub),
Frecuencia = as.vector(tabla_freq_sub)
)
tabla_freq_sub_grouped <- tabla_freq_sub_df %>%
arrange(desc(Frecuencia)) %>%
mutate(Liquid_Subtype = ifelse(row_number() > 10, "OTROS", Liquid_Subtype)) %>%
group_by(Liquid_Subtype) %>%
summarise(Frecuencia = sum(Frecuencia))
ggplot(tabla_freq_sub_grouped,
aes(x = reorder(Liquid_Subtype, Frecuencia), y = Frecuencia)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Frecuencia de Liquid Subtype ",
x = "Subtype",
y = "Frecuencia") +
theme_minimal() +
coord_flip()

tabla_segunda_sub <- data.frame(
Liquid_Subtype = names(tabla_rel_sub),
Frec_Relativa = as.vector(tabla_rel_sub)
)
tabla_segunda_sub_grouped <- tabla_segunda_sub %>%
arrange(desc(Frec_Relativa)) %>%
mutate(Liquid_Subtype = ifelse(row_number() > 10, "OTROS", Liquid_Subtype)) %>%
group_by(Liquid_Subtype) %>%
summarise(Frec_Relativa = sum(Frec_Relativa))
ggplot(tabla_segunda_sub_grouped,
aes(x = reorder(Liquid_Subtype, Frec_Relativa), y = Frec_Relativa)) +
geom_bar(stat = "identity", fill = "lightgreen") +
labs(title = "Frecuencia Relativa de Liquid Subtype ",
x = "Subtype",
y = "Frecuencia Relativa") +
theme_minimal() +
coord_flip()

tabla_por_sub <- data.frame(
Liquid_Subtype = names(tabla_porcent_sub),
Porcentaje = as.vector(tabla_porcent_sub)
)
tabla_por_sub_grouped <- tabla_por_sub %>%
arrange(desc(Porcentaje)) %>%
mutate(Liquid_Subtype = ifelse(row_number() > 10, "OTROS", Liquid_Subtype)) %>%
group_by(Liquid_Subtype) %>%
summarise(Porcentaje = sum(Porcentaje))
ggplot(tabla_por_sub_grouped,
aes(x = reorder(Liquid_Subtype, Porcentaje), y = Porcentaje)) +
geom_bar(stat = "identity", fill = "orange") +
labs(title = "Porcentaje de Liquid Subtype ",
x = "Subtype",
y = "Porcentaje (%)") +
theme_minimal() +
coord_flip()
