En esta sección, nos centraremos en las variables ordinales del estudio, analizando su comportamiento y características a través de la estadística descriptiva. El objetivo es resumir y organizar los datos para comprender mejor la distribución de cada variable, respetando el orden inherente a sus categorías.
Cargar Datos:
# ---- Load the data set ----
# NOTE(review): setwd() with an absolute path ties the script to this
# workspace; it is kept because code outside this chunk may rely on it.
setwd("/cloud/project")

# header = TRUE, sep = "," and dec = "." are already read.csv()'s defaults,
# so they do not need to be spelled out.
datos <- read.csv(file = "MPG_consumo.csv")

# Print the column names, types and first values of the data frame.
str(object = datos)
## 'data.frame': 38113 obs. of 81 variables:
## $ Vehicle.ID : int 26587 27705 26561 27681 27550 28426 27549 28425 27593 28455 ...
## $ Year : int 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 ...
## $ Make : chr "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" ...
## $ Model : chr "GT V6 2.5" "GT V6 2.5" "Spider Veloce 2000" "Spider Veloce 2000" ...
## $ Class : chr "Minicompact Cars" "Minicompact Cars" "Two Seaters" "Two Seaters" ...
## $ Drive : chr "" "" "" "" ...
## $ Transmission : chr "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" ...
## $ Transmission.Descriptor : chr "" "" "" "" ...
## $ Engine.Index : int 9001 9005 9002 9006 1830 1880 1831 1881 1524 1574 ...
## $ Engine.Descriptor : chr "(FFS)" "(FFS) CA model" "(FFS)" "(FFS) CA model" ...
## $ Engine.Cylinders : int 6 6 4 4 4 4 6 6 6 6 ...
## $ Engine.Displacement : num 2.5 2.5 2 2 2.5 2.5 4.2 4.2 4.2 4.2 ...
## $ Turbocharger : logi NA NA NA NA NA NA ...
## $ Supercharger : chr "" "" "" "" ...
## $ Fuel.Type : chr "Regular" "Regular" "Regular" "Regular" ...
## $ Fuel.Type.1 : chr "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" ...
## $ Fuel.Type.2 : chr "" "" "" "" ...
## $ City.MPG..FT1. : int 17 17 18 18 18 18 13 13 15 15 ...
## $ Unrounded.City.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.City.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.MPG..FT1. : int 24 24 25 25 17 17 13 13 20 19 ...
## $ Unrounded.Highway.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.Highway.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Unadjusted.City.MPG..FT1. : num 21 21 23 23 22 22 16 16 19 19 ...
## $ Unadjusted.Highway.MPG..FT1. : num 34 34 35 35 24 24 18 18 27 26 ...
## $ Unadjusted.City.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Unadjusted.Highway.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.MPG..FT1. : int 20 20 21 21 17 17 13 13 17 17 ...
## $ Unrounded.Combined.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.Combined.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Annual.Fuel.Cost..FT1. : int 1750 1750 1650 1650 2050 2050 2700 2700 2050 2050 ...
## $ Annual.Fuel.Cost..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Gas.Guzzler.Tax : chr "" "" "" "" ...
## $ Save.or.Spend..5.Year. : int -2000 -2000 -1500 -1500 -3500 -3500 -6750 -6750 -3500 -3500 ...
## $ Annual.Consumption.in.Barrels..FT1.: num 16.5 16.5 15.7 15.7 19.4 ...
## $ Annual.Consumption.in.Barrels..FT2.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Tailpipe.CO2..FT1. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ Tailpipe.CO2.in.Grams.Mile..FT1. : num 444 444 423 423 523 ...
## $ Tailpipe.CO2..FT2. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ Tailpipe.CO2.in.Grams.Mile..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fuel.Economy.Score : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ GHG.Score : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ GHG.Score..Alt.Fuel. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ My.MPG.Data : chr "N" "N" "N" "N" ...
## $ X2D.Passenger.Volume : int 74 74 0 0 0 0 0 0 0 0 ...
## $ X2D.Luggage.Volume : int 7 7 0 0 0 0 0 0 0 0 ...
## $ X4D.Passenger.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ X4D.Luggage.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hatchback.Passenger.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hatchback.Luggage.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Start.Stop.Technology : chr "" "" "" "" ...
## $ Alternative.Fuel.Technology : chr "" "" "" "" ...
## $ Electric.Motor : chr "" "" "" "" ...
## $ Manufacturer.Code : chr "" "" "" "" ...
## $ Gasoline.Electricity.Blended..CD. : chr "False" "False" "False" "False" ...
## $ Vehicle.Charger : chr "" "" "" "" ...
## $ Alternate.Charger : chr "" "" "" "" ...
## $ Hours.to.Charge..120V. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hours.to.Charge..240V. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Hours.to.Charge..AC.240V. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.City.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.Highway.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.Combined.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Range..FT1. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Range..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Range..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Range..FT2. : chr "" "" "" "" ...
## $ City.Range..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Range..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## [1] "/cloud/project"
Empezamos el desarrollo para cada variable ordinal:
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: colorspace
# ---- Vehicle.ID: four charts drawn from the frequency table `TDFMes` ----
# `TDFMes` is created outside this chunk; the chart titles describe it as the
# ten most frequent Vehicle.ID values.

# Chart 1: absolute frequencies; the y axis gets 20% headroom over the
# tallest bar.
barplot(
  TDFMes,
  col = terrain.colors(length(TDFMes)),
  ylim = c(0, 1.2 * max(TDFMes)),
  main = "Gráfica No 1:\nTop 10 Vehicle.ID - Frecuencia absoluta",
  xlab = "Vehicle.ID",
  ylab = "Frecuencia",
  las = 2,
  cex.main = 1.2,
  cex.lab = 1.0,
  cex.names = 0.9
)

# Chart 2: the same table expressed as percentages of its total.
porcentajes <- 100 * prop.table(TDFMes)
barplot(
  porcentajes,
  col = rainbow(length(TDFMes)),
  ylim = c(0, 1.2 * max(porcentajes)),
  main = "Gráfica No 2:\nTop 10 Vehicle.ID - Porcentaje",
  xlab = "Vehicle.ID",
  ylab = "Porcentaje",
  las = 2,
  cex.main = 1.2,
  cex.lab = 1.0,
  cex.names = 0.9
)

# Chart 3: pie of the counts. Slice labels are blank; the legend carries
# the category names instead.
colores <- rainbow_hcl(length(TDFMes))
pie(
  TDFMes,
  labels = character(length(TDFMes)),
  col = colores,
  radius = 1,
  cex = 0.7,
  main = "Gráfica No 3:\nDistribución de los 10 Vehicle.ID más frecuentes"
)
legend(
  "topright",
  fill = colores,
  legend = names(TDFMes),
  cex = 0.7
)

# Chart 4: pie of the percentages, each slice labelled "<name>: <pct>%".
colores <- divergingx_hcl(length(TDFMes), palette = "Spectral")
porcentajes_label <- paste0(
  names(TDFMes), ": ", round(porcentajes, 1), "%"
)
pie(
  porcentajes,
  labels = porcentajes_label,
  col = colores,
  radius = 1,
  cex = 0.6,
  main = "Gráfica No 4:\nPorcentaje de los 10 Vehicle.ID más frecuentes"
)
legend(
  "topright",
  fill = colores,
  legend = names(TDFMes),
  cex = 0.7
)
# ---- Fuel.Economy.Score: frequency table and charts ----
# Builds an ordered factor from Fuel.Economy.Score, tabulates it (`TDF`,
# `tabla` with relative frequency `hi` and percentage `hi_porc`) and draws
# two bar charts and two pie charts.

# Drop rows whose score is a true NA.
datos <- datos[!is.na(datos$Fuel.Economy.Score), ]

# The str() output of this column shows -1 values. In the fueleconomy.gov
# data description -1 means "not available", so it is a missing-value code
# and not the lowest level of the scale. Recode it to NA so it is excluded
# from the levels and the counts (rows are kept).
# TODO confirm that MPG_consumo.csv follows the fueleconomy.gov coding.
datos$Fuel.Economy.Score[datos$Fuel.Economy.Score %in% -1] <- NA

# sort() drops NA, so the levels contain only real scores, in ascending order.
niveles_ordenados <- sort(unique(datos$Fuel.Economy.Score))
datos$Fuel.Economy.Score <- factor(datos$Fuel.Economy.Score,
                                   levels = niveles_ordenados,
                                   ordered = TRUE)

# table() ignores NA by default, so only valid scores are counted.
TDF <- table(datos$Fuel.Economy.Score)
if (length(TDF) == 0) {
  stop("No hay valores válidos de Fuel.Economy.Score para graficar.",
       call. = FALSE)
}

# Frequency table: absolute count, relative frequency and percentage.
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)

# Bar chart of absolute frequencies (20% headroom on the y axis).
barplot(TDF,
        main = "Gráfica No 5:\nDistribución de Fuel Economy Score",
        xlab = "Fuel Economy Score",
        ylab = "Frecuencia",
        col = terrain.colors(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(TDF) * 1.2))

# Bar chart of percentages.
porcentajes <- prop.table(TDF) * 100
barplot(porcentajes,
        main = "Gráfica No 6:\nPorcentaje por Fuel Economy Score",
        xlab = "Fuel Economy Score",
        ylab = "Porcentaje",
        col = rainbow(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(porcentajes) * 1.2))

# Pie chart of counts; slice labels are blank and the legend names the levels.
colores <- rainbow_hcl(length(TDF))
pie(TDF,
    main = "Gráfica No 7:\nDistribución de Fuel Economy Score",
    radius = 1,
    col = colores,
    cex = 0.6,
    labels = rep("", length(TDF)))
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

# Pie chart of percentages, each slice labelled "<level>: <pct>%".
colores <- divergingx_hcl(length(TDF), palette = "Spectral")
porcentajes_label <- paste0(names(TDF), ": ", round(porcentajes, 1), "%")
pie(porcentajes,
    main = "Gráfica No 8:\nPorcentaje por Fuel Economy Score",
    radius = 1,
    col = colores,
    cex = 0.5,
    labels = porcentajes_label)
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)
# ---- GHG.Score: frequency table and charts ----
# Builds an ordered factor from GHG.Score, tabulates it (`TDF`, `tabla` with
# relative frequency `hi` and percentage `hi_porc`) and draws two bar charts
# and two pie charts.

# Drop rows whose score is a true NA.
datos <- datos[!is.na(datos$GHG.Score), ]

# The str() output of this column shows -1 values. In the fueleconomy.gov
# data description -1 means "not available", so it is a missing-value code
# and not the lowest level of the scale. Recode it to NA so it is excluded
# from the levels and the counts (rows are kept).
# TODO confirm that MPG_consumo.csv follows the fueleconomy.gov coding.
datos$GHG.Score[datos$GHG.Score %in% -1] <- NA

# sort() drops NA, so the levels contain only real scores, in ascending order.
niveles_ordenados <- sort(unique(datos$GHG.Score))
datos$GHG.Score <- factor(datos$GHG.Score,
                          levels = niveles_ordenados,
                          ordered = TRUE)

# table() ignores NA by default, so only valid scores are counted.
TDF <- table(datos$GHG.Score)
if (length(TDF) == 0) {
  stop("No hay valores válidos de GHG.Score para graficar.",
       call. = FALSE)
}

# Frequency table: absolute count, relative frequency and percentage.
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)

# Bar chart of absolute frequencies (20% headroom on the y axis).
barplot(TDF,
        main = "Gráfica No 9:\nDistribución de GHG Score",
        xlab = "GHG Score",
        ylab = "Frecuencia",
        col = terrain.colors(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(TDF) * 1.2))

# Bar chart of percentages.
porcentajes <- prop.table(TDF) * 100
barplot(porcentajes,
        main = "Gráfica No 10:\nPorcentaje por GHG Score",
        xlab = "GHG Score",
        ylab = "Porcentaje",
        col = rainbow(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(porcentajes) * 1.2))

# Pie chart of counts; slice labels are blank and the legend names the levels.
colores <- rainbow_hcl(length(TDF))
pie(TDF,
    main = "Gráfica No 11:\nDistribución de GHG Score",
    radius = 1,
    col = colores,
    cex = 0.6,
    labels = rep("", length(TDF)))
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

# Pie chart of percentages, each slice labelled "<level>: <pct>%".
colores <- divergingx_hcl(length(TDF), palette = "Spectral")
porcentajes_label <- paste0(names(TDF), ": ", round(porcentajes, 1), "%")
pie(porcentajes,
    main = "Gráfica No 12:\nPorcentaje por GHG Score",
    radius = 1,
    col = colores,
    cex = 0.5,
    labels = porcentajes_label)
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)
# ---- GHG.Score..Alt.Fuel.: frequency table and charts ----
# Builds an ordered factor from GHG.Score..Alt.Fuel., tabulates it (`TDF`,
# `tabla` with relative frequency `hi` and percentage `hi_porc`) and draws
# two bar charts and two pie charts. Figure numbers continue the document's
# sequence (13-16) instead of restarting at 1, which duplicated the numbers
# of the Vehicle.ID charts.

# Drop rows whose score is a true NA.
datos <- datos[!is.na(datos$GHG.Score..Alt.Fuel.), ]

# The str() output of this column shows -1 values. In the fueleconomy.gov
# data description -1 means "not available", so it is a missing-value code
# and not the lowest level of the scale. Recode it to NA so it is excluded
# from the levels and the counts (rows are kept).
# TODO confirm that MPG_consumo.csv follows the fueleconomy.gov coding.
datos$GHG.Score..Alt.Fuel.[datos$GHG.Score..Alt.Fuel. %in% -1] <- NA

# sort() drops NA, so the levels contain only real scores, in ascending order.
niveles_ordenados <- sort(unique(datos$GHG.Score..Alt.Fuel.))
datos$GHG.Score..Alt.Fuel. <- factor(datos$GHG.Score..Alt.Fuel.,
                                     levels = niveles_ordenados,
                                     ordered = TRUE)

# table() ignores NA by default, so only valid scores are counted.
TDF <- table(datos$GHG.Score..Alt.Fuel.)
if (length(TDF) == 0) {
  stop("No hay valores válidos de GHG.Score..Alt.Fuel. para graficar.",
       call. = FALSE)
}

# Frequency table: absolute count, relative frequency and percentage.
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)

# Bar chart of absolute frequencies (20% headroom on the y axis).
barplot(TDF,
        main = "Gráfica No 13:\nDistribución de GHG Score (Alt Fuel)",
        xlab = "GHG Score (Alt Fuel)",
        ylab = "Frecuencia",
        col = terrain.colors(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(TDF) * 1.2))

# Bar chart of percentages.
porcentajes <- prop.table(TDF) * 100
barplot(porcentajes,
        main = "Gráfica No 14:\nPorcentaje por GHG Score (Alt Fuel)",
        xlab = "GHG Score (Alt Fuel)",
        ylab = "Porcentaje",
        col = rainbow(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(porcentajes) * 1.2))

# Pie chart of counts; slice labels are blank and the legend names the levels.
colores <- rainbow_hcl(length(TDF))
pie(TDF,
    main = "Gráfica No 15:\nDistribución de GHG Score (Alt Fuel)",
    radius = 1,
    col = colores,
    cex = 0.6,
    labels = rep("", length(TDF)))
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

# Pie chart of percentages, each slice labelled "<level>: <pct>%".
colores <- divergingx_hcl(length(TDF), palette = "Spectral")
porcentajes_label <- paste0(names(TDF), ": ", round(porcentajes, 1), "%")
pie(porcentajes,
    main = "Gráfica No 16:\nPorcentaje por GHG Score (Alt Fuel)",
    radius = 1,
    col = colores,
    cex = 0.5,
    labels = porcentajes_label)
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)