1 Variables Ordinales

En esta sección, nos centraremos en las variables ordinales del estudio, analizando su comportamiento y características a través de la estadística descriptiva. El objetivo es resumir y organizar los datos para comprender mejor la distribución de cada variable, respetando el orden inherente a sus categorías.

Cargar Datos:

# Move the session into the project folder so the relative CSV path resolves.
setwd("/cloud/project")

# Load the fuel-consumption data set. header = TRUE, sep = "," and dec = "."
# are read.csv()'s own defaults, so they are not repeated here.
datos <- read.csv(file = "MPG_consumo.csv")

# Show every column with its type and first values.
str(object = datos)
## 'data.frame':    38113 obs. of  81 variables:
##  $ Vehicle.ID                         : int  26587 27705 26561 27681 27550 28426 27549 28425 27593 28455 ...
##  $ Year                               : int  1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 ...
##  $ Make                               : chr  "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" ...
##  $ Model                              : chr  "GT V6 2.5" "GT V6 2.5" "Spider Veloce 2000" "Spider Veloce 2000" ...
##  $ Class                              : chr  "Minicompact Cars" "Minicompact Cars" "Two Seaters" "Two Seaters" ...
##  $ Drive                              : chr  "" "" "" "" ...
##  $ Transmission                       : chr  "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" ...
##  $ Transmission.Descriptor            : chr  "" "" "" "" ...
##  $ Engine.Index                       : int  9001 9005 9002 9006 1830 1880 1831 1881 1524 1574 ...
##  $ Engine.Descriptor                  : chr  "(FFS)" "(FFS) CA model" "(FFS)" "(FFS) CA model" ...
##  $ Engine.Cylinders                   : int  6 6 4 4 4 4 6 6 6 6 ...
##  $ Engine.Displacement                : num  2.5 2.5 2 2 2.5 2.5 4.2 4.2 4.2 4.2 ...
##  $ Turbocharger                       : logi  NA NA NA NA NA NA ...
##  $ Supercharger                       : chr  "" "" "" "" ...
##  $ Fuel.Type                          : chr  "Regular" "Regular" "Regular" "Regular" ...
##  $ Fuel.Type.1                        : chr  "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" ...
##  $ Fuel.Type.2                        : chr  "" "" "" "" ...
##  $ City.MPG..FT1.                     : int  17 17 18 18 18 18 13 13 15 15 ...
##  $ Unrounded.City.MPG..FT1.           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.MPG..FT2.                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.City.MPG..FT2.           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Gasoline.Consumption..CD.     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Electricity.Consumption       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Utility.Factor                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.MPG..FT1.                  : int  24 24 25 25 17 17 13 13 20 19 ...
##  $ Unrounded.Highway.MPG..FT1.        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.MPG..FT2.                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.Highway.MPG..FT2.        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Gasoline.Consumption..CD.  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Electricity.Consumption    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Utility.Factor             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unadjusted.City.MPG..FT1.          : num  21 21 23 23 22 22 16 16 19 19 ...
##  $ Unadjusted.Highway.MPG..FT1.       : num  34 34 35 35 24 24 18 18 27 26 ...
##  $ Unadjusted.City.MPG..FT2.          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unadjusted.Highway.MPG..FT2.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.MPG..FT1.                 : int  20 20 21 21 17 17 13 13 17 17 ...
##  $ Unrounded.Combined.MPG..FT1.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.MPG..FT2.                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.Combined.MPG..FT2.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Electricity.Consumption   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Gasoline.Consumption..CD. : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Utility.Factor            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Annual.Fuel.Cost..FT1.             : int  1750 1750 1650 1650 2050 2050 2700 2700 2050 2050 ...
##  $ Annual.Fuel.Cost..FT2.             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gas.Guzzler.Tax                    : chr  "" "" "" "" ...
##  $ Save.or.Spend..5.Year.             : int  -2000 -2000 -1500 -1500 -3500 -3500 -6750 -6750 -3500 -3500 ...
##  $ Annual.Consumption.in.Barrels..FT1.: num  16.5 16.5 15.7 15.7 19.4 ...
##  $ Annual.Consumption.in.Barrels..FT2.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Tailpipe.CO2..FT1.                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Tailpipe.CO2.in.Grams.Mile..FT1.   : num  444 444 423 423 523 ...
##  $ Tailpipe.CO2..FT2.                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Tailpipe.CO2.in.Grams.Mile..FT2.   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fuel.Economy.Score                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ GHG.Score                          : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ GHG.Score..Alt.Fuel.               : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ My.MPG.Data                        : chr  "N" "N" "N" "N" ...
##  $ X2D.Passenger.Volume               : int  74 74 0 0 0 0 0 0 0 0 ...
##  $ X2D.Luggage.Volume                 : int  7 7 0 0 0 0 0 0 0 0 ...
##  $ X4D.Passenger.Volume               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X4D.Luggage.Volume                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hatchback.Passenger.Volume         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hatchback.Luggage.Volume           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Start.Stop.Technology              : chr  "" "" "" "" ...
##  $ Alternative.Fuel.Technology        : chr  "" "" "" "" ...
##  $ Electric.Motor                     : chr  "" "" "" "" ...
##  $ Manufacturer.Code                  : chr  "" "" "" "" ...
##  $ Gasoline.Electricity.Blended..CD.  : chr  "False" "False" "False" "False" ...
##  $ Vehicle.Charger                    : chr  "" "" "" "" ...
##  $ Alternate.Charger                  : chr  "" "" "" "" ...
##  $ Hours.to.Charge..120V.             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hours.to.Charge..240V.             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hours.to.Charge..AC.240V.          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.City.MPG                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.Highway.MPG              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.Combined.MPG             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Range..FT1.                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Range..FT1.                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Range..FT1.                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Range..FT2.                        : chr  "" "" "" "" ...
##  $ City.Range..FT2.                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Range..FT2.                : num  0 0 0 0 0 0 0 0 0 0 ...
# Print the current working directory (set by the setwd() call above).
getwd()
## [1] "/cloud/project"

Empezamos el desarrollo para cada variable ordinal:

2 Vehicle ID

## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: colorspace
# Vehicle.ID: four charts drawn from the frequency table TDFMes.
# NOTE(review): TDFMes is created outside this excerpt; the chart titles
# suggest it holds the 10 most frequent Vehicle.ID values -- confirm.
n_categorias <- length(TDFMes)
etiquetas_id <- names(TDFMes)

# Chart 1: absolute frequencies, with 20% headroom above the tallest bar.
barplot(
  TDFMes,
  col = terrain.colors(n_categorias),
  main = "Gráfica No 1:\nTop 10 Vehicle.ID - Frecuencia absoluta",
  xlab = "Vehicle.ID",
  ylab = "Frecuencia",
  ylim = c(0, 1.2 * max(TDFMes)),
  las = 2,
  cex.names = 0.9,
  cex.main = 1.2,
  cex.lab = 1.0
)

# Chart 2: the same table as percentages of its own total.
porcentajes <- 100 * prop.table(TDFMes)
barplot(
  porcentajes,
  col = rainbow(n_categorias),
  main = "Gráfica No 2:\nTop 10 Vehicle.ID - Porcentaje",
  xlab = "Vehicle.ID",
  ylab = "Porcentaje",
  ylim = c(0, 1.2 * max(porcentajes)),
  las = 2,
  cex.names = 0.9,
  cex.main = 1.2,
  cex.lab = 1.0
)

# Chart 3: pie of absolute frequencies; slice labels are blanked and the
# categories are identified through the legend instead.
colores <- rainbow_hcl(n_categorias)
pie(
  TDFMes,
  labels = rep("", n_categorias),
  col = colores,
  radius = 1,
  cex = 0.7,
  main = "Gráfica No 3:\nDistribución de los 10 Vehicle.ID más frecuentes"
)
legend("topright", legend = etiquetas_id, fill = colores, cex = 0.7)

# Chart 4: pie of percentages, each slice labelled "<id>: <pct>%".
colores <- divergingx_hcl(n_categorias, palette = "Spectral")
porcentajes_label <- paste0(etiquetas_id, ": ", round(porcentajes, 1), "%")
pie(
  porcentajes,
  labels = porcentajes_label,
  col = colores,
  radius = 1,
  cex = 0.6,
  main = "Gráfica No 4:\nPorcentaje de los 10 Vehicle.ID más frecuentes"
)
legend("topright", legend = etiquetas_id, fill = colores, cex = 0.7)

3 Fuel Economy Score

# Fuel Economy Score: frequency table plus bar and pie charts for the score.
#
# The raw column stores "score not available" as -1 (EPA convention; the
# str() output of the data shows the column filled with -1). is.na() never
# catches that sentinel, so it used to appear as the lowest ordinal level and
# dominate every chart. It is recoded to NA here so that only real scores are
# tabulated.

# Rows with a genuine NA are dropped, as before. Rows carrying the -1 sentinel
# are deliberately NOT removed from `datos`, so the other variables keep all
# of their observations.
datos <- datos[!is.na(datos$Fuel.Economy.Score), ]

puntaje_fe <- datos$Fuel.Economy.Score
puntaje_fe[puntaje_fe == -1] <- NA

# sort() discards NA, so the levels contain only the valid scores.
niveles_ordenados <- sort(unique(puntaje_fe))
datos$Fuel.Economy.Score <- factor(puntaje_fe,
                                   levels = niveles_ordenados,
                                   ordered = TRUE)

# table() ignores NA by default: unavailable scores are left out of the counts.
TDF <- table(datos$Fuel.Economy.Score)
if (length(TDF) == 0L) {
  stop("Fuel.Economy.Score has no valid scores to summarise.", call. = FALSE)
}

# Frequency table: absolute (Freq), relative (hi) and percentage (hi_porc).
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)

# Chart 5: absolute frequencies, with 20% headroom above the tallest bar.
barplot(TDF,
        main = "Gráfica No 5:\nDistribución de Fuel Economy Score",
        xlab = "Fuel Economy Score",
        ylab = "Frecuencia",
        col = terrain.colors(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(TDF) * 1.2))

# Chart 6: percentage of vehicles per score.
porcentajes <- prop.table(TDF) * 100
barplot(porcentajes,
        main = "Gráfica No 6:\nPorcentaje por Fuel Economy Score",
        xlab = "Fuel Economy Score",
        ylab = "Porcentaje",
        col = rainbow(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(porcentajes) * 1.2))

# Chart 7: pie of absolute frequencies; slices are identified by the legend.
colores <- rainbow_hcl(length(TDF))
pie(TDF,
    main = "Gráfica No 7:\nDistribución de Fuel Economy Score",
    radius = 1,
    col = colores,
    cex = 0.6,
    labels = rep("", length(TDF)))

legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

# Chart 8: pie of percentages, each slice labelled "<score>: <pct>%".
colores <- divergingx_hcl(length(TDF), palette = "Spectral")
porcentajes_label <- paste0(names(TDF), ": ", round(porcentajes, 1), "%")
pie(porcentajes,
    main = "Gráfica No 8:\nPorcentaje por Fuel Economy Score",
    radius = 1,
    col = colores,
    cex = 0.5,
    labels = porcentajes_label)
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

4 GHG Score

# GHG Score: frequency table plus bar and pie charts for the score.
#
# The raw column stores "score not available" as -1 (EPA convention; the
# str() output of the data shows the column filled with -1). is.na() never
# catches that sentinel, so it used to appear as the lowest ordinal level and
# dominate every chart. It is recoded to NA here so that only real scores are
# tabulated.

# Rows with a genuine NA are dropped, as before. Rows carrying the -1 sentinel
# are deliberately NOT removed from `datos`, so the other variables keep all
# of their observations.
datos <- datos[!is.na(datos$GHG.Score), ]

puntaje_ghg <- datos$GHG.Score
puntaje_ghg[puntaje_ghg == -1] <- NA

# sort() discards NA, so the levels contain only the valid scores.
niveles_ordenados <- sort(unique(puntaje_ghg))
datos$GHG.Score <- factor(puntaje_ghg,
                          levels = niveles_ordenados,
                          ordered = TRUE)

# table() ignores NA by default: unavailable scores are left out of the counts.
TDF <- table(datos$GHG.Score)
if (length(TDF) == 0L) {
  stop("GHG.Score has no valid scores to summarise.", call. = FALSE)
}

# Frequency table: absolute (Freq), relative (hi) and percentage (hi_porc).
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)

# Chart 9: absolute frequencies, with 20% headroom above the tallest bar.
barplot(TDF,
        main = "Gráfica No 9:\nDistribución de GHG Score",
        xlab = "GHG Score",
        ylab = "Frecuencia",
        col = terrain.colors(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(TDF) * 1.2))

# Chart 10: percentage of vehicles per score.
porcentajes <- prop.table(TDF) * 100
barplot(porcentajes,
        main = "Gráfica No 10:\nPorcentaje por GHG Score",
        xlab = "GHG Score",
        ylab = "Porcentaje",
        col = rainbow(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(porcentajes) * 1.2))

# Chart 11: pie of absolute frequencies; slices are identified by the legend.
colores <- rainbow_hcl(length(TDF))
pie(TDF,
    main = "Gráfica No 11:\nDistribución de GHG Score",
    radius = 1,
    col = colores,
    cex = 0.6,
    labels = rep("", length(TDF)))
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

# Chart 12: pie of percentages, each slice labelled "<score>: <pct>%".
colores <- divergingx_hcl(length(TDF), palette = "Spectral")
porcentajes_label <- paste0(names(TDF), ": ", round(porcentajes, 1), "%")
pie(porcentajes,
    main = "Gráfica No 12:\nPorcentaje por GHG Score",
    radius = 1,
    col = colores,
    cex = 0.5,
    labels = porcentajes_label)
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

5 GHG Score Alt Fuel

# GHG Score (Alt Fuel): frequency table plus bar and pie charts for the score.
#
# The raw column stores "score not available" as -1 (EPA convention; the
# str() output of the data shows the column filled with -1). is.na() never
# catches that sentinel, so it used to appear as the lowest ordinal level and
# dominate every chart. It is recoded to NA here so that only real scores are
# tabulated.
#
# Chart titles are numbered 13-16 so they continue after "Gráfica No 12"
# instead of repeating the numbers 1-4 already used by the Vehicle.ID charts.

# Rows with a genuine NA are dropped, as before. Rows carrying the -1 sentinel
# are deliberately NOT removed from `datos`, so the other variables keep all
# of their observations.
datos <- datos[!is.na(datos$GHG.Score..Alt.Fuel.), ]

puntaje_alt <- datos$GHG.Score..Alt.Fuel.
puntaje_alt[puntaje_alt == -1] <- NA

# sort() discards NA, so the levels contain only the valid scores.
niveles_ordenados <- sort(unique(puntaje_alt))
datos$GHG.Score..Alt.Fuel. <- factor(puntaje_alt,
                                     levels = niveles_ordenados,
                                     ordered = TRUE)

# table() ignores NA by default: unavailable scores are left out of the counts.
TDF <- table(datos$GHG.Score..Alt.Fuel.)
if (length(TDF) == 0L) {
  stop("GHG.Score..Alt.Fuel. has no valid scores to summarise.",
       call. = FALSE)
}

# Frequency table: absolute (Freq), relative (hi) and percentage (hi_porc).
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)

# Chart 13: absolute frequencies, with 20% headroom above the tallest bar.
barplot(TDF,
        main = "Gráfica No 13:\nDistribución de GHG Score (Alt Fuel)",
        xlab = "GHG Score (Alt Fuel)",
        ylab = "Frecuencia",
        col = terrain.colors(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(TDF) * 1.2))

# Chart 14: percentage of vehicles per score.
porcentajes <- prop.table(TDF) * 100
barplot(porcentajes,
        main = "Gráfica No 14:\nPorcentaje por GHG Score (Alt Fuel)",
        xlab = "GHG Score (Alt Fuel)",
        ylab = "Porcentaje",
        col = rainbow(length(TDF)),
        las = 2,
        cex.names = 0.8,
        cex.main = 1.2,
        cex.lab = 1.0,
        ylim = c(0, max(porcentajes) * 1.2))

# Chart 15: pie of absolute frequencies; slices are identified by the legend.
colores <- rainbow_hcl(length(TDF))
pie(TDF,
    main = "Gráfica No 15:\nDistribución de GHG Score (Alt Fuel)",
    radius = 1,
    col = colores,
    cex = 0.6,
    labels = rep("", length(TDF)))
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)

# Chart 16: pie of percentages, each slice labelled "<score>: <pct>%".
colores <- divergingx_hcl(length(TDF), palette = "Spectral")
porcentajes_label <- paste0(names(TDF), ": ", round(porcentajes, 1), "%")
pie(porcentajes,
    main = "Gráfica No 16:\nPorcentaje por GHG Score (Alt Fuel)",
    radius = 1,
    col = colores,
    cex = 0.5,
    labels = porcentajes_label)
legend("topright",
       legend = names(TDF),
       fill = colores,
       cex = 0.6)