1 Variables Discretas

En esta sección, nos enfocaremos en las variables discretas del estudio, analizando su comportamiento y características a través de la estadística descriptiva. El objetivo es resumir y organizar los datos para comprender mejor la distribución de cada variable.

Cargar Datos

setwd("/cloud/project")
datos <- read.csv("MPG_consumo.csv", header = TRUE, sep = "," , dec = ".")
str(datos)
## 'data.frame':    38113 obs. of  81 variables:
##  $ Vehicle.ID                         : int  26587 27705 26561 27681 27550 28426 27549 28425 27593 28455 ...
##  $ Year                               : int  1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 ...
##  $ Make                               : chr  "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" ...
##  $ Model                              : chr  "GT V6 2.5" "GT V6 2.5" "Spider Veloce 2000" "Spider Veloce 2000" ...
##  $ Class                              : chr  "Minicompact Cars" "Minicompact Cars" "Two Seaters" "Two Seaters" ...
##  $ Drive                              : chr  "" "" "" "" ...
##  $ Transmission                       : chr  "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" ...
##  $ Transmission.Descriptor            : chr  "" "" "" "" ...
##  $ Engine.Index                       : int  9001 9005 9002 9006 1830 1880 1831 1881 1524 1574 ...
##  $ Engine.Descriptor                  : chr  "(FFS)" "(FFS) CA model" "(FFS)" "(FFS) CA model" ...
##  $ Engine.Cylinders                   : int  6 6 4 4 4 4 6 6 6 6 ...
##  $ Engine.Displacement                : num  2.5 2.5 2 2 2.5 2.5 4.2 4.2 4.2 4.2 ...
##  $ Turbocharger                       : logi  NA NA NA NA NA NA ...
##  $ Supercharger                       : chr  "" "" "" "" ...
##  $ Fuel.Type                          : chr  "Regular" "Regular" "Regular" "Regular" ...
##  $ Fuel.Type.1                        : chr  "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" ...
##  $ Fuel.Type.2                        : chr  "" "" "" "" ...
##  $ City.MPG..FT1.                     : int  17 17 18 18 18 18 13 13 15 15 ...
##  $ Unrounded.City.MPG..FT1.           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.MPG..FT2.                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.City.MPG..FT2.           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Gasoline.Consumption..CD.     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Electricity.Consumption       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Utility.Factor                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.MPG..FT1.                  : int  24 24 25 25 17 17 13 13 20 19 ...
##  $ Unrounded.Highway.MPG..FT1.        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.MPG..FT2.                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.Highway.MPG..FT2.        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Gasoline.Consumption..CD.  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Electricity.Consumption    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Utility.Factor             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unadjusted.City.MPG..FT1.          : num  21 21 23 23 22 22 16 16 19 19 ...
##  $ Unadjusted.Highway.MPG..FT1.       : num  34 34 35 35 24 24 18 18 27 26 ...
##  $ Unadjusted.City.MPG..FT2.          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unadjusted.Highway.MPG..FT2.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.MPG..FT1.                 : int  20 20 21 21 17 17 13 13 17 17 ...
##  $ Unrounded.Combined.MPG..FT1.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.MPG..FT2.                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Unrounded.Combined.MPG..FT2.       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Electricity.Consumption   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Gasoline.Consumption..CD. : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined.Utility.Factor            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Annual.Fuel.Cost..FT1.             : int  1750 1750 1650 1650 2050 2050 2700 2700 2050 2050 ...
##  $ Annual.Fuel.Cost..FT2.             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gas.Guzzler.Tax                    : chr  "" "" "" "" ...
##  $ Save.or.Spend..5.Year.             : int  -2000 -2000 -1500 -1500 -3500 -3500 -6750 -6750 -3500 -3500 ...
##  $ Annual.Consumption.in.Barrels..FT1.: num  16.5 16.5 15.7 15.7 19.4 ...
##  $ Annual.Consumption.in.Barrels..FT2.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Tailpipe.CO2..FT1.                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Tailpipe.CO2.in.Grams.Mile..FT1.   : num  444 444 423 423 523 ...
##  $ Tailpipe.CO2..FT2.                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Tailpipe.CO2.in.Grams.Mile..FT2.   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fuel.Economy.Score                 : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ GHG.Score                          : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ GHG.Score..Alt.Fuel.               : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ My.MPG.Data                        : chr  "N" "N" "N" "N" ...
##  $ X2D.Passenger.Volume               : int  74 74 0 0 0 0 0 0 0 0 ...
##  $ X2D.Luggage.Volume                 : int  7 7 0 0 0 0 0 0 0 0 ...
##  $ X4D.Passenger.Volume               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X4D.Luggage.Volume                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hatchback.Passenger.Volume         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hatchback.Luggage.Volume           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Start.Stop.Technology              : chr  "" "" "" "" ...
##  $ Alternative.Fuel.Technology        : chr  "" "" "" "" ...
##  $ Electric.Motor                     : chr  "" "" "" "" ...
##  $ Manufacturer.Code                  : chr  "" "" "" "" ...
##  $ Gasoline.Electricity.Blended..CD.  : chr  "False" "False" "False" "False" ...
##  $ Vehicle.Charger                    : chr  "" "" "" "" ...
##  $ Alternate.Charger                  : chr  "" "" "" "" ...
##  $ Hours.to.Charge..120V.             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hours.to.Charge..240V.             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Hours.to.Charge..AC.240V.          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.City.MPG                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.Highway.MPG              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Composite.Combined.MPG             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Range..FT1.                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ City.Range..FT1.                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Range..FT1.                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Range..FT2.                        : chr  "" "" "" "" ...
##  $ City.Range..FT2.                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Highway.Range..FT2.                : num  0 0 0 0 0 0 0 0 0 0 ...

2 Engine Cylinders

datos <- datos[!is.na(datos$Engine.Cylinders), ]
datos <- datos[datos$Engine.Cylinders != 0, ]
datos$Engine.Cylinders <- factor(datos$Engine.Cylinders,
                                 levels = sort(unique(datos$Engine.Cylinders)))
var_engine <- as.numeric(as.character(datos$Engine.Cylinders))
TDF <- table(var_engine)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
##   var_engine  Freq           hi     hi_porc
## 1          2    55 0.0014482450  0.14482450
## 2          3   213 0.0056086579  0.56086579
## 3          4 14598 0.3843905522 38.43905522
## 4          5   766 0.0201701030  2.01701030
## 5          6 13268 0.3493693551 34.93693551
## 6          8  8342 0.2196592675 21.96592675
## 7         10   153 0.0040287542  0.40287542
## 8         12   574 0.0151144114  1.51144114
## 9         16     8 0.0002106538  0.02106538
if (length(TDF) > 0 && all(is.finite(TDF))) {
  barplot(TDF,
          main = "Grafica No.1:\nHistograma de Distribución de Engine Cylinders",
          xlab = "Número de cilindros",
          ylab = "Frecuencia",
          col = terrain.colors(length(TDF)),
          las = 1,
          cex.names = 0.8,
          ylim = c(0, max(TDF) * 1.2))
} else {
  cat("Advertencia: No hay datos válidos para graficar Engine Cylinders.\n")
}

ni <- as.numeric(TDF)
MC <- as.numeric(names(TDF))
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
if (length(MC) > 0 && all(is.finite(MC))) {
  plot(MC, Ni_asc,
       type = "o", col = "blue", pch = 16,
       main = "Grafica No.2:\nOjivas de Engine Cylinders",
       xlab = "Número de cilindros",
       ylab = "Frecuencia acumulada",
       ylim = c(0, max(Ni_asc) * 1.1))

  lines(MC, Ni_desc,
        type = "o", col = "red", pch = 17)

  legend("topleft",
         legend = c("Ojiva Ascendente", "Ojiva Descendente"),
         col = c("blue", "red"),
         pch = c(16,17),
         lty = 1)
} else {
  cat("Advertencia: No hay datos válidos para ojivas de Engine Cylinders.\n")
}

if (length(var_engine) > 0 && all(is.finite(var_engine))) {
  boxplot(var_engine,
          horizontal = TRUE,
          col = "orange",
          main = "Grafica No.3:\nDiagrama de Caja Engine Cylinders",
          xlab = "Número de cilindros")
} else {
  cat("Advertencia: No hay datos válidos para boxplot de Engine Cylinders.\n")
}

3 Year

datos <- datos[!is.na(datos$Year), ]
n <- length(datos$Year)
k <- ceiling(1 + 3.322 * log10(n))
intervalos <- cut(datos$Year, breaks = k, right = FALSE, include.lowest = TRUE)
TDF <- table(intervalos)
tabla <- as.data.frame(TDF)
limites <- do.call(rbind, strsplit(gsub("\\[|\\)|\\]", "", levels(intervalos)), ","))
lim_inf <- as.numeric(limites[,1])
lim_sup <- as.numeric(limites[,2])
MC <- (lim_inf + lim_sup) / 2
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(Intervalo = levels(intervalos), MC = MC, Frecuencia = tabla$Freq, hi = hi, hi_porc = hi_porc)
print(tabla)
##      Intervalo     MC Frecuencia         hi  hi_porc
## 1  [1984,1986) 1985.0       3663 0.09645312 9.645312
## 2  [1986,1988) 1987.0       2456 0.06467072 6.467072
## 3  [1988,1990) 1989.0       2283 0.06011533 6.011533
## 4  [1990,1992) 1991.0       2210 0.05819312 5.819312
## 5  [1992,1994) 1993.0       2214 0.05829844 5.829844
## 6  [1994,1996) 1995.0       1949 0.05132054 5.132054
## 7  [1996,1998) 1997.0       1535 0.04041920 4.041920
## 8  [1998,2000) 1999.0       1654 0.04355268 4.355268
## 9  [2000,2001) 2000.5       1742 0.04586987 4.586987
## 10 [2001,2003) 2002.0       2016 0.05308476 5.308476
## 11 [2003,2005) 2004.0       2288 0.06024699 6.024699
## 12 [2005,2007) 2006.0       2230 0.05871975 5.871975
## 13 [2007,2009) 2008.0       2364 0.06224820 6.224820
## 14 [2009,2011) 2010.0       2217 0.05837744 5.837744
## 15 [2011,2013) 2012.0       2288 0.06024699 6.024699
## 16 [2013,2015) 2014.0       2440 0.06424941 6.424941
## 17 [2015,2017] 2016.0       2428 0.06393343 6.393343
if (length(TDF) > 0 && all(is.finite(TDF))) {
  barplot(TDF,
          main = "Grafica No.4:\nDistribución de Year",
          xlab = "Año",
          ylab = "Frecuencia",
          col = terrain.colors(length(TDF)),
          las = 2,
          cex.names = 0.8,
          ylim = c(0, max(TDF) * 1.2))
} else {
  cat("Advertencia: No hay datos válidos para graficar Year.\n")
}

ni <- as.numeric(TDF)
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
if (length(MC) > 0 && all(is.finite(MC))) {
  plot(MC, Ni_asc,
       type = "o", col = "blue", pch = 16,
       main = "Grafica No.5:\nOjivas de Year por intervalos",
       xlab = "Marca de clase (Año)",
       ylab = "Frecuencia acumulada",
       ylim = c(0, max(Ni_asc) * 1.1))

  lines(MC, Ni_desc,
        type = "o", col = "red", pch = 17)

  legend("topleft",
         legend = c("Ojiva Ascendente", "Ojiva Descendente"),
         col = c("blue", "red"),
         pch = c(16, 17),
         lty = 1)
} else {
  cat("Advertencia: No hay datos válidos para ojivas de Year.\n")
}

if (length(datos$Year) > 0 && all(is.finite(datos$Year))) {
  boxplot(datos$Year,
          horizontal = TRUE,
          col = "orange",
          main = "Grafica No.6:\nDiagrama de Caja Year",
          xlab = "Año")
} else {
  cat("Advertencia: No hay datos válidos para boxplot de Year.\n")
}

4 Hours to Charge 120V

datos <- datos[!is.na(datos$Hours.to.Charge..120V.), ]
if (is.factor(datos$Hours.to.Charge..120V.)) {
  var_charge120 <- as.numeric(as.character(datos$Hours.to.Charge..120V.))
} else {
  var_charge120 <- as.numeric(datos$Hours.to.Charge..120V.)
}
TDF <- table(var_charge120)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
##   var_charge120  Freq hi hi_porc
## 1             0 37977  1     100
if (length(TDF) > 0 && all(is.finite(TDF))) {
  barplot(TDF,
          main = "Grafica No.7:\nDistribución de Hours to Charge 120V",
          xlab = "Horas de Carga",
          ylab = "Frecuencia",
          col = terrain.colors(length(TDF)),
          las = 1,
          cex.names = 0.8,
          ylim = c(0, max(TDF) * 1.2))
} else {
  cat("Advertencia: No hay datos válidos para graficar Hours to Charge 120V.\n")
}

ni <- as.numeric(TDF)
MC <- as.numeric(names(TDF))
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
if (length(MC) > 0 && all(is.finite(MC))) {
  plot(MC, Ni_asc,
       type = "o", col = "blue", pch = 16,
       main = "Grafica No.8:\nOjivas de Hours to Charge 120V",
       xlab = "Horas de Carga",
       ylab = "Frecuencia acumulada",
       ylim = c(0, max(Ni_asc) * 1.1))

  lines(MC, Ni_desc,
        type = "o", col = "red", pch = 17)

  legend("topleft",
         legend = c("Ojiva Ascendente", "Ojiva Descendente"),
         col = c("blue", "red"),
         pch = c(16, 17),
         lty = 1)
} else {
  cat("Advertencia: No hay datos válidos para ojivas de Hours to Charge 120V.\n")
}

if (length(var_charge120) > 0 && all(is.finite(var_charge120))) {
  boxplot(var_charge120,
          horizontal = TRUE,
          col = "orange",
          main = "Grafica No.9:\nDiagrama de Caja Hours to Charge 120V",
          xlab = "Horas de Carga")
} else {
  cat("Advertencia: No hay datos válidos para boxplot de Hours to Charge 120V.\n")
}

5 Hours to Charge AC 240V

datos <- datos[!is.na(datos$Hours.to.Charge..AC.240V.), ]
datos <- datos[datos$Hours.to.Charge..AC.240V. != 0, ]
var_chargeAC240 <- as.numeric(datos$Hours.to.Charge..AC.240V.)
TDF <- table(var_chargeAC240)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
## [1] Freq    hi      hi_porc
## <0 rows> (or 0-length row.names)
if (length(TDF) > 0 && all(is.finite(TDF))) {
  barplot(TDF,
          main = "Grafica No.10:\nDistribución de Hours to Charge AC 240V",
          xlab = "Horas de Carga",
          ylab = "Frecuencia",
          col = terrain.colors(length(TDF)),
          las = 1,
          cex.names = 0.8,
          ylim = c(0, max(TDF) * 1.2))
} else {
  cat("Advertencia: No hay datos válidos para graficar Hours to Charge AC 240V.\n")
}
## Advertencia: No hay datos válidos para graficar Hours to Charge AC 240V.
ni <- as.numeric(TDF)
MC <- as.numeric(names(TDF))
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
if (length(MC) > 0 && all(is.finite(MC))) {
  plot(MC, Ni_asc,
       type = "o", col = "blue", pch = 16,
       main = "Grafica No.11:\nOjivas de Hours to Charge AC 240V",
       xlab = "Horas de Carga",
       ylab = "Frecuencia acumulada",
       ylim = c(0, max(Ni_asc) * 1.1))

  lines(MC, Ni_desc,
        type = "o", col = "red", pch = 17)

  legend("topleft",
         legend = c("Ojiva Ascendente", "Ojiva Descendente"),
         col = c("blue", "red"),
         pch = c(16, 17),
         lty = 1)
} else {
  cat("Advertencia: No hay datos válidos para ojivas de Hours to Charge AC 240V.\n")
}
## Advertencia: No hay datos válidos para ojivas de Hours to Charge AC 240V.
if (length(var_chargeAC240) > 0 && all(is.finite(var_chargeAC240))) {
  boxplot(var_chargeAC240,
          horizontal = TRUE,
          col = "orange",
          main = "Grafica No.12:\nDiagrama de Caja Hours to Charge AC 240V",
          xlab = "Horas de Carga")
} else {
  cat("Advertencia: No hay datos válidos para boxplot de Hours to Charge AC 240V.\n")
}
## Advertencia: No hay datos válidos para boxplot de Hours to Charge AC 240V.

6 Hours to Charge 240V

datos <- datos[!is.na(datos$Hours.to.Charge..240V.), ]
datos <- datos[datos$Hours.to.Charge..240V. != 0, ]
var_charge240 <- as.numeric(datos$Hours.to.Charge..240V.)
TDF <- table(var_charge240)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
## [1] Freq    hi      hi_porc
## <0 rows> (or 0-length row.names)
if (length(TDF) > 0 && all(is.finite(TDF))) {
  barplot(TDF,
          main = "Grafica No.13:\nDistribución de Hours to Charge 240V",
          xlab = "Horas de Carga",
          ylab = "Frecuencia",
          col = terrain.colors(length(TDF)),
          las = 1,
          cex.names = 0.8,
          ylim = c(0, max(TDF) * 1.2))
} else {
  cat("Advertencia: No hay datos válidos para graficar Hours to Charge 240V.\n")
}
## Advertencia: No hay datos válidos para graficar Hours to Charge 240V.
ni <- as.numeric(TDF)
MC <- as.numeric(names(TDF))
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
if (length(MC) > 0 && all(is.finite(MC))) {
  plot(MC, Ni_asc,
       type = "o", col = "blue", pch = 16,
       main = "Grafica No.14:\nOjivas de Hours to Charge 240V",
       xlab = "Horas de Carga",
       ylab = "Frecuencia acumulada",
       ylim = c(0, max(Ni_asc) * 1.1))

  lines(MC, Ni_desc,
        type = "o", col = "red", pch = 17)

  legend("topleft",
         legend = c("Ojiva Ascendente", "Ojiva Descendente"),
         col = c("blue", "red"),
         pch = c(16, 17),
         lty = 1)
} else {
  cat("Advertencia: No hay datos válidos para ojivas de Hours to Charge 240V.\n")
}
## Advertencia: No hay datos válidos para ojivas de Hours to Charge 240V.
if (length(var_charge240) > 0 && all(is.finite(var_charge240))) {
  boxplot(var_charge240,
          horizontal = TRUE,
          col = "orange",
          main = "Grafica No.15:\nDiagrama de Caja Hours to Charge 240V",
          xlab = "Horas de Carga")
} else {
  cat("Advertencia: No hay datos válidos para boxplot de Hours to Charge 240V.\n")
}
## Advertencia: No hay datos válidos para boxplot de Hours to Charge 240V.