En esta sección, nos enfocaremos en las variables discretas del estudio, analizando su comportamiento y características a través de la estadística descriptiva. El objetivo es resumir y organizar los datos para comprender mejor la distribución de cada variable.
Cargar Datos
# Load the MPG consumption dataset and print its structure.
# NOTE(review): setwd() ties the script to one machine's directory layout;
# it is kept because code outside this view may rely on the working directory.
setwd("/cloud/project")
# header = TRUE, sep = "," and dec = "." are already read.csv()'s defaults.
datos <- read.csv(file = "MPG_consumo.csv")
str(datos)
## 'data.frame': 38113 obs. of 81 variables:
## $ Vehicle.ID : int 26587 27705 26561 27681 27550 28426 27549 28425 27593 28455 ...
## $ Year : int 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 ...
## $ Make : chr "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" "Alfa Romeo" ...
## $ Model : chr "GT V6 2.5" "GT V6 2.5" "Spider Veloce 2000" "Spider Veloce 2000" ...
## $ Class : chr "Minicompact Cars" "Minicompact Cars" "Two Seaters" "Two Seaters" ...
## $ Drive : chr "" "" "" "" ...
## $ Transmission : chr "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" "Manual 5-Speed" ...
## $ Transmission.Descriptor : chr "" "" "" "" ...
## $ Engine.Index : int 9001 9005 9002 9006 1830 1880 1831 1881 1524 1574 ...
## $ Engine.Descriptor : chr "(FFS)" "(FFS) CA model" "(FFS)" "(FFS) CA model" ...
## $ Engine.Cylinders : int 6 6 4 4 4 4 6 6 6 6 ...
## $ Engine.Displacement : num 2.5 2.5 2 2 2.5 2.5 4.2 4.2 4.2 4.2 ...
## $ Turbocharger : logi NA NA NA NA NA NA ...
## $ Supercharger : chr "" "" "" "" ...
## $ Fuel.Type : chr "Regular" "Regular" "Regular" "Regular" ...
## $ Fuel.Type.1 : chr "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" "Regular Gasoline" ...
## $ Fuel.Type.2 : chr "" "" "" "" ...
## $ City.MPG..FT1. : int 17 17 18 18 18 18 13 13 15 15 ...
## $ Unrounded.City.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.City.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.MPG..FT1. : int 24 24 25 25 17 17 13 13 20 19 ...
## $ Unrounded.Highway.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.Highway.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Unadjusted.City.MPG..FT1. : num 21 21 23 23 22 22 16 16 19 19 ...
## $ Unadjusted.Highway.MPG..FT1. : num 34 34 35 35 24 24 18 18 27 26 ...
## $ Unadjusted.City.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Unadjusted.Highway.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.MPG..FT1. : int 20 20 21 21 17 17 13 13 17 17 ...
## $ Unrounded.Combined.MPG..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.MPG..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Unrounded.Combined.MPG..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Electricity.Consumption : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Gasoline.Consumption..CD. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Combined.Utility.Factor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Annual.Fuel.Cost..FT1. : int 1750 1750 1650 1650 2050 2050 2700 2700 2050 2050 ...
## $ Annual.Fuel.Cost..FT2. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Gas.Guzzler.Tax : chr "" "" "" "" ...
## $ Save.or.Spend..5.Year. : int -2000 -2000 -1500 -1500 -3500 -3500 -6750 -6750 -3500 -3500 ...
## $ Annual.Consumption.in.Barrels..FT1.: num 16.5 16.5 15.7 15.7 19.4 ...
## $ Annual.Consumption.in.Barrels..FT2.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Tailpipe.CO2..FT1. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ Tailpipe.CO2.in.Grams.Mile..FT1. : num 444 444 423 423 523 ...
## $ Tailpipe.CO2..FT2. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ Tailpipe.CO2.in.Grams.Mile..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fuel.Economy.Score : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ GHG.Score : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ GHG.Score..Alt.Fuel. : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ My.MPG.Data : chr "N" "N" "N" "N" ...
## $ X2D.Passenger.Volume : int 74 74 0 0 0 0 0 0 0 0 ...
## $ X2D.Luggage.Volume : int 7 7 0 0 0 0 0 0 0 0 ...
## $ X4D.Passenger.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ X4D.Luggage.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hatchback.Passenger.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hatchback.Luggage.Volume : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Start.Stop.Technology : chr "" "" "" "" ...
## $ Alternative.Fuel.Technology : chr "" "" "" "" ...
## $ Electric.Motor : chr "" "" "" "" ...
## $ Manufacturer.Code : chr "" "" "" "" ...
## $ Gasoline.Electricity.Blended..CD. : chr "False" "False" "False" "False" ...
## $ Vehicle.Charger : chr "" "" "" "" ...
## $ Alternate.Charger : chr "" "" "" "" ...
## $ Hours.to.Charge..120V. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Hours.to.Charge..240V. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Hours.to.Charge..AC.240V. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.City.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.Highway.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Composite.Combined.MPG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Range..FT1. : int 0 0 0 0 0 0 0 0 0 0 ...
## $ City.Range..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Range..FT1. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Range..FT2. : chr "" "" "" "" ...
## $ City.Range..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Highway.Range..FT2. : num 0 0 0 0 0 0 0 0 0 0 ...
# Frequency table for Engine.Cylinders.
#
# The cleaning filter (drop NA and 0 cylinders) is applied to a local vector
# rather than to `datos` itself. Overwriting the shared data frame here would
# silently remove those vehicles from every analysis that follows (year,
# charging hours), although those analyses do not depend on cylinder count.
# The column inside `datos` is left untouched.
var_engine <- as.numeric(as.character(datos$Engine.Cylinders))
var_engine <- var_engine[!is.na(var_engine) & var_engine != 0]

# Absolute frequencies (table() already orders the distinct values).
TDF <- table(var_engine)
tabla <- as.data.frame(TDF)

# Relative frequencies, as proportion and as percentage.
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
## var_engine Freq hi hi_porc
## 1 2 55 0.0014482450 0.14482450
## 2 3 213 0.0056086579 0.56086579
## 3 4 14598 0.3843905522 38.43905522
## 4 5 766 0.0201701030 2.01701030
## 5 6 13268 0.3493693551 34.93693551
## 6 8 8342 0.2196592675 21.96592675
## 7 10 153 0.0040287542 0.40287542
## 8 12 574 0.0151144114 1.51144114
## 9 16 8 0.0002106538 0.02106538
# Bar chart of the absolute frequency of each cylinder count.
# When the frequency table is empty or holds non-finite counts, only a
# console warning is printed.
if (length(TDF) == 0 || !all(is.finite(TDF))) {
  cat("Advertencia: No hay datos válidos para graficar Engine Cylinders.\n")
} else {
  barplot(
    height = TDF,
    col = terrain.colors(length(TDF)),
    ylim = c(0, 1.2 * max(TDF)), # 20 % headroom above the tallest bar
    main = "Grafica No.1:\nHistograma de Distribución de Engine Cylinders",
    xlab = "Número de cilindros",
    ylab = "Frecuencia",
    cex.names = 0.8,
    las = 1
  )
}
# Ascending and descending cumulative-frequency curves ("ojivas") for
# Engine.Cylinders. The distinct values (names of TDF) are the x positions.
MC <- as.numeric(names(TDF))
ni <- as.numeric(TDF)
Ni_asc <- cumsum(ni)             # running total from the smallest value up
Ni_desc <- rev(cumsum(rev(ni)))  # running total from the largest value down

if (length(MC) == 0 || !all(is.finite(MC))) {
  cat("Advertencia: No hay datos válidos para ojivas de Engine Cylinders.\n")
} else {
  plot(
    x = MC, y = Ni_asc,
    type = "o", pch = 16, col = "blue",
    ylim = c(0, 1.1 * max(Ni_asc)),
    main = "Grafica No.2:\nOjivas de Engine Cylinders",
    xlab = "Número de cilindros",
    ylab = "Frecuencia acumulada"
  )
  lines(x = MC, y = Ni_desc, type = "o", pch = 17, col = "red")
  legend(
    x = "topleft",
    legend = c("Ojiva Ascendente", "Ojiva Descendente"),
    col = c("blue", "red"),
    pch = c(16, 17),
    lty = 1
  )
}
# Horizontal box plot of the cleaned cylinder counts; a console warning is
# printed instead when the vector is empty or contains non-finite values.
if (length(var_engine) == 0 || !all(is.finite(var_engine))) {
  cat("Advertencia: No hay datos válidos para boxplot de Engine Cylinders.\n")
} else {
  boxplot(
    x = var_engine,
    main = "Grafica No.3:\nDiagrama de Caja Engine Cylinders",
    xlab = "Número de cilindros",
    col = "orange",
    horizontal = TRUE
  )
}
# Frequency table for Year, grouped into k class intervals (Sturges' rule).
#
# Rows with a missing Year are dropped from `datos` itself because the Year
# box plot further down reads `datos$Year` directly.
datos <- datos[!is.na(datos$Year), ]
n <- length(datos$Year)
if (n == 0) {
  stop("No hay datos válidos de Year para construir la tabla de frecuencias.",
       call. = FALSE)
}
k <- ceiling(1 + 3.322 * log10(n))

# Build the class limits explicitly instead of letting cut() choose them and
# then parsing its labels: cut() rounds the labels, so limits and class marks
# recovered from the label text do not match the breaks actually used for
# counting. These are the same equal-width breaks cut(breaks = k) would use
# (minus its 0.1 % end padding, unnecessary with include.lowest = TRUE).
rango <- range(datos$Year)
if (rango[1] == rango[2]) {
  # Single distinct year: widen the range so the breaks stay unique.
  rango <- rango + c(-0.5, 0.5)
}
cortes <- seq(rango[1], rango[2], length.out = k + 1)
cortes[k + 1] <- rango[2] # guard against rounding so the maximum is included

# Left-closed intervals; include.lowest = TRUE closes the last one on the
# right so the maximum year is counted. dig.lab keeps the labels precise.
intervalos <- cut(datos$Year, breaks = cortes, right = FALSE,
                  include.lowest = TRUE, dig.lab = 6)
TDF <- table(intervalos)
tabla <- as.data.frame(TDF)

# Numeric lower/upper limits and class marks taken straight from the breaks.
lim_inf <- cortes[-length(cortes)]
lim_sup <- cortes[-1]
limites <- cbind(lim_inf, lim_sup)
MC <- (lim_inf + lim_sup) / 2

# Relative frequencies, as proportion and as percentage.
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(Intervalo = levels(intervalos), MC = MC,
                    Frecuencia = tabla$Freq, hi = hi, hi_porc = hi_porc)
print(tabla)
## Intervalo MC Frecuencia hi hi_porc
## 1 [1984,1986) 1985.0 3663 0.09645312 9.645312
## 2 [1986,1988) 1987.0 2456 0.06467072 6.467072
## 3 [1988,1990) 1989.0 2283 0.06011533 6.011533
## 4 [1990,1992) 1991.0 2210 0.05819312 5.819312
## 5 [1992,1994) 1993.0 2214 0.05829844 5.829844
## 6 [1994,1996) 1995.0 1949 0.05132054 5.132054
## 7 [1996,1998) 1997.0 1535 0.04041920 4.041920
## 8 [1998,2000) 1999.0 1654 0.04355268 4.355268
## 9 [2000,2001) 2000.5 1742 0.04586987 4.586987
## 10 [2001,2003) 2002.0 2016 0.05308476 5.308476
## 11 [2003,2005) 2004.0 2288 0.06024699 6.024699
## 12 [2005,2007) 2006.0 2230 0.05871975 5.871975
## 13 [2007,2009) 2008.0 2364 0.06224820 6.224820
## 14 [2009,2011) 2010.0 2217 0.05837744 5.837744
## 15 [2011,2013) 2012.0 2288 0.06024699 6.024699
## 16 [2013,2015) 2014.0 2440 0.06424941 6.424941
## 17 [2015,2017] 2016.0 2428 0.06393343 6.393343
# Bar chart of the absolute frequency of each Year class interval.
# las = 2 draws the interval labels perpendicular to the axis.
if (length(TDF) == 0 || !all(is.finite(TDF))) {
  cat("Advertencia: No hay datos válidos para graficar Year.\n")
} else {
  barplot(
    height = TDF,
    col = terrain.colors(length(TDF)),
    ylim = c(0, 1.2 * max(TDF)), # 20 % headroom above the tallest bar
    main = "Grafica No.4:\nDistribución de Year",
    xlab = "Año",
    ylab = "Frecuencia",
    cex.names = 0.8,
    las = 2
  )
}
# Ascending and descending cumulative-frequency curves ("ojivas") for Year,
# plotted against the class marks MC.
#
# The cumulative frequencies must be recomputed from the Year frequency table
# (TDF) at this point. Otherwise Ni_asc / Ni_desc still hold the values of the
# variable analysed before Year, whose length does not match MC, and plot()
# fails or shows the wrong data.
ni <- as.numeric(TDF)
Ni_asc <- cumsum(ni)             # running total from the first interval up
Ni_desc <- rev(cumsum(rev(ni)))  # running total from the last interval down

if (length(MC) > 0 && all(is.finite(MC)) && length(MC) == length(ni)) {
  plot(MC, Ni_asc,
       type = "o", col = "blue", pch = 16,
       main = "Grafica No.5:\nOjivas de Year por intervalos",
       xlab = "Marca de clase (Año)",
       ylab = "Frecuencia acumulada",
       ylim = c(0, max(Ni_asc) * 1.1))
  lines(MC, Ni_desc,
        type = "o", col = "red", pch = 17)
  legend("topleft",
         legend = c("Ojiva Ascendente", "Ojiva Descendente"),
         col = c("blue", "red"),
         pch = c(16, 17),
         lty = 1)
} else {
  cat("Advertencia: No hay datos válidos para ojivas de Year.\n")
}
# Horizontal box plot of Year; a console warning is printed instead when the
# column is empty or contains non-finite values.
if (length(datos$Year) == 0 || !all(is.finite(datos$Year))) {
  cat("Advertencia: No hay datos válidos para boxplot de Year.\n")
} else {
  boxplot(
    x = datos$Year,
    main = "Grafica No.6:\nDiagrama de Caja Year",
    xlab = "Año",
    col = "orange",
    horizontal = TRUE
  )
}
# Frequency table for Hours.to.Charge..120V.
# Rows with a missing value in this column are removed from `datos`.
datos <- datos[!is.na(datos$Hours.to.Charge..120V.), ]

# Convert to numeric; a factor must go through character first so that the
# labels, not the internal level codes, are converted.
var_charge120 <- if (is.factor(datos$Hours.to.Charge..120V.)) {
  as.numeric(as.character(datos$Hours.to.Charge..120V.))
} else {
  as.numeric(datos$Hours.to.Charge..120V.)
}

# Absolute frequencies plus relative frequencies (proportion and percentage).
TDF <- table(var_charge120)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- 100 * hi
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
## var_charge120 Freq hi hi_porc
## 1 0 37977 1 100
# Bar chart of the absolute frequency of each 120V charging time.
# When the frequency table is empty or holds non-finite counts, only a
# console warning is printed.
if (length(TDF) == 0 || !all(is.finite(TDF))) {
  cat("Advertencia: No hay datos válidos para graficar Hours to Charge 120V.\n")
} else {
  barplot(
    height = TDF,
    col = terrain.colors(length(TDF)),
    ylim = c(0, 1.2 * max(TDF)), # 20 % headroom above the tallest bar
    main = "Grafica No.7:\nDistribución de Hours to Charge 120V",
    xlab = "Horas de Carga",
    ylab = "Frecuencia",
    cex.names = 0.8,
    las = 1
  )
}
# Ascending and descending cumulative-frequency curves ("ojivas") for the
# 120V charging time. The distinct values (names of TDF) are the x positions.
MC <- as.numeric(names(TDF))
ni <- as.numeric(TDF)
Ni_asc <- cumsum(ni)             # running total from the smallest value up
Ni_desc <- rev(cumsum(rev(ni)))  # running total from the largest value down

if (length(MC) == 0 || !all(is.finite(MC))) {
  cat("Advertencia: No hay datos válidos para ojivas de Hours to Charge 120V.\n")
} else {
  plot(
    x = MC, y = Ni_asc,
    type = "o", pch = 16, col = "blue",
    ylim = c(0, 1.1 * max(Ni_asc)),
    main = "Grafica No.8:\nOjivas de Hours to Charge 120V",
    xlab = "Horas de Carga",
    ylab = "Frecuencia acumulada"
  )
  lines(x = MC, y = Ni_desc, type = "o", pch = 17, col = "red")
  legend(
    x = "topleft",
    legend = c("Ojiva Ascendente", "Ojiva Descendente"),
    col = c("blue", "red"),
    pch = c(16, 17),
    lty = 1
  )
}
# Horizontal box plot of the 120V charging time; a console warning is printed
# instead when the vector is empty or contains non-finite values.
if (length(var_charge120) == 0 || !all(is.finite(var_charge120))) {
  cat("Advertencia: No hay datos válidos para boxplot de Hours to Charge 120V.\n")
} else {
  boxplot(
    x = var_charge120,
    main = "Grafica No.9:\nDiagrama de Caja Hours to Charge 120V",
    xlab = "Horas de Carga",
    col = "orange",
    horizontal = TRUE
  )
}
# Frequency table for Hours.to.Charge..AC.240V., excluding NA and 0 hours.
#
# The filter is applied to a local vector rather than to `datos`: overwriting
# the shared data frame with the "non-zero AC 240V" subset would restrict
# every later analysis to those vehicles only.
var_chargeAC240 <- as.numeric(datos$Hours.to.Charge..AC.240V.)
var_chargeAC240 <- var_chargeAC240[!is.na(var_chargeAC240) &
                                     var_chargeAC240 != 0]

# Absolute frequencies plus relative frequencies (proportion and percentage).
TDF <- table(var_chargeAC240)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
## [1] Freq hi hi_porc
## <0 rows> (or 0-length row.names)
# Bar chart of the absolute frequency of each AC 240V charging time.
# When the frequency table is empty or holds non-finite counts, only a
# console warning is printed.
if (length(TDF) == 0 || !all(is.finite(TDF))) {
  cat("Advertencia: No hay datos válidos para graficar Hours to Charge AC 240V.\n")
} else {
  barplot(
    height = TDF,
    col = terrain.colors(length(TDF)),
    ylim = c(0, 1.2 * max(TDF)), # 20 % headroom above the tallest bar
    main = "Grafica No.10:\nDistribución de Hours to Charge AC 240V",
    xlab = "Horas de Carga",
    ylab = "Frecuencia",
    cex.names = 0.8,
    las = 1
  )
}
## Advertencia: No hay datos válidos para graficar Hours to Charge AC 240V.
# Ascending and descending cumulative-frequency curves ("ojivas") for the
# AC 240V charging time. The distinct values (names of TDF) are the x
# positions.
MC <- as.numeric(names(TDF))
ni <- as.numeric(TDF)
Ni_asc <- cumsum(ni)             # running total from the smallest value up
Ni_desc <- rev(cumsum(rev(ni)))  # running total from the largest value down

if (length(MC) == 0 || !all(is.finite(MC))) {
  cat("Advertencia: No hay datos válidos para ojivas de Hours to Charge AC 240V.\n")
} else {
  plot(
    x = MC, y = Ni_asc,
    type = "o", pch = 16, col = "blue",
    ylim = c(0, 1.1 * max(Ni_asc)),
    main = "Grafica No.11:\nOjivas de Hours to Charge AC 240V",
    xlab = "Horas de Carga",
    ylab = "Frecuencia acumulada"
  )
  lines(x = MC, y = Ni_desc, type = "o", pch = 17, col = "red")
  legend(
    x = "topleft",
    legend = c("Ojiva Ascendente", "Ojiva Descendente"),
    col = c("blue", "red"),
    pch = c(16, 17),
    lty = 1
  )
}
## Advertencia: No hay datos válidos para ojivas de Hours to Charge AC 240V.
# Horizontal box plot of the AC 240V charging time; a console warning is
# printed instead when the vector is empty or contains non-finite values.
if (length(var_chargeAC240) == 0 || !all(is.finite(var_chargeAC240))) {
  cat("Advertencia: No hay datos válidos para boxplot de Hours to Charge AC 240V.\n")
} else {
  boxplot(
    x = var_chargeAC240,
    main = "Grafica No.12:\nDiagrama de Caja Hours to Charge AC 240V",
    xlab = "Horas de Carga",
    col = "orange",
    horizontal = TRUE
  )
}
## Advertencia: No hay datos válidos para boxplot de Hours to Charge AC 240V.
# Frequency table for Hours.to.Charge..240V., excluding NA and 0 hours.
#
# The filter is applied to a local vector rather than to `datos`, so the
# shared data frame is not reduced to the "non-zero 240V" subset for any code
# that runs after this section.
var_charge240 <- as.numeric(datos$Hours.to.Charge..240V.)
var_charge240 <- var_charge240[!is.na(var_charge240) & var_charge240 != 0]

# Absolute frequencies plus relative frequencies (proportion and percentage).
TDF <- table(var_charge240)
tabla <- as.data.frame(TDF)
hi <- tabla$Freq / sum(tabla$Freq)
hi_porc <- hi * 100
tabla <- data.frame(tabla, hi = hi, hi_porc = hi_porc)
print(tabla)
## [1] Freq hi hi_porc
## <0 rows> (or 0-length row.names)
# Bar chart of the absolute frequency of each 240V charging time.
# When the frequency table is empty or holds non-finite counts, only a
# console warning is printed.
if (length(TDF) == 0 || !all(is.finite(TDF))) {
  cat("Advertencia: No hay datos válidos para graficar Hours to Charge 240V.\n")
} else {
  barplot(
    height = TDF,
    col = terrain.colors(length(TDF)),
    ylim = c(0, 1.2 * max(TDF)), # 20 % headroom above the tallest bar
    main = "Grafica No.13:\nDistribución de Hours to Charge 240V",
    xlab = "Horas de Carga",
    ylab = "Frecuencia",
    cex.names = 0.8,
    las = 1
  )
}
## Advertencia: No hay datos válidos para graficar Hours to Charge 240V.
# Ascending and descending cumulative-frequency curves ("ojivas") for the
# 240V charging time. The distinct values (names of TDF) are the x positions.
MC <- as.numeric(names(TDF))
ni <- as.numeric(TDF)
Ni_asc <- cumsum(ni)             # running total from the smallest value up
Ni_desc <- rev(cumsum(rev(ni)))  # running total from the largest value down

if (length(MC) == 0 || !all(is.finite(MC))) {
  cat("Advertencia: No hay datos válidos para ojivas de Hours to Charge 240V.\n")
} else {
  plot(
    x = MC, y = Ni_asc,
    type = "o", pch = 16, col = "blue",
    ylim = c(0, 1.1 * max(Ni_asc)),
    main = "Grafica No.14:\nOjivas de Hours to Charge 240V",
    xlab = "Horas de Carga",
    ylab = "Frecuencia acumulada"
  )
  lines(x = MC, y = Ni_desc, type = "o", pch = 17, col = "red")
  legend(
    x = "topleft",
    legend = c("Ojiva Ascendente", "Ojiva Descendente"),
    col = c("blue", "red"),
    pch = c(16, 17),
    lty = 1
  )
}
## Advertencia: No hay datos válidos para ojivas de Hours to Charge 240V.
# Horizontal box plot of the 240V charging time; a console warning is printed
# instead when the vector is empty or contains non-finite values.
if (length(var_charge240) == 0 || !all(is.finite(var_charge240))) {
  cat("Advertencia: No hay datos válidos para boxplot de Hours to Charge 240V.\n")
} else {
  boxplot(
    x = var_charge240,
    main = "Grafica No.15:\nDiagrama de Caja Hours to Charge 240V",
    xlab = "Horas de Carga",
    col = "orange",
    horizontal = TRUE
  )
}
## Advertencia: No hay datos válidos para boxplot de Hours to Charge 240V.