# Switch the session to the folder that holds the raw export.
# NOTE(review): a hard-coded setwd() ties the script to one machine; kept
# because later steps may rely on the working directory.
setwd("D:/Data")

# Read the export: header row, ";" as field separator, "." as decimal mark.
datos <- read.csv(
  file = "database.csv",
  sep = ";",
  dec = ".",
  header = TRUE
)
# Collapse the free-text Transmission column into a fixed set of groups.
transmision_v <- as.character(datos$Transmission)

# Group label -> case-insensitive prefix pattern. Entries are listed in
# priority order: when several patterns match, the earliest one wins.
patrones_transmision <- c(
  "2-Wheel Drive" = "^2-Wheel Drive",
  "4-Wheel or All-Wheel Drive" = "^4-Wheel Drive|^4-Wheel or All-Wheel Drive",
  "All-Wheel Drive" = "^All-Wheel Drive",
  "Automatic" = "^Automatic",
  "Front-Wheel Drive" = "^Front-Wheel Drive",
  "Manual" = "^Manual",
  "Rear-Wheel Drive" = "^Rear-Wheel Drive"
)

# Start with the fallback label and overwrite it wherever a pattern matches.
# Patterns are applied in reverse so that higher-priority entries are written
# last and therefore take precedence.
transmision_grupo <- rep("OTROS", length(transmision_v))
for (etiqueta in rev(names(patrones_transmision))) {
  coincide <- grepl(patrones_transmision[[etiqueta]], transmision_v,
                    ignore.case = TRUE)
  transmision_grupo[coincide] <- etiqueta
}

# The result has exactly one label per row of `datos`. grepl() returns FALSE
# for NA, so missing or empty Transmission values fall into "OTROS"; no
# NA/"" filtering is done here because the vector can contain neither, and
# dropping elements would break the row alignment needed when the levels are
# written back into `datos`.
# NOTE(review): if missing values should be excluded from the counts rather
# than counted as "OTROS", set them to NA here instead.
# Transmission groups listed by ascending MOOSE level (first entry = level 1).
orden_moose <- c(
  "Manual",
  "Front-Wheel Drive",
  "Automatic",
  "2-Wheel Drive",
  "Rear-Wheel Drive",
  "All-Wheel Drive",
  "4-Wheel or All-Wheel Drive",
  "OTROS"
)

# Named numeric lookup: group label -> level 1..8.
valores_moose <- setNames(as.numeric(seq_along(orden_moose)), orden_moose)

# Translate every row's group into its level and store it as a new column.
valor_moose_transmision <- valores_moose[transmision_grupo]
datos[["Transmision_MOOSE"]] <- valor_moose_transmision
# Frequency table of the MOOSE levels: one row per level with its count.
Trans <- setNames(
  as.data.frame(table(datos$Transmision_MOOSE)),
  c("Nivel", "Cantidad")
)
# table() returns the levels as a factor; convert them back to numbers.
Trans$Nivel <- as.numeric(levels(Trans$Nivel))[Trans$Nivel]

# ni = absolute frequency, hi = relative frequency in percent.
ni <- Trans[["Cantidad"]]
hi <- prop.table(ni) * 100

TDFTrans <- data.frame(Nivel = Trans[["Nivel"]], ni = ni, hi = hi)
TDFTrans
## Nivel ni hi
## 1 1 12376 32.47186000
## 2 2 3 0.00787133
## 3 3 24035 63.06247212
## 4 4 138 0.36208118
## 5 5 485 1.27253168
## 6 6 30 0.07871330
## 7 7 103 0.27024900
## 8 8 943 2.47422139
# Bar chart of the absolute frequency of every level (1..8).
barplot(
  height = TDFTrans[["ni"]],
  names.arg = TDFTrans[["Nivel"]],
  col = "skyblue",
  main = "Gráfica N°1: Frecuencia del Nivel de consumo de combustible",
  xlab = "Nivel",
  ylab = "Cantidad"
)
# Restrict the frequency table to levels 3 through 8 (first sub-model).
Trans38 <- Trans[which(Trans$Nivel >= 3 & Trans$Nivel <= 8), ]

# Frequencies recomputed inside the subset; hi is a percentage of the subset.
ni <- Trans38[["Cantidad"]]
hi <- prop.table(ni) * 100

# Cantidad and ni hold the same counts; both columns are kept in the table.
TDF_MOOSE <- data.frame(Trans38, ni = ni, hi = hi)
print(TDF_MOOSE)
## Nivel Cantidad ni hi
## 3 3 24035 24035 93.3978394
## 4 4 138 138 0.5362555
## 5 5 485 485 1.8846662
## 6 6 30 30 0.1165773
## 7 7 103 103 0.4002487
## 8 8 943 943 3.6644128

# Bar chart of the counts for levels 3..8, with perpendicular axis labels.
barplot(
  height = TDF_MOOSE[["ni"]],
  names.arg = TDF_MOOSE[["Nivel"]],
  las = 2,
  col = "skyblue",
  main = "Gráfica N°2: Distribución de niveles de cosumo de combustible",
  xlab = "Nivel ",
  ylab = "Cantidad"
)
# Work on a copy of the levels 3..8 table.
tdf_moose1 <- data.frame(TDF_MOOSE)

# Observed probability of each level (proportions that sum to 1).
hi1 <- prop.table(tdf_moose1[["Cantidad"]])
hi1
## [1] 0.933978394 0.005362555 0.018846662 0.001165773 0.004002487 0.036644128

# Bar chart of the observed probabilities.
barplot(
  height = hi1,
  names.arg = tdf_moose1[["Nivel"]],
  col = "skyblue",
  main = "Gráfica N°3: Distribución aleatoria de niveles de cosumo de combustible",
  xlab = "Nivel",
  ylab = "Probabilidad"
)
# Re-index the levels by position so the support starts at 0, as dgeom()
# expects (first level in the subset -> 0, second -> 1, ...).
niveles_geom <- seq(0, length(hi1) - 1)

# Method-of-moments estimate: a geometric variable on 0, 1, 2, ... has mean
# (1 - p) / p, hence p = 1 / (mean + 1).
esperanza <- sum(hi1 * niveles_geom)
p_geom <- 1 / (1 + esperanza)

# Model probabilities on the observed support, renormalised to sum to 1
# because the support is truncated at the last level.
P1 <- prop.table(dgeom(x = niveles_geom, prob = p_geom))
P1
## [1] 0.8027548500 0.1583775048 0.0312466926 0.0061647378 0.0012162565
## [6] 0.0002399583

# Side-by-side bars: observed (light) against model (dark) for each level.
barplot(
  height = rbind(hi1, P1),
  beside = TRUE,
  names.arg = tdf_moose1[["Nivel"]],
  col = c("skyblue", "blue"),
  main = "Gráfica N°4: Distribución aleatoria de niveles de cosumo de combustible",
  xlab = "Nivel",
  ylab = "Probabilidad"
)
legend(
  "topright",
  legend = c("Real", "Modelo"),
  fill = c("skyblue", "blue")
)
# Observed (Fo1) and expected (Fe1) probabilities for the goodness-of-fit checks.
Fo1 <- hi1
Fe1 <- P1

# Scatter of expected against observed, with the least-squares line on top.
plot(
  x = Fo1,
  y = Fe1,
  main = "Gráfica N°5: Correlación FO vs FE",
  xlab = "FO",
  ylab = "FE"
)
abline(reg = lm(Fe1 ~ Fo1), col = "red", lwd = 2)

# Pearson correlation between observed and expected, as a percentage.
Correlacion1 <- 100 * cor(Fo1, Fe1)
Correlacion1
## [1] 97.91293

# Chi-square-style discrepancy between observed and expected.
# NOTE(review): Fo1 and Fe1 are proportions (each sums to 1), not counts, so
# this value is the usual chi-square statistic divided by the sample size;
# comparing it with qchisq() is not a standard test. The degrees of freedom
# also ignore the estimated parameter p_geom. Confirm the intended method.
x2 <- sum((Fo1 - Fe1)^2 / Fe1)
x2
## [1] 5.707534

# 95% chi-square quantile with (number of levels - 1) degrees of freedom.
vc <- qchisq(p = 0.95, df = length(Fo1) - 1)
vc
## [1] 11.0705

# TRUE means the discrepancy is below the threshold.
x2 < vc
## [1] TRUE
# One-row summary of the fit for levels 3..8: correlation (%), chi-square
# value and acceptance threshold, each rounded to two decimals.
Variable <- "Transmision_MOOSE (niveles 3–8)"
tabla_resumen <- data.frame(
  "Variable" = Variable,
  "Test Pearson (%)" = round(Correlacion1, digits = 2),
  "Chi Cuadrado" = round(x2, digits = 2),
  "Umbral de aceptación" = round(vc, digits = 2),
  check.names = FALSE  # keep the spaces and symbols in the headers
)

# Render the summary as a markdown table.
kable(
  x = tabla_resumen,
  format = "markdown",
  caption = "Tabla N°5: Resumen de test de bondad al modelo geométrico"
)
Variable | Test Pearson (%) | Chi Cuadrado | Umbral de aceptación |
---|---|---|---|
Transmision_MOOSE (niveles 3–8) | 97.91 | 5.71 | 11.07 |
# Restrict the frequency table to levels 1 and 2 (second sub-model).
Trans12 <- Trans[which(Trans$Nivel >= 1 & Trans$Nivel <= 2), ]

# Frequencies recomputed inside the subset; hi is a percentage of the subset.
ni <- Trans12[["Cantidad"]]
hi <- prop.table(ni) * 100

# This overwrites the TDF_MOOSE built for levels 3..8.
TDF_MOOSE <- data.frame(Trans12, ni = ni, hi = hi)
print(TDF_MOOSE)
## Nivel Cantidad ni hi
## 1 1 12376 12376 99.97576541
## 2 2 3 3 0.02423459

# Bar chart of the counts for levels 1..2, with perpendicular axis labels.
barplot(
  height = TDF_MOOSE[["ni"]],
  names.arg = TDF_MOOSE[["Nivel"]],
  las = 2,
  col = "skyblue",
  main = "Gráfica N°2: Distribución de niveles de cosumo de combustible",
  xlab = "Nivel ",
  ylab = "Cantidad"
)
# Work on a copy of the levels 1..2 table.
tdf_moose1 <- data.frame(TDF_MOOSE)

# Observed probability of each level (proportions that sum to 1).
hi1 <- prop.table(tdf_moose1[["Cantidad"]])
hi1
## [1] 0.9997576541 0.0002423459

# Bar chart of the observed probabilities.
barplot(
  height = hi1,
  names.arg = tdf_moose1[["Nivel"]],
  col = "skyblue",
  main = "Gráfica N°3: Distribución aleatoria de niveles de cosumo de combustible",
  xlab = "Nivel",
  ylab = "Probabilidad"
)
# Re-index the levels by position so the support starts at 0, as dgeom()
# expects (first level in the subset -> 0, second -> 1).
niveles_geom <- seq(0, length(hi1) - 1)

# Method-of-moments estimate: a geometric variable on 0, 1, 2, ... has mean
# (1 - p) / p, hence p = 1 / (mean + 1).
# This overwrites the p_geom estimated earlier for levels 3..8.
esperanza <- sum(hi1 * niveles_geom)
p_geom <- 1 / (1 + esperanza)

# Model probabilities on the observed support, renormalised to sum to 1
# because the support is truncated at the last level.
P1 <- prop.table(dgeom(x = niveles_geom, prob = p_geom))
P1
## [1] 0.9997577715 0.0002422285

# Side-by-side bars: observed (light) against model (dark) for each level.
barplot(
  height = rbind(hi1, P1),
  beside = TRUE,
  names.arg = tdf_moose1[["Nivel"]],
  col = c("skyblue", "blue"),
  main = "Gráfica N°4: Distribución aleatoria de niveles de cosumo de combustible",
  xlab = "Nivel",
  ylab = "Probabilidad"
)
legend(
  "topright",
  legend = c("Real", "Modelo"),
  fill = c("skyblue", "blue")
)
# Observed (Fo1) and expected (Fe1) probabilities for the goodness-of-fit checks.
Fo1 <- hi1
Fe1 <- P1

# Scatter of expected against observed, with the least-squares line on top.
plot(
  x = Fo1,
  y = Fe1,
  main = "Gráfica N°5: Correlación FO vs FE",
  xlab = "FO",
  ylab = "FE"
)
abline(reg = lm(Fe1 ~ Fo1), col = "red", lwd = 2)

# Pearson correlation between observed and expected, as a percentage.
# NOTE(review): this subset has only two levels, and the correlation of two
# points is always +/-100%, so this figure carries no information here.
Correlacion1 <- 100 * cor(Fo1, Fe1)
Correlacion1
## [1] 100

# Chi-square-style discrepancy between observed and expected.
# NOTE(review): Fo1 and Fe1 are proportions (each sums to 1), not counts, so
# this value is the usual chi-square statistic divided by the sample size;
# comparing it with qchisq() is not a standard test. The degrees of freedom
# also ignore the estimated parameter p_geom. Confirm the intended method.
x2 <- sum((Fo1 - Fe1)^2 / Fe1)
x2
## [1] 5.69196e-11

# 95% chi-square quantile with (number of levels - 1) degrees of freedom.
vc <- qchisq(p = 0.95, df = length(Fo1) - 1)
vc
## [1] 3.841459

# TRUE means the discrepancy is below the threshold.
x2 < vc
## [1] TRUE
# One-row summary of the fit for levels 1..2: correlation (%), chi-square
# value and acceptance threshold, each rounded to two decimals.
# The label names levels 1–2 because Correlacion1, x2 and vc at this point
# come from the levels 1..2 subset, not from levels 3..8.
Variable <- "Transmision_MOOSE (niveles 1–2)"
tabla_resumen <- data.frame(
  "Variable" = Variable,
  "Test Pearson (%)" = round(Correlacion1, digits = 2),
  "Chi Cuadrado" = round(x2, digits = 2),
  "Umbral de aceptación" = round(vc, digits = 2),
  check.names = FALSE  # keep the spaces and symbols in the headers
)

# Render the summary as a markdown table.
kable(
  x = tabla_resumen,
  format = "markdown",
  caption = "Tabla N°5: Resumen de test de bondad al modelo geométrico"
)
Variable | Test Pearson (%) | Chi Cuadrado | Umbral de aceptación |
---|---|---|---|
Transmision_MOOSE (niveles 1–2) | 100 | 0 | 3.84 |
¿Cuál es la probabilidad de que un vehículo tenga un nivel = 6 bajo el modelo geométrico ajustado a los niveles 3–8?
# Probability of level 6 under the geometric model fitted to levels 3..8.
# At this point p_geom holds the estimate for the levels 1..2 subset, so the
# levels 3..8 parameter is rebuilt here from the frequency table `Trans`.
cantidad_38 <- Trans$Cantidad[Trans$Nivel >= 3 & Trans$Nivel <= 8]
hi_38 <- cantidad_38 / sum(cantidad_38)
# Same estimator as the fit: levels re-indexed by position from 0,
# p = 1 / (mean + 1). Assumes every level from 3 to 8 is present in `Trans`.
p_geom_38 <- 1 / (sum((seq_along(hi_38) - 1) * hi_38) + 1)
# Level 3 is index 0 in that model, so level 6 is index 6 - 3 = 3.
dgeom(6 - 3, prob = p_geom_38)
## [1] 0.006164  (approximate, derived from the levels 3-8 fit; re-run to refresh)
La variable tipo de transmisión se explica con un modelo de probabilidad geométrico, aprobando los test de Pearson y Chi-cuadrado. Al transformar los tipos de transmisión en niveles del 1 al 8, es posible calcular probabilidades asociadas a cada nivel de consumo de combustible.