Variables Cuantitativas

library(e1071) # o library(tidyverse)

Cargar los datos

# Switch to the data folder, then load the semicolon-separated file
# (header row present, "." as decimal mark) into `datos`.
# NOTE(review): setwd() with an absolute path ties the script to one machine;
# it is kept because later code may rely on the working directory.
setwd("D:/Data")
datos <- read.table(
  file = "database.csv",
  header = TRUE,
  sep = ";",
  quote = "\"",
  dec = ".",
  fill = TRUE,
  comment.char = ""
)

# Mode
# Returns the most frequent value of `v`.
#   v: an atomic vector (numeric, character, ...).
# When several values share the highest count, the one that appears first in
# `v` is returned. For an empty vector the result is NA.
# (The function used to be defined twice in a row with identical bodies; a
# single definition is enough.)
get_mode <- function(v) {
  candidates <- unique(v)
  # match() maps every element to the position of its value in `candidates`;
  # tabulate() then counts how often each position occurs.
  counts <- tabulate(match(v, candidates))
  candidates[which.max(counts)]
}

Variable: Cilindrada del motor

# Engine.Index is converted to text and then to numeric; entries that cannot
# be parsed become NA (R warns about the coercion) and are dropped.
c_motor <- na.omit(as.numeric(as.character(datos$Engine.Index)))

# Range of the observed values
R <- diff(range(c_motor))

# Number of classes: Sturges-type rule with the 3.3 coefficient, truncated
k <- floor(1 + (3.3 * log10(length(c_motor))))

# Class width
A <- R / k

# Lower limits start at the minimum and advance by A. Every upper limit is
# its lower limit plus A, except the last one, which is pushed 0.01 past the
# maximum.
liminf <- seq(min(c_motor), by = A, length.out = k)
limsup <- c(liminf[-k] + A, max(c_motor) + 0.01)
breaks <- c(liminf, max(limsup) + 0.01)

# Class marks (midpoint of each interval)
MC <- 0.5 * (liminf + limsup)

# Absolute frequency: intervals are [liminf, limsup), and the last one also
# includes its upper limit.
ni <- integer(k)
for (i in seq_len(k)) {
  ni[i] <- if (i == k) {
    sum(c_motor >= liminf[i] & c_motor <= limsup[i], na.rm = TRUE)
  } else {
    sum(c_motor >= liminf[i] & c_motor < limsup[i], na.rm = TRUE)
  }
}

# Relative frequency as a percentage
hi <- 100 * (ni / length(c_motor))

# Ascending cumulative frequencies (absolute and percentage)
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)

# Descending cumulative frequencies (absolute and percentage)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))

# Frequency table; non-count columns are rounded to two decimals
TDFcu_c_motor <- data.frame(
  liminf     = round(liminf, digits = 2),
  limsup     = round(limsup, digits = 2),
  MC         = round(MC, digits = 2),
  ni         = ni,
  hi_perc    = round(hi, digits = 2),
  Niasc      = Niasc,
  Nidsc      = Nidsc,
  Hiasc_perc = round(Hiasc, digits = 2),
  Hidsc_perc = round(Hidsc, digits = 2)
)

Mostrar tabla

# Print the c_motor frequency table to the console.
print(TDFcu_c_motor)

##      liminf   limsup       MC    ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1      0.00  4318.88  2159.44 27459   73.51 27459 37354      73.51     100.00
## 2   4318.88  8637.75  6478.31  3169    8.48 30628  9895      81.99      26.49
## 3   8637.75 12956.62 10797.19   328    0.88 30956  6726      82.87      18.01
## 4  12956.62 17275.50 15116.06    22    0.06 30978  6398      82.93      17.13
## 5  17275.50 21594.38 19434.94   472    1.26 31450  6376      84.19      17.07
## 6  21594.38 25913.25 23753.81    80    0.21 31530  5904      84.41      15.81
## 7  25913.25 30232.12 28072.69   882    2.36 32412  5824      86.77      15.59
## 8  30232.12 34551.00 32391.56    96    0.26 32508  4942      87.03      13.23
## 9  34551.00 38869.88 36710.44   666    1.78 33174  4846      88.81      12.97
## 10 38869.88 43188.75 41029.31   254    0.68 33428  4180      89.49      11.19
## 11 43188.75 47507.62 45348.19   283    0.76 33711  3926      90.25      10.51
## 12 47507.62 51826.50 49667.06   837    2.24 34548  3643      92.49       9.75
## 13 51826.50 56145.38 53985.94   702    1.88 35250  2806      94.37       7.51
## 14 56145.38 60464.25 58304.81  1485    3.98 36735  2104      98.34       5.63
## 15 60464.25 64783.12 62623.69   204    0.55 36939   619      98.89       1.66
## 16 64783.12 69102.01 66942.57   415    1.11 37354   415     100.00       1.11

Histograma Cilindrada del motor

# Histogram of c_motor; the value returned by hist() is kept in H.
# NOTE(review): the title literal spans two source lines, so the line break
# and the indentation of the second line are part of the title text.
H <- hist(
  x = c_motor,
  breaks = 16,
  xlim = c(min(c_motor), 70000),
  col = "steelblue",
  border = "black",
  xlab = "CILINDRADA",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DE LA CILINDRADA 
     DEL MOTOR (HISTOGRAMA)"
)

# Ogive data

# Ascending curve: starts at (first lower limit, 0) and then follows the
# upper limits against the ascending cumulative counts.
x_asc <- with(TDFcu_c_motor, c(min(liminf), limsup))
y_asc <- with(TDFcu_c_motor, c(0, Niasc))

# Descending curve: follows the lower limits against the descending
# cumulative counts and ends at (last upper limit, 0).
x_desc <- with(TDFcu_c_motor, c(liminf, max(limsup)))
y_desc <- with(TDFcu_c_motor, c(Nidsc, 0))

# Axis ranges that cover both curves
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)

Gráfico de ojivas de Cilindrada del motor

# Ascending ogive in green, then the descending ogive overlaid in blue.
# NOTE(review): the title literal spans two source lines, so the line break
# and the indentation of the second line are part of the title text.
O1 <- plot(
  x = x_asc,
  y = y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  xlab = "CILINDRADA DEL MOTOR",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.2 OJIVA ASCENDENTE Y DESCENDENTE DE LA 
     CILINDRADA DEL MOTOR"
)
lines(x = x_desc, y = y_desc, type = "o", col = "darkblue")

Boxplot de Cilindrada del motor

# Horizontal boxplot of c_motor; the value returned by boxplot() is kept in B1.
B1 <- boxplot(
  x = c_motor,
  horizontal = TRUE,
  col = "darkorange",
  xlab = "CILINDRADA DEL MOTOR",
  main = "GRÁFICA NO.3 DISTRIBUCIÓN DE LA CILINDRADA DEL MOTOR"
)

Histograma secundario de Cilindrada del motor

# Second histogram of c_motor, this time with hist()'s default breaks.
# NOTE(review): the title literal spans two source lines, so the line break
# and the indentation of the second line are part of the title text.
HISTOGRAMAc_motor <- hist(
  x = c_motor,
  plot = TRUE,
  col = "darkred",
  xlab = "CILINDRADA DEL MOTOR",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DE LA 
                          CILINDRADA DEL MOTOR (HISTOGRAMA)"
)

### Summary statistics for c_motor

# Standard deviation, median and mean; each is reused when the indicator
# table is assembled.
sd_c_motor <- sd(x = c_motor)
median_c_motor <- median(x = c_motor)
mean_c_motor <- mean(x = c_motor)

# Mode: returns the most frequent value of `v`. On ties, the value that
# appears first in `v` wins.
get_mode <- function(v) {
  candidates <- unique(v)
  positions <- match(v, candidates)
  counts <- tabulate(positions)
  winner <- which.max(counts)
  candidates[winner]
}

# Indicator table for c_motor: one row per statistic, all rounded to two
# decimals. skewness() and kurtosis() come from the e1071 package.
indicadores_c_motor <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(c_motor),
      median_c_motor,
      mean_c_motor,
      sd_c_motor,
      var(c_motor),
      (sd_c_motor / mean_c_motor) * 100, # coefficient of variation in %
      skewness(c_motor),
      kurtosis(c_motor)
    ),
    digits = 2
  )
)

## Indicator table for c_motor: print it to the console

print(indicadores_c_motor)

##             Indicador        Valor
## 1                Moda         0.00
## 2             Mediana       219.50
## 3            Media(x)      8939.90
## 4 Desviacion Estandar     17899.80
## 5            Varianza 320402924.46
## 6  Coef Variacion (%)       200.22
## 7           Asimetria         2.06
## 8            Curtosis         2.71

Variable: MPG en ciudad

# Unadjusted.City.MPG..FT1. is converted to text and then to numeric; entries
# that cannot be parsed become NA (R warns about the coercion) and are dropped.
mpg_c <- na.omit(as.numeric(as.character(datos$Unadjusted.City.MPG..FT1.)))

# Range, number of classes (Sturges-type rule with the 3.3 coefficient,
# truncated) and class width
R <- diff(range(mpg_c))
k <- floor(1 + (3.3 * log10(length(mpg_c))))
A <- R / k

# Lower limits start at the minimum and advance by A. Every upper limit is
# its lower limit plus A, except the last one, which is pushed 0.01 past the
# maximum.
liminf <- seq(min(mpg_c), by = A, length.out = k)
limsup <- c(liminf[-k] + A, max(mpg_c) + 0.01)
breaks <- c(liminf, max(limsup) + 0.01)

# Class marks (midpoint of each interval)
MC <- 0.5 * (liminf + limsup)

# Absolute frequency: intervals are [liminf, limsup), and the last one also
# includes its upper limit.
ni <- integer(k)
for (i in seq_len(k)) {
  ni[i] <- if (i == k) {
    sum(mpg_c >= liminf[i] & mpg_c <= limsup[i], na.rm = TRUE)
  } else {
    sum(mpg_c >= liminf[i] & mpg_c < limsup[i], na.rm = TRUE)
  }
}

# Relative frequency (%) and cumulative frequencies in both directions
hi <- 100 * (ni / length(mpg_c))
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))

# Frequency table; non-count columns are rounded to two decimals
TDFcu_mpg_c <- data.frame(
  liminf     = round(liminf, digits = 2),
  limsup     = round(limsup, digits = 2),
  MC         = round(MC, digits = 2),
  ni         = ni,
  hi_perc    = round(hi, digits = 2),
  Niasc      = Niasc,
  Nidsc      = Nidsc,
  Hiasc_perc = round(Hiasc, digits = 2),
  Hidsc_perc = round(Hidsc, digits = 2)
)

Mostrar tabla

# Print the mpg_c frequency table to the console.
print(TDFcu_mpg_c)

##    liminf limsup     MC    ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1    0.00  55.43  27.71 26036   69.12 26036 37669      69.12     100.00
## 2   55.43 110.86  83.14   104    0.28 26140 11633      69.39      30.88
## 3  110.86 166.28 138.57  1713    4.55 27853 11529      73.94      30.61
## 4  166.28 221.71 194.00  4424   11.74 32277  9816      85.69      26.06
## 5  221.71 277.14 249.42  3311    8.79 35588  5392      94.48      14.31
## 6  277.14 332.57 304.85  1212    3.22 36800  2081      97.69       5.52
## 7  332.57 387.99 360.28   592    1.57 37392   869      99.26       2.31
## 8  387.99 443.42 415.71   121    0.32 37513   277      99.59       0.74
## 9  443.42 498.85 471.13    48    0.13 37561   156      99.71       0.41
## 10 498.85 554.28 526.56    28    0.07 37589   108      99.79       0.29
## 11 554.28 609.70 581.99    41    0.11 37630    80      99.90       0.21
## 12 609.70 665.13 637.42     6    0.02 37636    39      99.91       0.10
## 13 665.13 720.56 692.85    22    0.06 37658    33      99.97       0.09
## 14 720.56 775.99 748.27     4    0.01 37662    11      99.98       0.03
## 15 775.99 831.41 803.70     1    0.00 37663     7      99.98       0.02
## 16 831.41 886.85 859.13     6    0.02 37669     6     100.00       0.02

Histograma MPG en ciudad

# Histogram of mpg_c; the value returned by hist() is kept in H2.
# NOTE(review): the title literal spans two source lines, so the line break
# and the indentation of the second line are part of the title text.
H2 <- hist(
  x = mpg_c,
  breaks = 16,
  xlim = range(mpg_c),
  col = "steelblue",
  border = "black",
  xlab = "MPG_C",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DE MPG EN CIUDAD
          (HISTOGRAMA)"
)

Boxplot de MPG en ciudad

# Horizontal boxplot of mpg_c (return value not stored).
boxplot(
  x = mpg_c,
  horizontal = TRUE,
  col = "darkorange",
  xlab = "mpg_c",
  main = "GRÁFICA NO.2 DISTRIBUCIÓN DE mpg EN CIUDAD"
)

# Ogive data

# Ascending curve: starts at (first lower limit, 0) and then follows the
# upper limits against the ascending cumulative counts.
x_asc <- with(TDFcu_mpg_c, c(min(liminf), limsup))
y_asc <- with(TDFcu_mpg_c, c(0, Niasc))

# Descending curve: follows the lower limits against the descending
# cumulative counts and ends at (last upper limit, 0).
x_desc <- with(TDFcu_mpg_c, c(liminf, max(limsup)))
y_desc <- with(TDFcu_mpg_c, c(Nidsc, 0))

# Axis ranges that cover both curves
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)

Gráfico de ojivas de MPG en ciudad

# Ascending ogive in green, then the descending ogive overlaid in blue.
# NOTE(review): the title literal spans two source lines, so the line break
# and the indentation of the second line are part of the title text.
plot(
  x = x_asc,
  y = y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  xlab = "mpg_c",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.3 OJIVA ASCENDENTE Y DESCENDENTE DE MPG
     EN CIUDAD"
)
lines(x = x_desc, y = y_desc, type = "o", col = "darkblue")

Histograma secundario de MPG en ciudad

# Second histogram of mpg_c, this time with hist()'s default breaks.
# NOTE(review): the title literal spans two source lines, so the line break
# and the indentation of the second line are part of the title text.
HISTOGRAMAmpg_c <- hist(
  x = mpg_c,
  plot = TRUE,
  col = "darkred",
  xlab = "mpg_c",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DE MPG 
                        EN CIUDAD (HISTOGRAMA)"
)

### Summary statistics for mpg_c

# Standard deviation, median and mean; each is reused when the indicator
# table is assembled.
sd_mpg_c <- sd(x = mpg_c)
median_mpg_c <- median(x = mpg_c)
mean_mpg_c <- mean(x = mpg_c)

# Mode: returns the most frequent value of `v`. On ties, the value that
# appears first in `v` wins.
get_mode <- function(v) {
  candidates <- unique(v)
  positions <- match(v, candidates)
  counts <- tabulate(positions)
  winner <- which.max(counts)
  candidates[winner]
}

# Indicator table for mpg_c: one row per statistic, all rounded to two
# decimals. skewness() and kurtosis() come from the e1071 package.
indicadores_mpg_c <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(mpg_c),
      median_mpg_c,
      mean_mpg_c,
      sd_mpg_c,
      var(mpg_c),
      (sd_mpg_c / mean_mpg_c) * 100, # coefficient of variation in %
      skewness(mpg_c),
      kurtosis(mpg_c)
    ),
    digits = 2
  )
)

## Indicator table for mpg_c: print it to the console

print(indicadores_mpg_c)

##             Indicador    Valor
## 1                Moda     0.00
## 2             Mediana    24.00
## 3            Media(x)    83.88
## 4 Desviacion Estandar   103.54
## 5            Varianza 10721.47
## 6  Coef Variacion (%)   123.44
## 7           Asimetria     1.49
## 8            Curtosis     2.19

Variable: Costo anual del combustible

# Annual.Fuel.Cost..FT1. is converted to text and then to numeric; entries
# that cannot be parsed become NA and are dropped.
c_anual <- na.omit(as.numeric(as.character(datos$Annual.Fuel.Cost..FT1.)))

# Frequency table for c_anual.

# Range, number of classes (Sturges-type rule with the 3.3 coefficient,
# truncated) and class width
R <- max(c_anual) - min(c_anual)
k <- floor(1 + (3.3 * log10(length(c_anual))))
A <- R / k

# Lower limits start at the minimum and advance by A. Every upper limit is
# its lower limit plus A, except the last one, which is pushed 0.01 past the
# maximum.
liminf <- seq(from = min(c_anual), by = A, length.out = k)
limsup <- liminf + A
limsup[k] <- max(c_anual) + 0.01
breaks <- c(liminf, max(limsup) + 0.01)

# Class marks (midpoint of each interval). They must be computed from this
# variable's own limits: without this line the table would silently reuse
# whatever MC held from a previously analysed variable.
MC <- (liminf + limsup) / 2

# Absolute frequency: intervals are [liminf, limsup), and the last one also
# includes its upper limit. The counter is preallocated instead of grown.
ni <- integer(k)
for (i in seq_len(k)) {
  ni[i] <- if (i == k) {
    sum(c_anual >= liminf[i] & c_anual <= limsup[i], na.rm = TRUE)
  } else {
    sum(c_anual >= liminf[i] & c_anual < limsup[i], na.rm = TRUE)
  }
}

# Relative frequency (%) and cumulative frequencies in both directions
hi <- (ni / length(c_anual)) * 100
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- cumsum(hi)
Hidsc <- rev(cumsum(rev(hi)))

# Frequency table; non-count columns are rounded to two decimals
TDFcu_c_anual <- data.frame(
  liminf = round(liminf, 2),
  limsup = round(limsup, 2),
  MC = round(MC, 2),
  ni = ni,
  hi_perc = round(hi, 2),
  Niasc = Niasc,
  Nidsc = Nidsc,
  Hiasc_perc = round(Hiasc, 2),
  Hidsc_perc = round(Hidsc, 2)
)

Mostrar tabla

# Print the c_anual frequency table to the console.
print(TDFcu_c_anual)

##     liminf  limsup     MC   ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1     0.00  378.12  27.71 1975    5.24  1975 37698       5.24     100.00
## 2   378.12  756.25  83.14  158    0.42  2133 35723       5.66      94.76
## 3   756.25 1134.38 138.57  746    1.98  2879 35565       7.64      94.34
## 4  1134.38 1512.50 194.00 7223   19.16 10102 34819      26.80      92.36
## 5  1512.50 1890.62 249.42 9015   23.91 19117 27596      50.71      73.20
## 6  1890.62 2268.75 304.85 9863   26.16 28980 18581      76.87      49.29
## 7  2268.75 2646.88 360.28 4829   12.81 33809  8718      89.68      23.13
## 8  2646.88 3025.00 415.71 2697    7.15 36506  3889      96.84      10.32
## 9  3025.00 3403.12 471.13  617    1.64 37123  1192      98.47       3.16
## 10 3403.12 3781.25 526.56  304    0.81 37427   575      99.28       1.53
## 11 3781.25 4159.38 581.99  175    0.46 37602   271      99.75       0.72
## 12 4159.38 4537.50 637.42   83    0.22 37685    96      99.97       0.25
## 13 4537.50 4915.62 692.85    6    0.02 37691    13      99.98       0.03
## 14 4915.62 5293.75 748.27    0    0.00 37691     7      99.98       0.02
## 15 5293.75 5671.88 803.70    2    0.01 37693     7      99.99       0.02
## 16 5671.88 6050.01 859.13    5    0.01 37698     5     100.00       0.01

Histograma costo anual del combustible

# Histogram of c_anual; the value returned by hist() is kept in H3.
H3 <- hist(
  x = c_anual,
  breaks = 16,
  xlim = range(c_anual),
  col = "steelblue",
  border = "black",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE (HISTOGRAMA)"
)

# Ogive data

# Ascending curve: starts at (first lower limit, 0) and then follows the
# upper limits against the ascending cumulative counts.
x_asc <- with(TDFcu_c_anual, c(min(liminf), limsup))
y_asc <- with(TDFcu_c_anual, c(0, Niasc))

# Descending curve: follows the lower limits against the descending
# cumulative counts and ends at (last upper limit, 0).
x_desc <- with(TDFcu_c_anual, c(liminf, max(limsup)))
y_desc <- with(TDFcu_c_anual, c(Nidsc, 0))

# Axis ranges that cover both curves
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)

Gráfico de ojivas de costo anual del combustible

# Ascending ogive in green, then the descending ogive overlaid in blue.
O1 <- plot(
  x = x_asc,
  y = y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.2 OJIVA ASCENDENTE Y DESCENDENTE DEL COSTO ANUAL DE COMBUSTIBLE"
)
lines(x = x_desc, y = y_desc, type = "o", col = "darkblue")

Boxplot de costo anual del combustible

# Horizontal boxplot of c_anual; the value returned by boxplot() is kept in B1.
B1 <- boxplot(
  x = c_anual,
  horizontal = TRUE,
  col = "darkorange",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  main = "GRÁFICA NO.3 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE"
)

Histograma secundario de costo anual del combustible

# Second histogram of c_anual, this time with hist()'s default breaks.
HISTOGRAMAc_anual <- hist(
  x = c_anual,
  plot = TRUE,
  col = "darkred",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE (HISTOGRAMA)"
)

###Indicadores estadisticos c_anual

# Mode: returns the most frequent value of `v`. On ties, the value that
# appears first in `v` wins.
get_mode <- function(v) {
  candidates <- unique(v)
  positions <- match(v, candidates)
  counts <- tabulate(positions)
  winner <- which.max(counts)
  candidates[winner]
}

# Summary statistics for c_anual: standard deviation, median and mean; each
# is reused when the indicator table is assembled.
sd_c_anual <- sd(x = c_anual)
median_c_anual <- median(x = c_anual)
mean_c_anual <- mean(x = c_anual)

# Indicator table for c_anual: one row per statistic, all rounded to two
# decimals. skewness() and kurtosis() come from the e1071 package.
indicadores_c_anual <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(c_anual),
      median_c_anual,
      mean_c_anual,
      sd_c_anual,
      var(c_anual),
      (sd_c_anual / mean_c_anual) * 100, # coefficient of variation in %
      skewness(c_anual),
      kurtosis(c_anual)
    ),
    digits = 2
  )
)

## Indicator table for c_anual: print it to the console

print(indicadores_c_anual)

##             Indicador     Valor
## 1                Moda   2350.00
## 2             Mediana   1850.00
## 3            Media(x)   1856.46
## 4 Desviacion Estandar    678.52
## 5            Varianza 460395.76
## 6  Coef Variacion (%)     36.55
## 7           Asimetria     -0.42
## 8            Curtosis      1.99

Variables Cuantitativas

Grupo 7

2025-06-05

Cargar los datos

Variable: Cilindrada del motor

Mostrar tabla

Histograma Cilindrada del motor

Gráfico de ojivas de Cilindrada del motor

Boxplot de Cilindrada del motor

Histograma secundario de Cilindrada del motor

Variable: MPG en ciudad

Mostrar tabla

Histograma MPG en ciudad

Boxplot de MPG en ciudad

Gráfico de ojivas de MPG en ciudad

Histograma secundario de MPG en ciudad

Variable: Costo anual del combustible

Mostrar tabla

Histograma costo anual del combustible

Gráfico de ojivas de costo anual del combustible

Boxplot de costo anual del combustible

Histograma secundario de costo anual del combustible