library(e1071) # o library(tidyverse)

Cargar los datos

# Switch the working directory to the data folder, then load the
# semicolon-delimited CSV (header row present, "." as decimal mark).
# NOTE(review): setwd() with an absolute path ties the script to one machine;
# kept because code outside this view may rely on the working directory.
setwd("D:/Data")
datos <- read.csv(
  file = "database.csv",
  sep = ";",
  dec = ".",
  header = TRUE
)

# Mode (most frequent value) of a vector.
#
# v: an atomic vector (numeric, character, ...).
# Returns the element of `v` that occurs most often. When several values
# share the highest count, the one that appears first in `v` is returned.
# An empty atomic vector yields NA.
get_mode <- function(v) {
  uniqv <- unique(v)
  # match() maps every element to its position in uniqv and tabulate()
  # counts how many times each position occurs.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

Variable: Cilindrada del motor

# Values analysed in this section: the Engine.Index column coerced to numeric.
# Entries that cannot be parsed become NA (R warns about the coercion) and
# are removed.
c_motor <- na.omit(as.numeric(as.character(datos$Engine.Index)))

# Range of the data
R <- diff(range(c_motor))

# Number of classes (Sturges' rule), truncated to a whole number
k <- floor(1 + (3.3 * log10(length(c_motor))))

# Class width
A <- R / k

# Lower and upper class limits. The last upper limit is pushed 0.01 past the
# maximum so the largest observation falls inside the final class.
liminf <- seq(from = min(c_motor), by = A, length.out = k)
limsup <- liminf + A
limsup[k] <- max(c_motor) + 0.01
# NOTE(review): `breaks` is not referenced again in the visible part of the
# file (the histograms below pass breaks = 16).
breaks <- c(liminf, max(limsup) + 0.01)

# Class mark (midpoint of each class)
MC <- (liminf + limsup) / 2

# Absolute frequency: classes are [liminf, limsup), except the last one,
# which is closed on both sides.
ni <- integer(k)
for (i in seq_len(k)) {
  ni[i] <- if (i < k) {
    sum(c_motor >= liminf[i] & c_motor < limsup[i])
  } else {
    sum(c_motor >= liminf[i] & c_motor <= limsup[i])
  }
}

# Relative frequency in percent
hi <- ni / length(c_motor) * 100

# Ascending and descending cumulative frequencies (absolute and percent)
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))

# Frequency table; non-integer columns are rounded to two decimals
TDFcu_c_motor <- data.frame(
  liminf     = round(liminf, digits = 2),
  limsup     = round(limsup, digits = 2),
  MC         = round(MC, digits = 2),
  ni         = ni,
  hi_perc    = round(hi, digits = 2),
  Niasc      = Niasc,
  Nidsc      = Nidsc,
  Hiasc_perc = round(Hiasc, digits = 2),
  Hidsc_perc = round(Hidsc, digits = 2)
)

Mostrar tabla

# Print the frequency table built for c_motor (console output recorded below).
print(TDFcu_c_motor)
##      liminf   limsup       MC    ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1      0.00  4318.88  2159.44 27459   73.51 27459 37354      73.51     100.00
## 2   4318.88  8637.75  6478.31  3169    8.48 30628  9895      81.99      26.49
## 3   8637.75 12956.62 10797.19   328    0.88 30956  6726      82.87      18.01
## 4  12956.62 17275.50 15116.06    22    0.06 30978  6398      82.93      17.13
## 5  17275.50 21594.38 19434.94   472    1.26 31450  6376      84.19      17.07
## 6  21594.38 25913.25 23753.81    80    0.21 31530  5904      84.41      15.81
## 7  25913.25 30232.12 28072.69   882    2.36 32412  5824      86.77      15.59
## 8  30232.12 34551.00 32391.56    96    0.26 32508  4942      87.03      13.23
## 9  34551.00 38869.88 36710.44   666    1.78 33174  4846      88.81      12.97
## 10 38869.88 43188.75 41029.31   254    0.68 33428  4180      89.49      11.19
## 11 43188.75 47507.62 45348.19   283    0.76 33711  3926      90.25      10.51
## 12 47507.62 51826.50 49667.06   837    2.24 34548  3643      92.49       9.75
## 13 51826.50 56145.38 53985.94   702    1.88 35250  2806      94.37       7.51
## 14 56145.38 60464.25 58304.81  1485    3.98 36735  2104      98.34       5.63
## 15 60464.25 64783.12 62623.69   204    0.55 36939   619      98.89       1.66
## 16 64783.12 69102.01 66942.57   415    1.11 37354   415     100.00       1.11

Histograma Cilindrada del motor

# Histogram of c_motor; the object returned by hist() is kept in H.
# The x axis runs from the smallest observation to a fixed 70000.
H <- hist(
  x = c_motor,
  breaks = 16,
  col = "steelblue",
  border = "black",
  xlab = "CILINDRADA",
  ylab = "CANTIDAD",
  xlim = c(min(c_motor), 70000),
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DE LA CILINDRADA 
     DEL MOTOR (HISTOGRAMA)"
)

# Ogive coordinates taken from the frequency table.
# Ascending curve: starts at 0 on the first lower limit, then the cumulative
# count at every upper limit.
x_asc <- with(TDFcu_c_motor, c(min(liminf), limsup))
y_asc <- c(0, TDFcu_c_motor[["Niasc"]])

# Descending curve: cumulative count at every lower limit, ending at 0 on the
# last upper limit.
x_desc <- with(TDFcu_c_motor, c(liminf, max(limsup)))
y_desc <- c(TDFcu_c_motor[["Nidsc"]], 0)

# Axis ranges that fit both curves
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)

Gráfico de ojivas de Cilindrada del motor

# Ascending ogive (dark green) with the descending ogive (dark blue) overlaid.
# NOTE(review): plot() is called for its drawing side effect; the value kept
# in O1 is not used anywhere in the visible code.
O1 <- plot(
  x = x_asc,
  y = y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  xlab = "CILINDRADA DEL MOTOR",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.2 OJIVA ASCENDENTE Y DESCENDENTE DE LA 
     CILINDRADA DEL MOTOR"
)
lines(x = x_desc, y = y_desc, type = "o", col = "darkblue")

Boxplot de Cilindrada del motor

# Horizontal boxplot of c_motor; the value returned by boxplot() is kept in B1.
B1 <- boxplot(
  x = c_motor,
  horizontal = TRUE,
  col = "darkorange",
  xlab = "CILINDRADA DEL MOTOR",
  main = "GRÁFICA NO.3 DISTRIBUCIÓN DE LA CILINDRADA DEL MOTOR"
)

Histograma secundario de Cilindrada del motor

# Second histogram of c_motor, this time with hist()'s default class breaks.
HISTOGRAMAc_motor <- hist(
  x = c_motor,
  plot = TRUE,
  col = "darkred",
  xlab = "CILINDRADA DEL MOTOR",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DE LA 
                          CILINDRADA DEL MOTOR (HISTOGRAMA)"
)

###Indicadores estadisticos c_motor

# Location and spread of c_motor, reused by the indicator table below.
sd_c_motor     <- sd(c_motor)
median_c_motor <- median(c_motor)
mean_c_motor   <- mean(c_motor)

# Mode helper: returns the most frequent value of `v`
# (the earliest-appearing value wins when counts tie).
get_mode <- function(v) {
  candidates <- unique(v)
  occurrences <- tabulate(match(v, candidates))
  candidates[which.max(occurrences)]
}

# Summary indicators for c_motor, each rounded to two decimals.
# skewness() and kurtosis() are presumably the e1071 versions loaded at the
# top of the file.
indicadores_c_motor <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(c_motor),
      median_c_motor,
      mean_c_motor,
      sd_c_motor,
      var(c_motor),
      (sd_c_motor / mean_c_motor) * 100,  # coefficient of variation, percent
      skewness(c_motor),
      kurtosis(c_motor)
    ),
    2
  )
)

##Tabla indicadores c_motor

# Print the indicator table for c_motor (console output recorded below).
print(indicadores_c_motor)
##             Indicador        Valor
## 1                Moda         0.00
## 2             Mediana       219.50
## 3            Media(x)      8939.90
## 4 Desviacion Estandar     17899.80
## 5            Varianza 320402924.46
## 6  Coef Variacion (%)       200.22
## 7           Asimetria         2.06
## 8            Curtosis         2.71

Variable: MPG en ciudad

# Values analysed in this section: the Unadjusted.City.MPG..FT1. column
# coerced to numeric. Entries that cannot be parsed become NA (R warns about
# the coercion) and are removed.
mpg_c <- na.omit(as.numeric(as.character(datos$Unadjusted.City.MPG..FT1.)))

# Range, number of classes (1 + 3.3 * log10(n), truncated) and class width
R <- diff(range(mpg_c))
k <- floor(1 + (3.3 * log10(length(mpg_c))))
A <- R / k

# Class limits; the last upper limit is pushed 0.01 past the maximum so the
# largest observation falls inside the final class.
liminf <- seq(from = min(mpg_c), by = A, length.out = k)
limsup <- liminf + A
limsup[k] <- max(mpg_c) + 0.01
# NOTE(review): `breaks` is not referenced again in the visible part of the
# file (the histograms below pass breaks = 16).
breaks <- c(liminf, max(limsup) + 0.01)

# Class mark (midpoint of each class)
MC <- (liminf + limsup) / 2

# Absolute frequency: classes are [liminf, limsup), except the last one,
# which is closed on both sides.
ni <- integer(k)
for (i in seq_len(k)) {
  ni[i] <- if (i < k) {
    sum(mpg_c >= liminf[i] & mpg_c < limsup[i])
  } else {
    sum(mpg_c >= liminf[i] & mpg_c <= limsup[i])
  }
}

# Relative (percent) and cumulative frequencies
hi <- ni / length(mpg_c) * 100
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))

# Frequency table; non-integer columns are rounded to two decimals
TDFcu_mpg_c <- data.frame(
  liminf     = round(liminf, digits = 2),
  limsup     = round(limsup, digits = 2),
  MC         = round(MC, digits = 2),
  ni         = ni,
  hi_perc    = round(hi, digits = 2),
  Niasc      = Niasc,
  Nidsc      = Nidsc,
  Hiasc_perc = round(Hiasc, digits = 2),
  Hidsc_perc = round(Hidsc, digits = 2)
)

Mostrar tabla

# Print the frequency table built for mpg_c (console output recorded below).
print(TDFcu_mpg_c)
##    liminf limsup     MC    ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1    0.00  55.43  27.71 26036   69.12 26036 37669      69.12     100.00
## 2   55.43 110.86  83.14   104    0.28 26140 11633      69.39      30.88
## 3  110.86 166.28 138.57  1713    4.55 27853 11529      73.94      30.61
## 4  166.28 221.71 194.00  4424   11.74 32277  9816      85.69      26.06
## 5  221.71 277.14 249.42  3311    8.79 35588  5392      94.48      14.31
## 6  277.14 332.57 304.85  1212    3.22 36800  2081      97.69       5.52
## 7  332.57 387.99 360.28   592    1.57 37392   869      99.26       2.31
## 8  387.99 443.42 415.71   121    0.32 37513   277      99.59       0.74
## 9  443.42 498.85 471.13    48    0.13 37561   156      99.71       0.41
## 10 498.85 554.28 526.56    28    0.07 37589   108      99.79       0.29
## 11 554.28 609.70 581.99    41    0.11 37630    80      99.90       0.21
## 12 609.70 665.13 637.42     6    0.02 37636    39      99.91       0.10
## 13 665.13 720.56 692.85    22    0.06 37658    33      99.97       0.09
## 14 720.56 775.99 748.27     4    0.01 37662    11      99.98       0.03
## 15 775.99 831.41 803.70     1    0.00 37663     7      99.98       0.02
## 16 831.41 886.85 859.13     6    0.02 37669     6     100.00       0.02

Histograma MPG en ciudad

# Histogram of mpg_c spanning the full observed range; the object returned by
# hist() is kept in H2.
H2 <- hist(
  x = mpg_c,
  breaks = 16,
  col = "steelblue",
  border = "black",
  xlab = "MPG_C",
  ylab = "CANTIDAD",
  xlim = range(mpg_c),
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DE MPG EN CIUDAD
          (HISTOGRAMA)"
)

Boxplot de MPG en ciudad

# Horizontal boxplot of mpg_c (drawn only; the return value is not stored).
boxplot(
  x = mpg_c,
  horizontal = TRUE,
  col = "darkorange",
  xlab = "mpg_c",
  main = "GRÁFICA NO.2 DISTRIBUCIÓN DE mpg EN CIUDAD"
)

# Ogive coordinates taken from the frequency table.
# Ascending curve: starts at 0 on the first lower limit, then the cumulative
# count at every upper limit.
x_asc <- with(TDFcu_mpg_c, c(min(liminf), limsup))
y_asc <- c(0, TDFcu_mpg_c[["Niasc"]])

# Descending curve: cumulative count at every lower limit, ending at 0 on the
# last upper limit.
x_desc <- with(TDFcu_mpg_c, c(liminf, max(limsup)))
y_desc <- c(TDFcu_mpg_c[["Nidsc"]], 0)

# Axis ranges that fit both curves
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)

Gráfico de ojivas de MPG en ciudad

# Ascending ogive (dark green) with the descending ogive (dark blue) overlaid.
plot(
  x = x_asc,
  y = y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  xlab = "mpg_c",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.3 OJIVA ASCENDENTE Y DESCENDENTE DE MPG
     EN CIUDAD"
)
lines(x = x_desc, y = y_desc, type = "o", col = "darkblue")

Histograma secundario de MPG en ciudad

# Second histogram of mpg_c, this time with hist()'s default class breaks.
HISTOGRAMAmpg_c <- hist(
  x = mpg_c,
  plot = TRUE,
  col = "darkred",
  xlab = "mpg_c",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DE MPG 
                        EN CIUDAD (HISTOGRAMA)"
)

###Indicadores estadisticos mpg_c

# Location and spread of mpg_c, reused by the indicator table below.
sd_mpg_c     <- sd(mpg_c)
median_mpg_c <- median(mpg_c)
mean_mpg_c   <- mean(mpg_c)

# Function for the mode: the value with the highest count in `v`
# (first one encountered if several values tie).
get_mode <- function(v) {
  distinct <- unique(v)
  slot <- match(v, distinct)
  freq <- tabulate(slot)
  winner <- which.max(freq)
  distinct[winner]
}

# Summary indicators for mpg_c, each rounded to two decimals.
# skewness() and kurtosis() are presumably the e1071 versions loaded at the
# top of the file.
indicadores_mpg_c <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(mpg_c),
      median_mpg_c,
      mean_mpg_c,
      sd_mpg_c,
      var(mpg_c),
      (sd_mpg_c / mean_mpg_c) * 100,  # coefficient of variation, percent
      skewness(mpg_c),
      kurtosis(mpg_c)
    ),
    2
  )
)

##Tabla indicadores mpg_c

# Print the indicator table for mpg_c (console output recorded below).
print(indicadores_mpg_c)
##             Indicador    Valor
## 1                Moda     0.00
## 2             Mediana    24.00
## 3            Media(x)    83.88
## 4 Desviacion Estandar   103.54
## 5            Varianza 10721.47
## 6  Coef Variacion (%)   123.44
## 7           Asimetria     1.49
## 8            Curtosis     2.19

Variable: Costo anual del combustible

# Values analysed in this section: the Annual.Fuel.Cost..FT1. column coerced
# to numeric, with any entries that fail to parse (NA) removed.
c_anual <- as.numeric(as.character(datos$Annual.Fuel.Cost..FT1.))
c_anual <- na.omit(c_anual)

# Range, number of classes (1 + 3.3 * log10(n), truncated) and class width
R <- max(c_anual) - min(c_anual)
k <- floor(1 + (3.3 * log10(length(c_anual))))
A <- R / k

# Class limits; the last upper limit is pushed 0.01 past the maximum so the
# largest observation falls inside the final class.
liminf <- seq(from = min(c_anual), by = A, length.out = k)
limsup <- liminf + A
limsup[k] <- max(c_anual) + 0.01
# NOTE(review): `breaks` is not referenced again in the visible part of the
# file (the histograms below pass breaks = 16).
breaks <- c(liminf, max(limsup) + 0.01)

# Class mark (midpoint of each class). It has to be recomputed from this
# section's limits: without this line the table would silently reuse the MC
# vector left over from the previous variable's section.
MC <- (liminf + limsup) / 2

# Absolute frequency: classes are [liminf, limsup), except the last one,
# which is closed on both sides.
ni <- vapply(
  seq_len(k),
  function(i) {
    below_top <- if (i == k) c_anual <= limsup[i] else c_anual < limsup[i]
    sum(c_anual >= liminf[i] & below_top)
  },
  integer(1)
)

# Relative (percent) and cumulative frequencies
hi <- (ni / length(c_anual)) * 100
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- cumsum(hi)
Hidsc <- rev(cumsum(rev(hi)))

# Frequency table; non-integer columns are rounded to two decimals
TDFcu_c_anual <- data.frame(
  liminf = round(liminf, 2),
  limsup = round(limsup, 2),
  MC = round(MC, 2),
  ni = ni,
  hi_perc = round(hi, 2),
  Niasc = Niasc,
  Nidsc = Nidsc,
  Hiasc_perc = round(Hiasc, 2),
  Hidsc_perc = round(Hidsc, 2)
)

Mostrar tabla

# Print the frequency table built for c_anual (console output recorded below).
print(TDFcu_c_anual)
##     liminf  limsup     MC   ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1     0.00  378.12  27.71 1975    5.24  1975 37698       5.24     100.00
## 2   378.12  756.25  83.14  158    0.42  2133 35723       5.66      94.76
## 3   756.25 1134.38 138.57  746    1.98  2879 35565       7.64      94.34
## 4  1134.38 1512.50 194.00 7223   19.16 10102 34819      26.80      92.36
## 5  1512.50 1890.62 249.42 9015   23.91 19117 27596      50.71      73.20
## 6  1890.62 2268.75 304.85 9863   26.16 28980 18581      76.87      49.29
## 7  2268.75 2646.88 360.28 4829   12.81 33809  8718      89.68      23.13
## 8  2646.88 3025.00 415.71 2697    7.15 36506  3889      96.84      10.32
## 9  3025.00 3403.12 471.13  617    1.64 37123  1192      98.47       3.16
## 10 3403.12 3781.25 526.56  304    0.81 37427   575      99.28       1.53
## 11 3781.25 4159.38 581.99  175    0.46 37602   271      99.75       0.72
## 12 4159.38 4537.50 637.42   83    0.22 37685    96      99.97       0.25
## 13 4537.50 4915.62 692.85    6    0.02 37691    13      99.98       0.03
## 14 4915.62 5293.75 748.27    0    0.00 37691     7      99.98       0.02
## 15 5293.75 5671.88 803.70    2    0.01 37693     7      99.99       0.02
## 16 5671.88 6050.01 859.13    5    0.01 37698     5     100.00       0.01

Histograma costo anual del combustible

# Histogram of c_anual spanning the full observed range; the object returned
# by hist() is kept in H3.
H3 <- hist(
  x = c_anual,
  breaks = 16,
  col = "steelblue",
  border = "black",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  xlim = range(c_anual),
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE (HISTOGRAMA)"
)

# Ogive coordinates taken from the frequency table.
# Ascending curve: starts at 0 on the first lower limit, then the cumulative
# count at every upper limit.
x_asc <- with(TDFcu_c_anual, c(min(liminf), limsup))
y_asc <- c(0, TDFcu_c_anual[["Niasc"]])

# Descending curve: cumulative count at every lower limit, ending at 0 on the
# last upper limit.
x_desc <- with(TDFcu_c_anual, c(liminf, max(limsup)))
y_desc <- c(TDFcu_c_anual[["Nidsc"]], 0)

# Axis ranges that fit both curves
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)

Gráfico de ojivas de costo anual del combustible

# Ascending ogive (dark green) with the descending ogive (dark blue) overlaid.
# NOTE(review): plot() is called for its drawing side effect; the value kept
# in O1 is not used anywhere in the visible code.
O1 <- plot(
  x = x_asc,
  y = y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.2 OJIVA ASCENDENTE Y DESCENDENTE DEL COSTO ANUAL DE COMBUSTIBLE"
)
lines(x = x_desc, y = y_desc, type = "o", col = "darkblue")

Boxplot de costo anual del combustible

# Horizontal boxplot of c_anual; the value returned by boxplot() is kept in B1.
B1 <- boxplot(
  x = c_anual,
  horizontal = TRUE,
  col = "darkorange",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  main = "GRÁFICA NO.3 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE"
)

Histograma secundario de costo anual del combustible

# Second histogram of c_anual, this time with hist()'s default class breaks.
HISTOGRAMAc_anual <- hist(
  x = c_anual,
  plot = TRUE,
  col = "darkred",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE (HISTOGRAMA)"
)

###Indicadores estadisticos c_anual

# Function for the mode: picks the value of `v` whose count equals the
# largest count (the first such value when there are ties).
get_mode <- function(v) {
  values <- unique(v)
  counts <- tabulate(match(v, values))
  values[match(max(counts), counts)]
}

# Location and spread of c_anual, reused by the indicator table.
sd_c_anual     <- sd(c_anual)
median_c_anual <- median(c_anual)
mean_c_anual   <- mean(c_anual)

# Summary indicators for c_anual, each rounded to two decimals.
# skewness() and kurtosis() are presumably the e1071 versions loaded at the
# top of the file.
indicadores_c_anual <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(c_anual),
      median_c_anual,
      mean_c_anual,
      sd_c_anual,
      var(c_anual),
      (sd_c_anual / mean_c_anual) * 100,  # coefficient of variation, percent
      skewness(c_anual),
      kurtosis(c_anual)
    ),
    2
  )
)

##Tabla indicadores c_anual

# Print the indicator table for c_anual (console output recorded below).
print(indicadores_c_anual)
##             Indicador     Valor
## 1                Moda   2350.00
## 2             Mediana   1850.00
## 3            Media(x)   1856.46
## 4 Desviacion Estandar    678.52
## 5            Varianza 460395.76
## 6  Coef Variacion (%)     36.55
## 7           Asimetria     -0.42
## 8            Curtosis      1.99