library(e1071) # o library(tidyverse)
# Switch into the data directory so the relative file name below resolves.
# NOTE(review): setwd() ties the script to one machine; a project-relative
# path would be more portable.
setwd("D:/Data")
# Semicolon-delimited file. A header row and "." as decimal mark are already
# the read.csv() defaults, so only the separator has to be stated.
datos <- read.csv(file = "database.csv", sep = ";")
# Return the most frequent value of a vector (the statistical mode).
#
# v: an atomic vector (numeric, character, logical, ...).
# Returns a length-one vector holding the value that occurs most often in `v`.
# When several values are tied, the one that appears first in `v` wins,
# because which.max() reports the first maximum. NA is counted like any
# other value.
get_mode <- function(v) {
  uniqv <- unique(v)
  # match() maps every element to its position in `uniqv`; tabulate() then
  # counts how many elements landed on each position.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
# Engine.Index is converted through character so the printed value is what
# gets parsed; entries that are not numbers become NA (with a coercion
# warning) and are dropped right away.
c_motor <- na.omit(as.numeric(as.character(datos$Engine.Index)))
# Range of the observed values
R <- diff(range(c_motor))
# Number of classes from Sturges' rule, truncated to a whole number
k <- floor(1 + (3.3 * log10(length(c_motor))))
# Class width
A <- R / k
# Lower and upper class limits. The last upper limit is pushed slightly past
# the maximum so the largest observation falls inside the final class.
liminf <- seq(from = min(c_motor), by = A, length.out = k)
limsup <- c(liminf[-k] + A, max(c_motor) + 0.01)
breaks <- c(liminf, max(limsup) + 0.01)
# Class mark: midpoint of each class
MC <- (liminf + limsup) / 2
# Absolute frequency of each class. Classes are closed on the left and open
# on the right, except the last one, which also includes its upper limit.
# vapply() builds the whole vector at once instead of growing it element by
# element, and seq_len() avoids the 1:0 trap of 1:k.
ni <- vapply(
  seq_len(k),
  function(i) {
    below_upper <- if (i == k) c_motor <= limsup[i] else c_motor < limsup[i]
    sum(c_motor >= liminf[i] & below_upper)
  },
  integer(1)
)
# Relative frequency as a percentage of all observations
hi <- (ni / length(c_motor)) * 100
# Ascending and descending cumulative frequencies (counts and percentages)
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- cumsum(hi)
Hidsc <- rev(cumsum(rev(hi)))
# Frequency table for c_motor: class limits, class mark, absolute and
# relative frequencies and their cumulative versions. Limits, class marks
# and percentages are rounded to two decimals; counts are left untouched.
TDFcu_c_motor <- local({
  tbl <- data.frame(
    liminf, limsup, MC, ni,
    hi_perc = hi,
    Niasc, Nidsc,
    Hiasc_perc = Hiasc,
    Hidsc_perc = Hidsc
  )
  two_dp <- c(
    "liminf", "limsup", "MC",
    "hi_perc", "Hiasc_perc", "Hidsc_perc"
  )
  tbl[two_dp] <- lapply(tbl[two_dp], round, digits = 2)
  tbl
})
print(TDFcu_c_motor)
## liminf limsup MC ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1 0.00 4318.88 2159.44 27459 73.51 27459 37354 73.51 100.00
## 2 4318.88 8637.75 6478.31 3169 8.48 30628 9895 81.99 26.49
## 3 8637.75 12956.62 10797.19 328 0.88 30956 6726 82.87 18.01
## 4 12956.62 17275.50 15116.06 22 0.06 30978 6398 82.93 17.13
## 5 17275.50 21594.38 19434.94 472 1.26 31450 6376 84.19 17.07
## 6 21594.38 25913.25 23753.81 80 0.21 31530 5904 84.41 15.81
## 7 25913.25 30232.12 28072.69 882 2.36 32412 5824 86.77 15.59
## 8 30232.12 34551.00 32391.56 96 0.26 32508 4942 87.03 13.23
## 9 34551.00 38869.88 36710.44 666 1.78 33174 4846 88.81 12.97
## 10 38869.88 43188.75 41029.31 254 0.68 33428 4180 89.49 11.19
## 11 43188.75 47507.62 45348.19 283 0.76 33711 3926 90.25 10.51
## 12 47507.62 51826.50 49667.06 837 2.24 34548 3643 92.49 9.75
## 13 51826.50 56145.38 53985.94 702 1.88 35250 2806 94.37 7.51
## 14 56145.38 60464.25 58304.81 1485 3.98 36735 2104 98.34 5.63
## 15 60464.25 64783.12 62623.69 204 0.55 36939 619 98.89 1.66
## 16 64783.12 69102.01 66942.57 415 1.11 37354 415 100.00 1.11
# Histogram of c_motor. A single number for `breaks` is only a suggestion to
# hist(), which chooses "pretty" cut points, so the bars need not coincide
# with the classes of the frequency table. The x axis runs from the minimum
# of c_motor to a fixed 70000.
H <- hist(
  c_motor,
  breaks = 16,
  xlim = c(min(c_motor), 70000),
  col = "steelblue",
  border = "black",
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DE LA CILINDRADA
DEL MOTOR (HISTOGRAMA)",
  xlab = "CILINDRADA",
  ylab = "CANTIDAD"
)
# Ogive data: cumulative counts plotted against the class limits. The
# ascending curve starts at 0 on the lowest limit; the descending curve ends
# at 0 on the highest limit.
x_asc <- with(TDFcu_c_motor, c(min(liminf), limsup))
y_asc <- c(0, TDFcu_c_motor$Niasc)
x_desc <- with(TDFcu_c_motor, c(liminf, max(limsup)))
y_desc <- c(TDFcu_c_motor$Nidsc, 0)
# Shared axis ranges so both curves fit in the same plot
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)
# plot() draws the ascending ogive; lines() overlays the descending one.
O1 <- plot(
  x_asc, y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  main = "GRÁFICA NO.2 OJIVA ASCENDENTE Y DESCENDENTE DE LA
CILINDRADA DEL MOTOR",
  xlab = "CILINDRADA DEL MOTOR",
  ylab = "CANTIDAD"
)
lines(x_desc, y_desc, type = "o", col = "darkblue")
# Horizontal box plot of c_motor; B1 keeps whatever boxplot() returns.
B1 <- boxplot(
  c_motor,
  main = "GRÁFICA NO.3 DISTRIBUCIÓN DE LA CILINDRADA DEL MOTOR",
  xlab = "CILINDRADA DEL MOTOR",
  col = "darkorange",
  horizontal = TRUE
)
# Second histogram of c_motor, this time without a `breaks` argument, so
# hist() uses its default binning.
HISTOGRAMAc_motor <- hist(
  c_motor,
  col = "darkred",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DE LA
CILINDRADA DEL MOTOR (HISTOGRAMA)",
  xlab = "CILINDRADA DEL MOTOR",
  ylab = "CANTIDAD",
  plot = TRUE
)
### Statistical indicators for c_motor
# Kept in named variables because the indicator table reuses them.
sd_c_motor <- sd(c_motor)
mean_c_motor <- mean(c_motor)
median_c_motor <- median(c_motor)
# Mode helper: returns the value that appears most often in `v`.
# Ties are resolved in favour of the value seen first in `v`.
get_mode <- function(v) {
  distinct_values <- unique(v)
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Indicator table for c_motor: the eight statistics are gathered first and
# rounded to two decimals in a single call. The coefficient of variation is
# the standard deviation expressed as a percentage of the mean.
indicadores_c_motor <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(c_motor),
      median_c_motor,
      mean_c_motor,
      sd_c_motor,
      var(c_motor),
      (sd_c_motor / mean_c_motor) * 100,
      skewness(c_motor),
      kurtosis(c_motor)
    ),
    2
  )
)
# Show the indicator table for c_motor
print(indicadores_c_motor)
## Indicador Valor
## 1 Moda 0.00
## 2 Mediana 219.50
## 3 Media(x) 8939.90
## 4 Desviacion Estandar 17899.80
## 5 Varianza 320402924.46
## 6 Coef Variacion (%) 200.22
## 7 Asimetria 2.06
## 8 Curtosis 2.71
# City MPG column parsed through character; entries that are not numbers
# become NA (with a coercion warning) and are dropped right away.
mpg_c <- na.omit(as.numeric(as.character(datos$Unadjusted.City.MPG..FT1.)))
# Range, number of classes (Sturges' rule, truncated) and class width
R <- diff(range(mpg_c))
k <- floor(1 + (3.3 * log10(length(mpg_c))))
A <- R / k
# Class limits; the last upper limit sits just above the maximum so the
# largest observation is counted in the final class.
liminf <- seq(from = min(mpg_c), by = A, length.out = k)
limsup <- c(liminf[-k] + A, max(mpg_c) + 0.01)
breaks <- c(liminf, max(limsup) + 0.01)
# Class mark: midpoint of each class
MC <- (liminf + limsup) / 2
# Absolute frequency of each class. Classes are closed on the left and open
# on the right, except the last one, which also includes its upper limit.
# vapply() builds the whole vector at once instead of growing it element by
# element, and seq_len() avoids the 1:0 trap of 1:k.
ni <- vapply(
  seq_len(k),
  function(i) {
    below_upper <- if (i == k) mpg_c <= limsup[i] else mpg_c < limsup[i]
    sum(mpg_c >= liminf[i] & below_upper)
  },
  integer(1)
)
# Relative frequency as a percentage of all observations
hi <- (ni / length(mpg_c)) * 100
# Ascending and descending cumulative frequencies (counts and percentages)
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- cumsum(hi)
Hidsc <- rev(cumsum(rev(hi)))
# Frequency table for mpg_c: class limits, class mark, absolute and relative
# frequencies and their cumulative versions. Limits, class marks and
# percentages are rounded to two decimals; counts are left untouched.
TDFcu_mpg_c <- local({
  tbl <- data.frame(
    liminf, limsup, MC, ni,
    hi_perc = hi,
    Niasc, Nidsc,
    Hiasc_perc = Hiasc,
    Hidsc_perc = Hidsc
  )
  two_dp <- c(
    "liminf", "limsup", "MC",
    "hi_perc", "Hiasc_perc", "Hidsc_perc"
  )
  tbl[two_dp] <- lapply(tbl[two_dp], round, digits = 2)
  tbl
})
print(TDFcu_mpg_c)
## liminf limsup MC ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1 0.00 55.43 27.71 26036 69.12 26036 37669 69.12 100.00
## 2 55.43 110.86 83.14 104 0.28 26140 11633 69.39 30.88
## 3 110.86 166.28 138.57 1713 4.55 27853 11529 73.94 30.61
## 4 166.28 221.71 194.00 4424 11.74 32277 9816 85.69 26.06
## 5 221.71 277.14 249.42 3311 8.79 35588 5392 94.48 14.31
## 6 277.14 332.57 304.85 1212 3.22 36800 2081 97.69 5.52
## 7 332.57 387.99 360.28 592 1.57 37392 869 99.26 2.31
## 8 387.99 443.42 415.71 121 0.32 37513 277 99.59 0.74
## 9 443.42 498.85 471.13 48 0.13 37561 156 99.71 0.41
## 10 498.85 554.28 526.56 28 0.07 37589 108 99.79 0.29
## 11 554.28 609.70 581.99 41 0.11 37630 80 99.90 0.21
## 12 609.70 665.13 637.42 6 0.02 37636 39 99.91 0.10
## 13 665.13 720.56 692.85 22 0.06 37658 33 99.97 0.09
## 14 720.56 775.99 748.27 4 0.01 37662 11 99.98 0.03
## 15 775.99 831.41 803.70 1 0.00 37663 7 99.98 0.02
## 16 831.41 886.85 859.13 6 0.02 37669 6 100.00 0.02
# Histogram of mpg_c over its full observed range. A single number for
# `breaks` is only a suggestion to hist(), which chooses "pretty" cut points,
# so the bars need not coincide with the classes of the frequency table.
H2 <- hist(
  mpg_c,
  breaks = 16,
  xlim = range(mpg_c),
  col = "steelblue",
  border = "black",
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DE MPG EN CIUDAD
(HISTOGRAMA)",
  xlab = "MPG_C",
  ylab = "CANTIDAD"
)
# Horizontal box plot of mpg_c (drawn only; the return value is not kept).
boxplot(
  mpg_c,
  main = "GRÁFICA NO.2 DISTRIBUCIÓN DE mpg EN CIUDAD",
  xlab = "mpg_c",
  col = "darkorange",
  horizontal = TRUE
)
# Ogive data: cumulative counts plotted against the class limits. The
# ascending curve starts at 0 on the lowest limit; the descending curve ends
# at 0 on the highest limit.
x_asc <- with(TDFcu_mpg_c, c(min(liminf), limsup))
y_asc <- c(0, TDFcu_mpg_c$Niasc)
x_desc <- with(TDFcu_mpg_c, c(liminf, max(limsup)))
y_desc <- c(TDFcu_mpg_c$Nidsc, 0)
# Shared axis ranges so both curves fit in the same plot
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)
# plot() draws the ascending ogive; lines() overlays the descending one.
plot(
  x_asc, y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  main = "GRÁFICA NO.3 OJIVA ASCENDENTE Y DESCENDENTE DE MPG
EN CIUDAD",
  xlab = "mpg_c",
  ylab = "CANTIDAD"
)
lines(x_desc, y_desc, type = "o", col = "darkblue")
# Second histogram of mpg_c, this time without a `breaks` argument, so
# hist() uses its default binning.
HISTOGRAMAmpg_c <- hist(
  mpg_c,
  col = "darkred",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DE MPG
EN CIUDAD (HISTOGRAMA)",
  xlab = "mpg_c",
  ylab = "CANTIDAD",
  plot = TRUE
)
### Statistical indicators for mpg_c
# Kept in named variables because the indicator table reuses them.
sd_mpg_c <- sd(mpg_c)
mean_mpg_c <- mean(mpg_c)
median_mpg_c <- median(mpg_c)
# Mode function: picks the most frequent value of `v`; on a tie the value
# that shows up first in `v` is returned.
get_mode <- function(v) {
  candidates <- unique(v)
  slot_of_each <- match(v, candidates)
  most_common <- which.max(tabulate(slot_of_each))
  candidates[most_common]
}
# Indicator table for mpg_c: the eight statistics are gathered first and
# rounded to two decimals in a single call. The coefficient of variation is
# the standard deviation expressed as a percentage of the mean.
indicadores_mpg_c <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(mpg_c),
      median_mpg_c,
      mean_mpg_c,
      sd_mpg_c,
      var(mpg_c),
      (sd_mpg_c / mean_mpg_c) * 100,
      skewness(mpg_c),
      kurtosis(mpg_c)
    ),
    2
  )
)
# Show the indicator table for mpg_c
print(indicadores_mpg_c)
## Indicador Valor
## 1 Moda 0.00
## 2 Mediana 24.00
## 3 Media(x) 83.88
## 4 Desviacion Estandar 103.54
## 5 Varianza 10721.47
## 6 Coef Variacion (%) 123.44
## 7 Asimetria 1.49
## 8 Curtosis 2.19
# Annual fuel cost column parsed through character; entries that are not
# numbers become NA and are dropped.
c_anual <- as.numeric(as.character(datos$Annual.Fuel.Cost..FT1.))
c_anual <- na.omit(c_anual)
# Range, number of classes (Sturges' rule, truncated) and class width
R <- max(c_anual) - min(c_anual)
k <- floor(1 + (3.3 * log10(length(c_anual))))
A <- R / k
# Class limits; the last upper limit sits just above the maximum so the
# largest observation is counted in the final class.
liminf <- seq(from = min(c_anual), by = A, length.out = k)
limsup <- liminf + A
limsup[k] <- max(c_anual) + 0.01
breaks <- c(liminf, max(limsup) + 0.01)
# Class mark: midpoint of each class. It has to be recomputed here;
# otherwise the c_anual frequency table silently reuses the MC vector left
# over from the mpg_c section.
# NOTE: the console output recorded further down predates this fix and still
# shows the stale MC column.
MC <- (liminf + limsup) / 2
# Absolute frequency of each class. Classes are closed on the left and open
# on the right, except the last one, which also includes its upper limit.
# vapply() builds the whole vector at once instead of growing it element by
# element, and seq_len() avoids the 1:0 trap of 1:k.
ni <- vapply(
  seq_len(k),
  function(i) {
    below_upper <- if (i == k) c_anual <= limsup[i] else c_anual < limsup[i]
    sum(c_anual >= liminf[i] & below_upper)
  },
  integer(1)
)
# Relative frequency as a percentage of all observations
hi <- (ni / length(c_anual)) * 100
# Ascending and descending cumulative frequencies (counts and percentages)
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- cumsum(hi)
Hidsc <- rev(cumsum(rev(hi)))
# Frequency table for c_anual: class limits, class mark, absolute and
# relative frequencies and their cumulative versions. Limits, class marks
# and percentages are rounded to two decimals; counts are left untouched.
# NOTE(review): MC is read from the workspace as-is; confirm it was computed
# from the c_anual class limits before this point.
TDFcu_c_anual <- local({
  tbl <- data.frame(
    liminf, limsup, MC, ni,
    hi_perc = hi,
    Niasc, Nidsc,
    Hiasc_perc = Hiasc,
    Hidsc_perc = Hidsc
  )
  two_dp <- c(
    "liminf", "limsup", "MC",
    "hi_perc", "Hiasc_perc", "Hidsc_perc"
  )
  tbl[two_dp] <- lapply(tbl[two_dp], round, digits = 2)
  tbl
})
print(TDFcu_c_anual)
## liminf limsup MC ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1 0.00 378.12 27.71 1975 5.24 1975 37698 5.24 100.00
## 2 378.12 756.25 83.14 158 0.42 2133 35723 5.66 94.76
## 3 756.25 1134.38 138.57 746 1.98 2879 35565 7.64 94.34
## 4 1134.38 1512.50 194.00 7223 19.16 10102 34819 26.80 92.36
## 5 1512.50 1890.62 249.42 9015 23.91 19117 27596 50.71 73.20
## 6 1890.62 2268.75 304.85 9863 26.16 28980 18581 76.87 49.29
## 7 2268.75 2646.88 360.28 4829 12.81 33809 8718 89.68 23.13
## 8 2646.88 3025.00 415.71 2697 7.15 36506 3889 96.84 10.32
## 9 3025.00 3403.12 471.13 617 1.64 37123 1192 98.47 3.16
## 10 3403.12 3781.25 526.56 304 0.81 37427 575 99.28 1.53
## 11 3781.25 4159.38 581.99 175 0.46 37602 271 99.75 0.72
## 12 4159.38 4537.50 637.42 83 0.22 37685 96 99.97 0.25
## 13 4537.50 4915.62 692.85 6 0.02 37691 13 99.98 0.03
## 14 4915.62 5293.75 748.27 0 0.00 37691 7 99.98 0.02
## 15 5293.75 5671.88 803.70 2 0.01 37693 7 99.99 0.02
## 16 5671.88 6050.01 859.13 5 0.01 37698 5 100.00 0.01
# Histogram of c_anual over its full observed range. A single number for
# `breaks` is only a suggestion to hist(), which chooses "pretty" cut points,
# so the bars need not coincide with the classes of the frequency table.
H3 <- hist(
  c_anual,
  breaks = 16,
  xlim = range(c_anual),
  col = "steelblue",
  border = "black",
  main = "GRÁFICA NO.1 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE (HISTOGRAMA)",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD"
)
# Ogive data: cumulative counts plotted against the class limits. The
# ascending curve starts at 0 on the lowest limit; the descending curve ends
# at 0 on the highest limit.
x_asc <- with(TDFcu_c_anual, c(min(liminf), limsup))
y_asc <- c(0, TDFcu_c_anual$Niasc)
x_desc <- with(TDFcu_c_anual, c(liminf, max(limsup)))
y_desc <- c(TDFcu_c_anual$Nidsc, 0)
# Shared axis ranges so both curves fit in the same plot
y_plot_range <- c(0, max(y_asc, y_desc, na.rm = TRUE))
x_plot_range <- range(x_asc, x_desc, na.rm = TRUE)
# plot() draws the ascending ogive; lines() overlays the descending one.
O1 <- plot(
  x_asc, y_asc,
  type = "o",
  col = "darkgreen",
  xlim = x_plot_range,
  ylim = y_plot_range,
  main = "GRÁFICA NO.2 OJIVA ASCENDENTE Y DESCENDENTE DEL COSTO ANUAL DE COMBUSTIBLE",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD"
)
lines(x_desc, y_desc, type = "o", col = "darkblue")
# Horizontal box plot of c_anual; B1 keeps whatever boxplot() returns.
B1 <- boxplot(
  c_anual,
  main = "GRÁFICA NO.3 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  col = "darkorange",
  horizontal = TRUE
)
# Second histogram of c_anual, this time without a `breaks` argument, so
# hist() uses its default binning.
HISTOGRAMAc_anual <- hist(
  c_anual,
  col = "darkred",
  main = "GRÁFICA NO.4 DISTRIBUCIÓN DEL COSTO ANUAL DE COMBUSTIBLE (HISTOGRAMA)",
  xlab = "COSTO ANUAL DE COMBUSTIBLE",
  ylab = "CANTIDAD",
  plot = TRUE
)
###Indicadores estadisticos c_anual
# Mode function: the value with the highest count in `v`; the earliest such
# value in `v` is returned when counts are tied.
get_mode <- function(v) {
  values_seen <- unique(v)
  # One bin per distinct value (at least one bin, which is also what
  # tabulate() falls back to when given nothing to count).
  tally <- tabulate(
    match(v, values_seen),
    nbins = max(1L, length(values_seen))
  )
  values_seen[which.max(tally)]
}
# Summary statistics for c_anual, kept in named variables because the
# indicator table right below reuses them.
sd_c_anual <- sd(c_anual)
mean_c_anual <- mean(c_anual)
median_c_anual <- median(c_anual)
# Indicator table for c_anual: the eight statistics are gathered first and
# rounded to two decimals in a single call. The coefficient of variation is
# the standard deviation expressed as a percentage of the mean.
indicadores_c_anual <- data.frame(
  Indicador = c(
    "Moda", "Mediana", "Media(x)", "Desviacion Estandar",
    "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"
  ),
  Valor = round(
    c(
      get_mode(c_anual),
      median_c_anual,
      mean_c_anual,
      sd_c_anual,
      var(c_anual),
      (sd_c_anual / mean_c_anual) * 100,
      skewness(c_anual),
      kurtosis(c_anual)
    ),
    2
  )
)
# Show the indicator table for c_anual
print(indicadores_c_anual)
## Indicador Valor
## 1 Moda 2350.00
## 2 Mediana 1850.00
## 3 Media(x) 1856.46
## 4 Desviacion Estandar 678.52
## 5 Varianza 460395.76
## 6 Coef Variacion (%) 36.55
## 7 Asimetria -0.42
## 8 Curtosis 1.99