Estadísticas descriptivas de Mtcars

Mpg

# Load the built-in mtcars data set into the workspace.
data(mtcars)

# One-row summary of `mpg`: mean, minimum, quartiles, median, maximum and
# standard deviation. The column is referenced through dplyr's data mask
# (`mpg`, not `mtcars$mpg`) so the statistics are computed on the data frame
# that is actually piped in; `mtcars$mpg` would silently ignore any filter or
# grouping applied upstream.
mpg_e <- mtcars %>%
  summarise(
    Promedio = mean(mpg, na.rm = TRUE),
    Min = min(mpg, na.rm = TRUE),
    Q1 = quantile(mpg, probs = 0.25, na.rm = TRUE),
    Mediana = median(mpg, na.rm = TRUE),
    Q3 = quantile(mpg, probs = 0.75, na.rm = TRUE),
    Max = max(mpg, na.rm = TRUE),
    Desviación_estándar = sd(mpg, na.rm = TRUE)
  )

# Render the summary as a styled table; row 0 is the header row, shown in
# bold on a light grey background.
mpg_e %>%
  kbl(caption = "Miles/gallon") %>%
  kable_styling(full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#D3D3D3")
Miles/gallon
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
20.09062 10.4 15.425 19.2 22.8 33.9 6.026948

Cyl

Number of cylinders
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
6.1875 4 4 6 8 8 1.785922

Disp

Displacement
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
230.7219 71.1 120.825 196.3 326 472 123.9387

HP

Gross horsepower
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
146.6875 52 96.5 123 180 335 68.56287

Drat

Rear axle ratio
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
3.596563 2.76 3.08 3.695 3.92 4.93 0.5346787

WT

Weight
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
3.21725 1.513 2.58125 3.325 3.61 5.424 0.9784574

QSec

1/4 mile time
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
17.84875 14.5 16.8925 17.71 18.9 22.9 1.786943

VS

Engine
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
0.4375 0 0 0 1 1 0.5040161

AM

Transmission
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
0.40625 0 0 0 1 1 0.4989909

Gear

Number of forward gears
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
3.6875 3 3 4 4 5 0.7378041

carb

Number of carburetors
Promedio Min Q1 Mediana Q3 Max Desviación_estándar
2.8125 1 2 2 4 8 1.6152

Raincloud plots de mtcars

mpg por grupo de cyl

# Copy of mtcars with `cyl` recoded as a factor so it is mapped as a discrete
# grouping variable on the x axis and in the fill/colour scales.
data <- mtcars %>%
  mutate(cyl = as.factor(cyl))

# Raincloud plot of mpg by number of cylinders: density slab, box plot and
# the individual observations.
ggplot(data, aes(x = cyl, y = mpg, fill = cyl)) +
  # Density slab
  ggdist::stat_halfeye(
    adjust = 0.5,
    width = 0.6,
    .width = 0,
    justification = -0.3,
    alpha = 0.6
  ) +
  # Box plot; outliers are hidden because every point is drawn below
  geom_boxplot(width = 0.2, outlier.shape = NA, alpha = 0.5) +
  # Individual observations. Jitter is horizontal only (height = 0): the
  # default vertical jitter would move the plotted mpg values away from the
  # measured ones. A fixed seed keeps the figure identical between renders.
  geom_jitter(
    aes(color = cyl),
    position = position_jitter(width = 0.1, height = 0, seed = 1),
    size = 1,
    alpha = 0.8
  ) +
  # Same palette for fill and colour so slab, box and points match per group
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    title = "Raincloud Plot de 'mpg' por número de cilindros",
    x = "Número de cilindros",
    y = "Millas por galón (mpg)",
    fill = "Cilindros",
    color = "Cilindros"
  )

hp por grupo de cyl

mpg por grupo de am

drat por grupo de gears

qsec por grupo de vs

# Correlation Matrix mtcars

# Working copy of the data set used for the correlation analysis.
datos <- mtcars

# Pairwise correlation matrix of every column (cor() defaults).
matriz_correlacion <- cor(datos)

# Lower-triangle heat map with the coefficients printed in black on the
# coloured cells.
corrplot(
  matriz_correlacion,
  type = "lower",
  method = "color",
  addCoef.col = "black",
  number.cex = 0.6
)

Scatter plot mtcars

Relación entre disp y hp

# Simple linear regression of horsepower (hp) on displacement (disp).
modelo <- lm(formula = hp ~ disp, data = mtcars)

# Print coefficients, residual summary and fit statistics.
summary(modelo)
## 
## Call:
## lm(formula = hp ~ disp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.623 -28.378  -6.558  13.588 157.562 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  45.7345    16.1289   2.836  0.00811 ** 
## disp          0.4375     0.0618   7.080 7.14e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42.65 on 30 degrees of freedom
## Multiple R-squared:  0.6256, Adjusted R-squared:  0.6131 
## F-statistic: 50.13 on 1 and 30 DF,  p-value: 7.143e-08
# Scatter plot of hp against disp (solid blue points).
with(
  mtcars,
  plot(
    x = disp,
    y = hp,
    pch = 19,
    col = "blue",
    main = "Relación entre disp y hp",
    xlab = "Desplazamiento del motor (disp)",
    ylab = "Caballos de fuerza (hp)"
  )
)
# Overlay the fitted line of `modelo` in red.
abline(modelo, lwd = 2, col = "red")

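Relación entre cyl y hp

Nota: la salida que sigue corresponde al modelo `lm(hp ~ disp)` y no a `cyl`; debe reajustarse con `lm(hp ~ cyl, data = mtcars)`.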

## 
## Call:
## lm(formula = hp ~ disp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.623 -28.378  -6.558  13.588 157.562 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  45.7345    16.1289   2.836  0.00811 ** 
## disp          0.4375     0.0618   7.080 7.14e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42.65 on 30 degrees of freedom
## Multiple R-squared:  0.6256, Adjusted R-squared:  0.6131 
## F-statistic: 50.13 on 1 and 30 DF,  p-value: 7.143e-08

Relación entre carb y hp

## 
## Call:
## lm(formula = hp ~ carb, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -74.48 -26.83  12.00  29.17  79.52 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   57.170     16.566   3.451  0.00168 ** 
## carb          31.828      5.128   6.207 7.83e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46.11 on 30 degrees of freedom
## Multiple R-squared:  0.5622, Adjusted R-squared:  0.5476 
## F-statistic: 38.53 on 1 and 30 DF,  p-value: 7.828e-07

Relación entre mpg y drat

## 
## Call:
## lm(formula = mpg ~ drat, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.0775 -2.6803 -0.2095  2.2976  9.0225 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.525      5.477  -1.374     0.18    
## drat           7.678      1.507   5.096 1.78e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.485 on 30 degrees of freedom
## Multiple R-squared:  0.464,  Adjusted R-squared:  0.4461 
## F-statistic: 25.97 on 1 and 30 DF,  p-value: 1.776e-05

Prueba t student (automático vs manual)

mpg

# Recode the transmission indicator in place: 0 -> "Automático",
# 1 -> "Manual". NOTE: this overwrites `am` in the workspace copy of mtcars.
mtcars$am <- factor(
  mtcars$am,
  levels = c(0, 1),
  labels = c("Automático", "Manual")
)

# Violin plot of mpg per transmission type with a narrow white box plot
# drawn on top of each violin.
ggplot(data = mtcars, mapping = aes(x = am, y = mpg, fill = am)) +
  geom_violin(color = "black", trim = FALSE) +
  geom_boxplot(color = "black", fill = "white", width = 0.1) +
  scale_fill_manual(values = c("skyblue", "salmon")) +
  theme_minimal() +
  labs(
    title = "Consumo de Combustible según Tipo de Transmisión",
    x = "Tipo de Transmisión",
    y = "Millas por Galón (mpg)"
  )

# Reload the original data set so `am` is the numeric 0/1 indicator again;
# the filters below compare it against 0 and 1.
data(mtcars)

# Split the cars by transmission: am == 0 and am == 1.
automatico <- filter(mtcars, am == 0)
manual <- filter(mtcars, am == 1)

# Two-sample t-test on mpg between the two groups (t.test() defaults),
# then print the result.
t_test <- t.test(automatico$mpg, manual$mpg)
print(t_test)
## 
##  Welch Two Sample t-test
## 
## data:  automatico$mpg and manual$mpg
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.280194  -3.209684
## sample estimates:
## mean of x mean of y 
##  17.14737  24.39231

cyl

## 
##  Welch Two Sample t-test
## 
## data:  automatico$cyl and manual$cyl
## t = 3.3541, df = 25.854, p-value = 0.002465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.7238488 3.0170419
## sample estimates:
## mean of x mean of y 
##  6.947368  5.076923

disp

## 
##  Welch Two Sample t-test
## 
## data:  automatico$disp and manual$disp
## t = 4.1977, df = 29.258, p-value = 0.00023
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   75.32779 218.36857
## sample estimates:
## mean of x mean of y 
##  290.3789  143.5308

hp

## 
##  Welch Two Sample t-test
## 
## data:  automatico$hp and manual$hp
## t = 1.2662, df = 18.715, p-value = 0.221
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -21.87858  88.71259
## sample estimates:
## mean of x mean of y 
##  160.2632  126.8462

drat

## 
##  Welch Two Sample t-test
## 
## data:  automatico$drat and manual$drat
## t = -5.6461, df = 27.198, p-value = 5.267e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.0411183 -0.4862501
## sample estimates:
## mean of x mean of y 
##  3.286316  4.050000

wt

## 
##  Welch Two Sample t-test
## 
## data:  automatico$wt and manual$wt
## t = 5.4939, df = 29.234, p-value = 6.272e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.8525632 1.8632262
## sample estimates:
## mean of x mean of y 
##  3.768895  2.411000

qsec

## 
##  Welch Two Sample t-test
## 
## data:  automatico$qsec and manual$qsec
## t = 1.2878, df = 25.534, p-value = 0.2093
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4918522  2.1381679
## sample estimates:
## mean of x mean of y 
##  18.18316  17.36000

vs

## 
##  Welch Two Sample t-test
## 
## data:  automatico$vs and manual$vs
## t = -0.92713, df = 25.129, p-value = 0.3627
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5476703  0.2075894
## sample estimates:
## mean of x mean of y 
## 0.3684211 0.5384615

gear

## 
##  Welch Two Sample t-test
## 
## data:  automatico$gear and manual$gear
## t = -6.8995, df = 22.568, p-value = 5.462e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.5264836 -0.8216945
## sample estimates:
## mean of x mean of y 
##  3.210526  4.384615

carb

## 
##  Welch Two Sample t-test
## 
## data:  automatico$carb and manual$carb
## t = -0.28265, df = 16.589, p-value = 0.7809
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.579013  1.206543
## sample estimates:
## mean of x mean of y 
##  2.736842  2.923077

Estadísticas descriptivas (Manual vs Automático)

# Reload the original data set and label the transmission indicator
# (0 -> "Automático", 1 -> "Manual").
data(mtcars)

mtcars$am <- factor(
  mtcars$am,
  levels = c(0, 1),
  labels = c("Automático", "Manual")
)

# Per-group descriptive statistics for every non-grouping column. Output
# columns are named "<column>_<statistic>". Missing values are dropped
# (na.rm = TRUE) so the statistics are computed the same way as the overall
# mpg summary earlier in the report.
descriptivas2 <- mtcars %>%
  group_by(am) %>%
  summarise(
    across(
      .cols = everything(),
      .fns = list(
        Media = ~ mean(.x, na.rm = TRUE),
        DesvStd = ~ sd(.x, na.rm = TRUE),
        Mínimo = ~ min(.x, na.rm = TRUE),
        Q1 = ~ quantile(.x, 0.25, na.rm = TRUE),
        Mediana = ~ median(.x, na.rm = TRUE),
        Q3 = ~ quantile(.x, 0.75, na.rm = TRUE),
        Máximo = ~ max(.x, na.rm = TRUE)
      ),
      .names = "{.col}_{.fn}"
    ),
    # Return an ungrouped result explicitly.
    .groups = "drop"
  )

# Reshape to one row per (variable, statistic) with one column per
# transmission type. The name is split at the LAST underscore, so a source
# column whose name itself contains "_" would still be separated correctly.
tabla_final <- descriptivas2 %>%
  pivot_longer(
    cols = -am,
    names_to = c("Variable", "Estadística"),
    names_pattern = "^(.*)_([^_]+)$"
  ) %>%
  pivot_wider(names_from = am, values_from = value)

# Render as a styled table; row 0 is the header row.
tabla_final %>%
  kbl(caption = "Estadísticas de mtcars por grupo (am)") %>%
  kable_styling(full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#D3D3D3")
Estadísticas de mtcars por grupo (am)
Variable Estadística Automático Manual
mpg Media 17.1473684 24.3923077
mpg DesvStd 3.8339664 6.1665038
mpg Mínimo 10.4000000 15.0000000
mpg Q1 14.9500000 21.0000000
mpg Mediana 17.3000000 22.8000000
mpg Q3 19.2000000 30.4000000
mpg Máximo 24.4000000 33.9000000
cyl Media 6.9473684 5.0769231
cyl DesvStd 1.5446569 1.5525001
cyl Mínimo 4.0000000 4.0000000
cyl Q1 6.0000000 4.0000000
cyl Mediana 8.0000000 4.0000000
cyl Q3 8.0000000 6.0000000
cyl Máximo 8.0000000 8.0000000
disp Media 290.3789474 143.5307692
disp DesvStd 110.1716468 87.2039887
disp Mínimo 120.1000000 71.1000000
disp Q1 196.3000000 79.0000000
disp Mediana 275.8000000 120.3000000
disp Q3 360.0000000 160.0000000
disp Máximo 472.0000000 351.0000000
hp Media 160.2631579 126.8461538
hp DesvStd 53.9081957 84.0623243
hp Mínimo 62.0000000 52.0000000
hp Q1 116.5000000 66.0000000
hp Mediana 175.0000000 109.0000000
hp Q3 192.5000000 113.0000000
hp Máximo 245.0000000 335.0000000
drat Media 3.2863158 4.0500000
drat DesvStd 0.3923039 0.3640513
drat Mínimo 2.7600000 3.5400000
drat Q1 3.0700000 3.8500000
drat Mediana 3.1500000 4.0800000
drat Q3 3.6950000 4.2200000
drat Máximo 3.9200000 4.9300000
wt Media 3.7688947 2.4110000
wt DesvStd 0.7774001 0.6169816
wt Mínimo 2.4650000 1.5130000
wt Q1 3.4375000 1.9350000
wt Mediana 3.5200000 2.3200000
wt Q3 3.8425000 2.7800000
wt Máximo 5.4240000 3.5700000
qsec Media 18.1831579 17.3600000
qsec DesvStd 1.7513076 1.7923588
qsec Mínimo 15.4100000 14.5000000
qsec Q1 17.1750000 16.4600000
qsec Mediana 17.8200000 17.0200000
qsec Q3 19.1700000 18.6100000
qsec Máximo 22.9000000 19.9000000
vs Media 0.3684211 0.5384615
vs DesvStd 0.4955946 0.5188745
vs Mínimo 0.0000000 0.0000000
vs Q1 0.0000000 0.0000000
vs Mediana 0.0000000 1.0000000
vs Q3 1.0000000 1.0000000
vs Máximo 1.0000000 1.0000000
gear Media 3.2105263 4.3846154
gear DesvStd 0.4188539 0.5063697
gear Mínimo 3.0000000 4.0000000
gear Q1 3.0000000 4.0000000
gear Mediana 3.0000000 4.0000000
gear Q3 3.0000000 5.0000000
gear Máximo 4.0000000 5.0000000
carb Media 2.7368421 2.9230769
carb DesvStd 1.1470787 2.1779784
carb Mínimo 1.0000000 1.0000000
carb Q1 2.0000000 1.0000000
carb Mediana 3.0000000 2.0000000
carb Q3 4.0000000 4.0000000
carb Máximo 4.0000000 8.0000000

mpg

Estadísticas de ‘mpg’ por grupo (am)
Variable Estadística Automático Manual
mpg Media 17.147368 24.392308
mpg DesvStd 3.833966 6.166504
mpg Mínimo 10.400000 15.000000
mpg Q1 14.950000 21.000000
mpg Mediana 17.300000 22.800000
mpg Q3 19.200000 30.400000
mpg Máximo 24.400000 33.900000
## [1] "p-value=  0.00137363833307103"

cyl

Estadísticas de ‘cyl’ por grupo (am)
Variable Estadística Automático Manual
cyl Media 6.947368 5.076923
cyl DesvStd 1.544657 1.552500
cyl Mínimo 4.000000 4.000000
cyl Q1 6.000000 4.000000
cyl Mediana 8.000000 4.000000
cyl Q3 8.000000 6.000000
cyl Máximo 8.000000 8.000000
## [1] "p-value=  0.00246471260018701"

disp

Estadísticas de ‘disp’ por grupo (am)
Variable Estadística Automático Manual
disp Media 290.3789 143.53077
disp DesvStd 110.1716 87.20399
disp Mínimo 120.1000 71.10000
disp Q1 196.3000 79.00000
disp Mediana 275.8000 120.30000
disp Q3 360.0000 160.00000
disp Máximo 472.0000 351.00000
## [1] "p-value=  0.000230041299162995"

hp

Estadísticas de la variable ‘hp’ por grupo (am)
Variable Estadística Automático Manual
hp Media 160.2632 126.84615
hp DesvStd 53.9082 84.06232
hp Mínimo 62.0000 52.00000
hp Q1 116.5000 66.00000
hp Mediana 175.0000 109.00000
hp Q3 192.5000 113.00000
hp Máximo 245.0000 335.00000
## [1] "p-value=  0.220979581335913"

drat

Estadísticas de la variable ‘drat’ por grupo (am)
Variable Estadística Automático Manual
drat Media 3.2863158 4.0500000
drat DesvStd 0.3923039 0.3640513
drat Mínimo 2.7600000 3.5400000
drat Q1 3.0700000 3.8500000
drat Mediana 3.1500000 4.0800000
drat Q3 3.6950000 4.2200000
drat Máximo 3.9200000 4.9300000
## [1] "p-value=  5.2667424950871e-06"

wt

Estadísticas de la variable ‘wt’ por grupo (am)
Variable Estadística Automático Manual
wt Media 3.7688947 2.4110000
wt DesvStd 0.7774001 0.6169816
wt Mínimo 2.4650000 1.5130000
wt Q1 3.4375000 1.9350000
wt Mediana 3.5200000 2.3200000
wt Q3 3.8425000 2.7800000
wt Máximo 5.4240000 3.5700000
## [1] "p-value=  6.27201991008014e-06"

qsec

Estadísticas de la variable ‘qsec’ por grupo (am)
Variable Estadística Automático Manual
qsec Media 18.183158 17.360000
qsec DesvStd 1.751308 1.792359
qsec Mínimo 15.410000 14.500000
qsec Q1 17.175000 16.460000
qsec Mediana 17.820000 17.020000
qsec Q3 19.170000 18.610000
qsec Máximo 22.900000 19.900000
## [1] "p-value=  0.209349760479263"

vs

Estadísticas de la variable ‘vs’ por grupo (am)
Variable Estadística Automático Manual
vs Media 0.3684211 0.5384615
vs DesvStd 0.4955946 0.5188745
vs Mínimo 0.0000000 0.0000000
vs Q1 0.0000000 0.0000000
vs Mediana 0.0000000 1.0000000
vs Q3 1.0000000 1.0000000
vs Máximo 1.0000000 1.0000000
## [1] "p-value=  0.362676287229613"

gear

Estadísticas de la variable ‘gear’ por grupo (am)
Variable Estadística Automático Manual
gear Media 3.2105263 4.3846154
gear DesvStd 0.4188539 0.5063697
gear Mínimo 3.0000000 4.0000000
gear Q1 3.0000000 4.0000000
gear Mediana 3.0000000 4.0000000
gear Q3 3.0000000 5.0000000
gear Máximo 4.0000000 5.0000000
## [1] "p-value=  5.4615991242273e-07"

carb

Estadísticas de la variable ‘carb’ por grupo (am)
Variable Estadística Automático Manual
carb Media 2.736842 2.923077
carb DesvStd 1.147079 2.177978
carb Mínimo 1.000000 1.000000
carb Q1 2.000000 1.000000
carb Mediana 3.000000 2.000000
carb Q3 4.000000 4.000000
carb Máximo 4.000000 8.000000
## [1] "p-value=  0.780943947215288"