# Cargar librerías necesarias
library(ggplot2)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(GGally)
## Warning: package 'GGally' was built under R version 4.5.1
# Load the built-in mtcars dataset into the workspace
data("mtcars")
# Preview the first rows of the dataset
head(x = mtcars)
# Descriptive statistics for every column
summary(object = mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Histogram of every numeric column, drawn on one 3 x 4 panel grid
# (enough cells for the 11 columns of mtcars).
# par() returns the previous settings, so keep them in order to restore the
# layout that was active before this chunk instead of forcing it to 1 x 1.
old_par <- par(mfrow = c(3, 4))
# Skip non-numeric columns: hist() stops with an error on a factor, which
# happens if this chunk is re-run after `am` has been recoded as a factor.
numeric_cols <- names(mtcars)[vapply(mtcars, is.numeric, logical(1))]
for (col_name in numeric_cols) {
  hist(mtcars[[col_name]], main = col_name, col = "lightblue",
       xlab = "", ylab = "Frecuencia")
}
par(old_par)
# Pairwise plot matrix of all columns (visual correlation overview)
ggpairs(data = mtcars)
# Boxplot of mpg by transmission type.
# Recode `am` as a labelled factor first; the labels are applied to the
# existing levels in their sorted order.
mtcars[["am"]] <- factor(mtcars[["am"]], labels = c("Automática", "Manual"))
ggplot(data = mtcars, mapping = aes(x = am, y = mpg, fill = am)) +
  geom_boxplot() +
  labs(title = "Distribución de mpg según tipo de transmisión") +
  theme_minimal()
# Split mpg into one vector per transmission type
# (relies on `am` already being the labelled factor)
auto <- with(mtcars, mpg[am == "Automática"])
manual <- with(mtcars, mpg[am == "Manual"])
# F test for equality of the two group variances
var.test(x = auto, y = manual)
##
## F test to compare two variances
##
## data: auto and manual
## F = 0.38656, num df = 18, denom df = 12, p-value = 0.06691
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1243721 1.0703429
## sample estimates:
## ratio of variances
## 0.3865615
# Decision rule based on the p-value of the F test above:
#   p > 0.05 -> treat variances as equal   (var.equal = TRUE)
#   p < 0.05 -> treat variances as unequal (var.equal = FALSE)
# NOTE(review): the F test output above reports p-value = 0.06691 (> 0.05),
# so this rule would select var.equal = TRUE, yet the call below runs the
# Welch test (var.equal = FALSE). Confirm which variant is intended; the
# reported output corresponds to the Welch test.
t.test(x = auto, y = manual, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: auto and manual
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.280194 -3.209684
## sample estimates:
## mean of x mean of y
## 17.14737 24.39231
# Split weight (wt) into one vector per transmission type
# (relies on `am` already being the labelled factor)
auto_wt <- with(mtcars, wt[am == "Automática"])
manual_wt <- with(mtcars, wt[am == "Manual"])
# F test for equality of the two group variances
var.test(x = auto_wt, y = manual_wt)
##
## F test to compare two variances
##
## data: auto_wt and manual_wt
## F = 1.5876, num df = 18, denom df = 12, p-value = 0.4177
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5107978 4.3959133
## sample estimates:
## ratio of variances
## 1.587613
# Two-sample t test on weight between transmission types (Welch variant).
# NOTE(review): the F test output above reports p-value = 0.4177 (> 0.05),
# which under the script's own rule would allow var.equal = TRUE; confirm
# that keeping var.equal = FALSE here is intended.
t.test(x = auto_wt, y = manual_wt, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: auto_wt and manual_wt
## t = 5.4939, df = 29.234, p-value = 6.272e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.8525632 1.8632262
## sample estimates:
## mean of x mean of y
## 3.768895 2.411000
# Fit a simple linear regression of mpg on horsepower (hp)
modelo_hp <- lm(formula = mpg ~ hp, data = mtcars)
# Coefficients, residual summary and goodness of fit
summary(object = modelo_hp)
##
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7121 -2.1122 -0.8854 1.5819 8.2360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886 1.63392 18.421 < 2e-16 ***
## hp -0.06823 0.01012 -6.742 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892
## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07
# Scatter plot of mpg against hp with the fitted regression line
# and its confidence band (se = TRUE)
ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, col = "blue") +
  labs(title = "Regresión mpg ~ hp") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Explicación:
# Fit a simple linear regression of mpg on drat
modelo_drat <- lm(formula = mpg ~ drat, data = mtcars)
# Coefficients, residual summary and goodness of fit
summary(object = modelo_drat)
##
## Call:
## lm(formula = mpg ~ drat, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.0775 -2.6803 -0.2095 2.2976 9.0225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.525 5.477 -1.374 0.18
## drat 7.678 1.507 5.096 1.78e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.485 on 30 degrees of freedom
## Multiple R-squared: 0.464, Adjusted R-squared: 0.4461
## F-statistic: 25.97 on 1 and 30 DF, p-value: 1.776e-05
# Scatter plot of mpg against drat with the fitted regression line
# and its confidence band (se = TRUE)
ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, col = "green") +
  labs(title = "Regresión mpg ~ drat") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
El modelo mpg ~ hp tiene un R² de 0.6024 y un coeficiente
negativo y significativo (-0.068; p = 1.79e-07), lo que indica que a mayor
potencia (hp), menor rendimiento (mpg). El modelo mpg ~ drat tiene un R²
menor (0.464), aunque su coeficiente también es significativo
(p = 1.78e-05). Por tanto, se prefiere el modelo con hp como variable
explicativa, pues tiene mejor ajuste y una interpretación lógica y
estadísticamente válida.