# ==========================================================
# Data import
# ==========================================================
# Location of the iris CSV. The original absolute path stays as the default
# so existing runs behave the same; set the IRIS_CSV environment variable to
# point the script at a copy stored somewhere else.
ruta_datos <- Sys.getenv("IRIS_CSV")
if (!nzchar(ruta_datos)) {
  ruta_datos <- "/Users/rafaelpg/Downloads/Base_iris_trabajoR.csv"
}
# Fail early with a message that names the path and the override, instead of
# read.csv()'s generic "cannot open file" error.
if (!file.exists(ruta_datos)) {
  stop(
    "Data file not found: ", ruta_datos,
    ". Set the IRIS_CSV environment variable to its location.",
    call. = FALSE
  )
}
datos <- read.csv(ruta_datos)
# Initial exploration: first rows, column types and per-column summaries.
# NOTE: the first column is spelled `Sepal_lenght` in the CSV header (see the
# output below); the model formula later in this script uses that spelling.
head(datos)
## Sepal_lenght Sepal_width Petal_length Petal_width Variety
## 1 5.1 3.5 1.4 0.2 Setosa
## 2 4.9 3.0 1.4 0.2 Setosa
## 3 4.7 3.2 1.3 0.2 Setosa
## 4 4.6 3.1 1.5 0.2 Setosa
## 5 5.0 3.6 1.4 0.2 Setosa
## 6 5.4 3.9 1.7 0.4 Setosa
str(datos)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal_lenght: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal_width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal_length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal_width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Variety : chr "Setosa" "Setosa" "Setosa" "Setosa" ...
summary(datos)
## Sepal_lenght Sepal_width Petal_length Petal_width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Variety
## Length:150
## Class :character
## Mode :character
##
##
##
# Keep only the numeric columns. vapply() guarantees a logical mask, and
# list-style `[` keeps the result a data frame even when a single column
# qualifies (matrix-style `datos[, mask]` would collapse it to a bare vector,
# which cor() does not accept on its own).
es_numerica <- vapply(datos, is.numeric, logical(1))
num_vars <- datos[es_numerica]
# Pairwise correlations between the numeric variables (cor() defaults to
# Pearson).
cor(num_vars)
## Sepal_lenght Sepal_width Petal_length Petal_width
## Sepal_lenght 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal_width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal_length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal_width 0.8179411 -0.3661259 0.9628654 1.0000000
# Scatterplot matrix of the same variables.
pairs(num_vars)

# ==========================================================
# Predictive question
# ==========================================================
# Can petal length and petal width predict sepal length?
# ==========================================================
# Linear model fit
# ==========================================================
# Least-squares fit of sepal length on both petal measurements.
modelo <- lm(
  formula = Sepal_lenght ~ Petal_length + Petal_width,
  data = datos
)
# Summarise once and reuse the object for both the printed report and the
# statistics extracted below.
resumen <- summary(modelo)
resumen
##
## Call:
## lm(formula = Sepal_lenght ~ Petal_length + Petal_width, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.18534 -0.29838 -0.02763 0.28925 1.02320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.19058 0.09705 43.181 < 2e-16 ***
## Petal_length 0.54178 0.06928 7.820 9.41e-13 ***
## Petal_width -0.31955 0.16045 -1.992 0.0483 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4031 on 147 degrees of freedom
## Multiple R-squared: 0.7663, Adjusted R-squared: 0.7631
## F-statistic: 241 on 2 and 147 DF, p-value: < 2.2e-16
# Goodness of fit: R-squared and its adjusted version.
R2 <- resumen[["r.squared"]]
R2_ajustado <- resumen[["adj.r.squared"]]
R2
## [1] 0.7662613
R2_ajustado
## [1] 0.7630812
# Akaike information criterion of the fitted model.
AIC(modelo)
## [1] 158.0468
# Coefficient table: estimate, standard error, t value and p-value.
coef(resumen)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1905824 0.09704587 43.181459 2.092645e-85
## Petal_length 0.5417772 0.06928179 7.819907 9.414477e-13
## Petal_width -0.3195506 0.16045262 -1.991557 4.827246e-02
# ==========================================================
# Model diagnostics
# ==========================================================
# Draw the lm diagnostic plots on a 2 x 2 grid. par() returns the previous
# values of the settings it changes, so saving them lets us put the device
# back exactly as it was instead of assuming the layout used to be 1 x 1.
par_previo <- par(mfrow = c(2, 2))
plot(modelo)

par(par_previo)
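# ==========================================================
# Conclusions
# ==========================================================
# The model shows that petal length and petal width jointly predict sepal
# length significantly (F = 241 on 2 and 147 DF, p < 2.2e-16).
# The two coefficients do NOT point in the same direction: holding the other
# predictor fixed, sepal length increases with petal length (0.54,
# p < 0.001) but decreases with petal width (-0.32, p = 0.048, only
# marginally significant at the 5% level).
# On its own, petal width is positively correlated with sepal length
# (r = 0.82); the negative partial coefficient should be read with care
# because the two predictors are strongly correlated with each other
# (r = 0.96).
# R² = 0.766 and adjusted R² = 0.763: the model explains about 77% of the
# variability in sepal length.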