# ==========================================================
# Importación de datos
# ==========================================================

# Load the dataset into `datos`.
# The CSV location defaults to the original author's Downloads folder. To run
# the script on another machine, set the IRIS_CSV environment variable to the
# path of the file; an unset or empty variable falls back to the default.
ruta_datos <- Sys.getenv("IRIS_CSV", unset = "")
if (!nzchar(ruta_datos)) {
  ruta_datos <- "/Users/rafaelpg/Downloads/Base_iris_trabajoR.csv"
}
datos <- read.csv(ruta_datos)

# Initial exploration: preview the first six rows, inspect the column types,
# and print per-column summary statistics.
# Lines starting with `##` are the recorded console output of each call.
utils::head(datos, n = 6L)
##   Sepal_lenght Sepal_width Petal_length Petal_width Variety
## 1          5.1         3.5          1.4         0.2  Setosa
## 2          4.9         3.0          1.4         0.2  Setosa
## 3          4.7         3.2          1.3         0.2  Setosa
## 4          4.6         3.1          1.5         0.2  Setosa
## 5          5.0         3.6          1.4         0.2  Setosa
## 6          5.4         3.9          1.7         0.4  Setosa
utils::str(datos)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal_lenght: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal_width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal_length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal_width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Variety     : chr  "Setosa" "Setosa" "Setosa" "Setosa" ...
summary(object = datos)
##   Sepal_lenght    Sepal_width     Petal_length    Petal_width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##    Variety         
##  Length:150        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Keep only the numeric columns of `datos`.
# vapply() guarantees a logical mask with one value per column (sapply() can
# silently change its return type on unusual input), and drop = FALSE keeps
# the result a data frame instead of collapsing it to a bare vector when a
# single numeric column is selected.
es_numerica <- vapply(datos, is.numeric, logical(1))
num_vars <- datos[, es_numerica, drop = FALSE]

# Correlation matrix of the numeric variables (cor() default method).
cor(num_vars)
##              Sepal_lenght Sepal_width Petal_length Petal_width
## Sepal_lenght    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal_width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal_length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal_width     0.8179411  -0.3661259    0.9628654   1.0000000

# Scatterplot matrix of the same variables.
pairs(num_vars)

# ==========================================================
# Pregunta Predictiva
# ==========================================================
# ¿Pueden el largo y el ancho del pétalo predecir el largo del sépalo?

# ==========================================================
# Ajuste del modelo lineal
# ==========================================================

# Fit a linear model of sepal length on petal length and petal width.
# lm() is called un-namespaced on purpose: the call is stored in the fitted
# object and echoed verbatim in the printed summary.
modelo <- lm(
  formula = Sepal_lenght ~ Petal_length + Petal_width,
  data = datos
)

# Summarise the fit once, keep the summary for later use, and print it.
resumen <- summary(modelo)
resumen
## 
## Call:
## lm(formula = Sepal_lenght ~ Petal_length + Petal_width, data = datos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18534 -0.29838 -0.02763  0.28925  1.02320 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.19058    0.09705  43.181  < 2e-16 ***
## Petal_length  0.54178    0.06928   7.820 9.41e-13 ***
## Petal_width  -0.31955    0.16045  -1.992   0.0483 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4031 on 147 degrees of freedom
## Multiple R-squared:  0.7663, Adjusted R-squared:  0.7631 
## F-statistic:   241 on 2 and 147 DF,  p-value: < 2.2e-16

# Goodness-of-fit figures extracted from the stored summary.
R2 <- resumen[["r.squared"]]
R2_ajustado <- resumen[["adj.r.squared"]]

R2
## [1] 0.7662613
R2_ajustado
## [1] 0.7630812

# Akaike information criterion of the fitted model.
stats::AIC(modelo)
## [1] 158.0468

# Coefficient table: estimates, standard errors, t values and p-values.
stats::coef(resumen)
##                Estimate Std. Error   t value     Pr(>|t|)
## (Intercept)   4.1905824 0.09704587 43.181459 2.092645e-85
## Petal_length  0.5417772 0.06928179  7.819907 9.414477e-13
## Petal_width  -0.3195506 0.16045262 -1.991557 4.827246e-02
# ==========================================================
# Diagnóstico del modelo
# ==========================================================

# Draw the lm diagnostic plots for `modelo` on a single 2 x 2 page.
# par() returns the previous values of the settings it changes; keeping that
# result and passing it back restores whatever layout was active before,
# instead of forcing a 1 x 1 layout regardless of the earlier state.
par_previo <- par(mfrow = c(2, 2))
plot(modelo)

par(par_previo)

# ==========================================================
# Conclusiones
# ==========================================================

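# Los resultados del modelo muestran que el largo y el ancho del pétalo
# sí predicen significativamente el largo del sépalo (F = 241, p < 2.2e-16).
# Los coeficientes indican efectos de signo opuesto: manteniendo constante
# el ancho del pétalo, el largo del sépalo aumenta con el largo del pétalo
# (0.54, p < 0.001); manteniendo constante el largo del pétalo, disminuye
# ligeramente con el ancho del pétalo (-0.32, p = 0.048, apenas
# significativo). Como ambos predictores están muy correlacionados entre sí
# (r = 0.96), el coeficiente del ancho debe interpretarse con cautela.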

# Los valores de R² (0.766) y R² ajustado (0.763) reflejan que el modelo
# explica una proporción considerable (alrededor del 76 %) de la
# variabilidad del largo del sépalo.