Simple linear regression with the women dataset included in the base package
Make basic predictions using a woman's height as the independent variable and her weight as the dependent variable.
datos <- women     # built-in dataset: height (in) and weight (lb) of 15 women
library(dplyr)     # loaded but not used in this example
library(lubridate) # loaded but not used in this example
library(knitr)     # loaded but not used in this example
library(ggplot2)   # used for the scatter and regression plots below
str(datos)
## 'data.frame': 15 obs. of 2 variables:
## $ height: num 58 59 60 61 62 63 64 65 66 67 ...
## $ weight: num 115 117 120 123 126 129 132 135 139 142 ...
summary(datos)
##      height         weight
##  Min.   :58.0   Min.   :115.0
##  1st Qu.:61.5   1st Qu.:124.5
##  Median :65.0   Median :135.0
##  Mean   :65.0   Mean   :136.7
##  3rd Qu.:68.5   3rd Qu.:148.0
##  Max.   :72.0   Max.   :164.0
ggplot(datos, aes(x = height, y = weight)) +
geom_point()
CR <- cor(datos$height, datos$weight) # Pearson
CR
## [1] 0.9954948
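As an optional check that is not part of the original script, cor.test() from base R can be used to test whether this Pearson correlation differs significantly from zero:
# Optional check (not in the original script): significance test of the
# Pearson correlation between height and weight.
cor.test(datos$height, datos$weight, method = "pearson")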
The model to fit is a straight line,

\[ y = a + b\,x \]

where \(y\) is the weight, \(x\) is the height, \(a\) is the intercept, and \(b\) is the slope.
modelo <- lm(data = datos, formula = weight ~ height)
# modelo
summary(modelo)
##
## Call:
## lm(formula = weight ~ height, data = datos)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -1.7333 -1.1333 -0.3833  0.7417  3.1167
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -87.51667    5.93694  -14.74 1.71e-09 ***
## height        3.45000    0.09114   37.85 1.09e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.525 on 13 degrees of freedom
## Multiple R-squared: 0.991, Adjusted R-squared: 0.9903
## F-statistic: 1433 on 1 and 13 DF, p-value: 1.091e-14
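As an optional addition not in the original script, the base R plot() method for lm objects gives the standard residual diagnostics for this model:
# Optional diagnostics (not in the original script), using base R graphics.
par(mfrow = c(2, 2))  # arrange the four diagnostic plots in a 2 x 2 grid
plot(modelo)          # residuals vs fitted, Q-Q, scale-location, leverage
par(mfrow = c(1, 1))  # restore the default layout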
a <- modelo$coefficients[1]  # intercept
b <- modelo$coefficients[2]  # slope
a
## (Intercept)
## -87.51667
b
## height
## 3.45
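Beyond the point estimates, interval estimates for the intercept and slope can be obtained with confint(); this is an optional sketch, not part of the original script:
# Optional (not in the original script): 95% confidence intervals for a and b.
confint(modelo, level = 0.95)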
ggplot(data = datos, mapping = aes(x = height, y = weight)) +
  geom_point(color = "firebrick", size = 2) +
  labs(title = 'weight ~ height', x = 'Height', y = 'Weight') +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
x <- c(64, 70, 76, 66)  # heights (in inches) at which to predict the weight
y <- a + b * x          # predictions from the fitted equation
y
## [1] 133.2833 153.9833 174.6833 140.1833
prediccion <- predict(modelo, newdata = data.frame(height = x))
prediccion
## 1 2 3 4
## 133.2833 153.9833 174.6833 140.1833
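As an optional extension of the example, predict() can also return 95% prediction intervals; note that height = 76 lies outside the observed range (58 to 72 inches), so that prediction is an extrapolation:
# Optional extension (not in the original script): predictions with 95%
# prediction intervals. height = 76 is outside the data range (extrapolation).
predict(modelo, newdata = data.frame(height = x), interval = "prediction")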