En la presente publicación voy a proceder a calcular los coeficientes
de una regresión lineal a partir de datos aleatorios, siguiendo el
método matricial y de Cuadrados Mínimos. Finalmente se comparará con los
resultados obtenidos por la función lm()
.
\[\left[ {\begin{array}{cc} \beta_0 \\ \beta_1 \\ \end{array} } \right] = (\mathbf{X}^T.X )^{-1}. (\mathbf{X}^T.Y)\]
<- c("y","X1","X2","X3")
columnas <- c(87,90,90,89,97,89,80,89,85,92)
x1 <- c(113,119,117,132,118,124,110,120,137,112)
x2 <- c(128,129,129,143,125,130,117,125,134,117)
x3 <- c(100,101,112,123,106,112,91,101,105,91) y
<- data.frame(y,x1,x2,x3)
df colnames(df) <- columnas
%>%
df kbl() %>%
kable_styling(bootstrap_options = "striped",full_width = F,position = "center")
y | X1 | X2 | X3 |
---|---|---|---|
100 | 87 | 113 | 128 |
101 | 90 | 119 | 129 |
112 | 90 | 117 | 129 |
123 | 89 | 132 | 143 |
106 | 97 | 118 | 125 |
112 | 89 | 124 | 130 |
91 | 80 | 110 | 117 |
101 | 89 | 120 | 125 |
105 | 85 | 137 | 134 |
91 | 92 | 112 | 117 |
<- matrix(c(rep(1,length(y)),x1,x2,x3),byrow=F,ncol=4)
x x
## [,1] [,2] [,3] [,4]
## [1,] 1 87 113 128
## [2,] 1 90 119 129
## [3,] 1 90 117 129
## [4,] 1 89 132 143
## [5,] 1 97 118 125
## [6,] 1 89 124 130
## [7,] 1 80 110 117
## [8,] 1 89 120 125
## [9,] 1 85 137 134
## [10,] 1 92 112 117
= t(x)
x_T x_T
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1 1 1 1 1 1 1 1 1 1
## [2,] 87 90 90 89 97 89 80 89 85 92
## [3,] 113 119 117 132 118 124 110 120 137 112
## [4,] 128 129 129 143 125 130 117 125 134 117
= x_T%*%x
x.x_T x.x_T
## [,1] [,2] [,3] [,4]
## [1,] 10 888 1202 1277
## [2,] 888 79030 106730 113417
## [3,] 1202 106730 145156 153986
## [4,] 1277 113417 153986 163599
<- solve(x.x_T)
inv_x.x_T inv_x.x_T
## [,1] [,2] [,3] [,4]
## [1,] 72.37722499 -0.4900696670 -0.0628471322 -0.1660517875
## [2,] -0.49006967 0.0058187424 0.0006854003 -0.0008537179
## [3,] -0.06284713 0.0006854003 0.0046656918 -0.0043761360
## [4,] -0.16605179 -0.0008537179 -0.0043761360 0.0060131048
<- x_T%*%y
x.y x.y
## [,1]
## [1,] 1042
## [2,] 92633
## [3,] 125762
## [4,] 133665
<- inv_x.x_T%*%x.y betas_valores
<- data.frame(Betas=c("B0","B1","B2","B3"),betas_valores)
betas_df betas_df
## Betas betas_valores
## 1 B0 -78.6482460
## 2 B1 0.4400829
## 3 B2 -0.1655137
## 4 B3 1.2816260
\[\hat{Y} = -78.64 + 0.44.X_1-0.16.X_2 +1.28.X_3\]
Por medio de la función lm()
vamos a comprobar si los
resultados obtenidos por medio del método matricial son correctos.
lm(formula = y~.,data = df)
##
## Call:
## lm(formula = y ~ ., data = df)
##
## Coefficients:
## (Intercept) X1 X2 X3
## -78.6482 0.4401 -0.1655 1.2816