library(car)
library(skimr)
library(ggplot2)
library(ggcorrplot)
library(tidyverse)
library(texreg)
library(prediction)
library(lmtest)
library(sandwich)
library(miceadds)
library(gridExtra)
library(ggpubr)
library(pastecs)
library(olsrr)
library(coefplot)
library(stargazer)
library(psych)
library(modelsummary)
library(olsrr)
library(coefplot)
#creamos base de datos
x0 <- c(1,1,1,1,1,1,1,1,1,1)
x1 <- c(5,25,45,35,42,33,20,55,17,8)
x2 <- c(15,35,9,7,6,6,4,10,3,2)
y <- c(10,50,90,70,85,60,40,110,35,15)
data <- data.frame(y,x0,x1,x2)
data
## y x0 x1 x2
## 1 10 1 5 15
## 2 50 1 25 35
## 3 90 1 45 9
## 4 70 1 35 7
## 5 85 1 42 6
## 6 60 1 33 6
## 7 40 1 20 4
## 8 110 1 55 10
## 9 35 1 17 3
## 10 15 1 8 2
#creamos matriz de parametros desconocidos "K"
k <- data.matrix(data[,c("x0","x1","x2")])
k
## x0 x1 x2
## [1,] 1 5 15
## [2,] 1 25 35
## [3,] 1 45 9
## [4,] 1 35 7
## [5,] 1 42 6
## [6,] 1 33 6
## [7,] 1 20 4
## [8,] 1 55 10
## [9,] 1 17 3
## [10,] 1 8 2
#creamos matriz de Y
Y <- data.matrix(data[,c("y")])
Y
## [,1]
## [1,] 10
## [2,] 50
## [3,] 90
## [4,] 70
## [5,] 85
## [6,] 60
## [7,] 40
## [8,] 110
## [9,] 35
## [10,] 15
#calculamos transpuesta de las X
TRx <- t(k)
TRx
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## x0 1 1 1 1 1 1 1 1 1 1
## x1 5 25 45 35 42 33 20 55 17 8
## x2 15 35 9 7 6 6 4 10 3 2
#calculamos transpuesta de x por x
TRX_x_X = TRx %*% k
TRX_x_X
## x0 x1 x2
## x0 10 285 97
## x1 285 10531 2747
## x2 97 2747 1781
#generar inversa de "x_transpuesta"*"x"
invtrXxX = solve(TRX_x_X)
invtrXxX #matriz 1
## x0 x1 x2
## x0 0.55409237 -0.0119187894 -0.0117945230
## x1 -0.01191879 0.0004152590 0.0000086502
## x2 -0.01179452 0.0000086502 0.0011905147
#multiplicamos matriz transpuesta de x por matriz de y
trXxY = TRx %*% Y
trXxY #matriz 2
## [,1]
## x0 565
## x1 20915
## x2 5465
#Multiplicamos matriz 1 por matriz 2
betas_estimados = invtrXxX %*% trXxY
betas_estimados
## [,1]
## x0 -0.67635865
## x1 1.99830001
## x2 0.02317611
#multiplicamos matriz x por matriz estimado de vetas
Xxbeta = k %*% betas_estimados
Xxbeta
## [,1]
## [1,] 9.662783
## [2,] 50.092306
## [3,] 89.455727
## [4,] 69.426375
## [5,] 83.391299
## [6,] 65.406598
## [7,] 39.382346
## [8,] 109.461903
## [9,] 33.364270
## [10,] 15.356394
#Multiplicamos
Y_gorro <- k %*% betas_estimados
Y_gorro
## [,1]
## [1,] 9.662783
## [2,] 50.092306
## [3,] 89.455727
## [4,] 69.426375
## [5,] 83.391299
## [6,] 65.406598
## [7,] 39.382346
## [8,] 109.461903
## [9,] 33.364270
## [10,] 15.356394
#hallar errores estimados
error <- Y-Y_gorro
error
## [,1]
## [1,] 0.33721693
## [2,] -0.09230554
## [3,] 0.54427306
## [4,] 0.57362541
## [5,] 1.60870143
## [6,] -5.40659845
## [7,] 0.61765394
## [8,] 0.53809682
## [9,] 1.63573009
## [10,] -0.35639368
#sacamos tranpuesta del error
Tr_ee <- t(error)
Tr_ee
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.3372169 -0.09230554 0.5442731 0.5736254 1.608701 -5.406598 0.6176539
## [,8] [,9] [,10]
## [1,] 0.5380968 1.63573 -0.3563937
#multiplicamos "transpuesta del error" * "erro"
Tr_ee_x_ee <- Tr_ee %*% error
Tr_ee_x_ee
## [,1]
## [1,] 36.04042
#hallar constante de la varianza
a <- 1/(10-3)
a
## [1] 0.1428571
#multiplicar constante de la varianza por transpuesta del error por el error
Varianza <- a* Tr_ee_x_ee
Varianza
## [,1]
## [1,] 5.148631
#corroboramos varianza
Varianza_modelo <- sum(error^2)/(10-3)
Varianza_modelo
## [1] 5.148631
#Matriz de varianzas y covarinzas de los betas
V_b <- Varianza_modelo*invtrXxX
V_b
## x0 x1 x2
## x0 2.85281707 -0.06136544648 -0.06072564516
## x1 -0.06136545 0.00213801546 0.00004453669
## x2 -0.06072565 0.00004453669 0.00612952058
#errores de la matriz de betas en diagonal
errores_betas <- sqrt(diag(V_b))
errores_betas
## x0 x1 x2
## 1.68902844 0.04623868 0.07829125
#samaos media de y
media_y <- mean(y)
media_y
## [1] 56.5
#Nombramos matricialmente el intercepto
I <- data.matrix(data[,c("x0")])
I
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
## [6,] 1
## [7,] 1
## [8,] 1
## [9,] 1
## [10,] 1
#multiplicamos media de y por I
IxY <- I*media_y
IxY
## [,1]
## [1,] 56.5
## [2,] 56.5
## [3,] 56.5
## [4,] 56.5
## [5,] 56.5
## [6,] 56.5
## [7,] 56.5
## [8,] 56.5
## [9,] 56.5
## [10,] 56.5
#A la matriz de Y le restamos la media de y por I
Y_IxY <- Y-IxY
Y_IxY #matriz 3
## [,1]
## [1,] -46.5
## [2,] -6.5
## [3,] 33.5
## [4,] 13.5
## [5,] 28.5
## [6,] 3.5
## [7,] -16.5
## [8,] 53.5
## [9,] -21.5
## [10,] -41.5
#le sacamos transpuesta a la matriz 3
TR_Y_IxY <- t(Y_IxY)
TR_Y_IxY #matriz 4
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] -46.5 -6.5 33.5 13.5 28.5 3.5 -16.5 53.5 -21.5 -41.5
#multiplicamos matriz 4 por matriz 3
Variacion_total <- TR_Y_IxY %*% Y_IxY
Variacion_total
## [,1]
## [1,] 9652.5
#Hallamos explicada
Variacion_explicada <- sum((Y_gorro-IxY)^2)
Variacion_explicada
## [1] 9616.46
#hallamos R cuadrado
R_2 <- Variacion_explicada/Variacion_total
R_2
## [,1]
## [1,] 0.9962662
#Revisamos variable
# Tener presente que la base de datos de este ejemplo se llama "data"
skimr::skim(data)
Name | data |
Number of rows | 10 |
Number of columns | 4 |
_______________________ | |
Column type frequency: | |
numeric | 4 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
y | 0 | 1 | 56.5 | 32.75 | 10 | 36.25 | 55.0 | 81.25 | 110 | ▅▇▅▅▂ |
x0 | 0 | 1 | 1.0 | 0.00 | 1 | 1.00 | 1.0 | 1.00 | 1 | ▁▁▇▁▁ |
x1 | 0 | 1 | 28.5 | 16.36 | 5 | 17.75 | 29.0 | 40.25 | 55 | ▅▇▅▅▂ |
x2 | 0 | 1 | 9.7 | 9.66 | 2 | 4.50 | 6.5 | 9.75 | 35 | ▇▃▁▁▁ |
stat.desc(data)
## y x0 x1 x2
## nbr.val 10.0000000 10 10.0000000 10.0000000
## nbr.null 0.0000000 0 0.0000000 0.0000000
## nbr.na 0.0000000 0 0.0000000 0.0000000
## min 10.0000000 1 5.0000000 2.0000000
## max 110.0000000 1 55.0000000 35.0000000
## range 100.0000000 0 50.0000000 33.0000000
## sum 565.0000000 10 285.0000000 97.0000000
## median 55.0000000 1 29.0000000 6.5000000
## mean 56.5000000 1 28.5000000 9.7000000
## SE.mean 10.3561576 0 5.1731143 3.0552323
## CI.mean.0.95 23.4272561 0 11.7023975 6.9114156
## var 1072.5000000 0 267.6111111 93.3444444
## std.dev 32.7490458 0 16.3588236 9.6614929
## coef.var 0.5796291 0 0.5739938 0.9960302
# Correlacion con decimales completos
cor(data)
## Warning in cor(data): the standard deviation is zero
## y x0 x1 x2
## y 1.0000000 NA 0.99810794 -0.00544310
## x0 NA 1 NA NA
## x1 0.9981079 NA 1.00000000 -0.01230267
## x2 -0.0054431 NA -0.01230267 1.00000000
#Grafico de matriz de correlacion
corPlot(data, cex = 1.2, main = "Matriz de correlación")
## Warning in cor(x, use = use, method = method): the standard deviation is zero
ggplot(data, aes(x1, y)) +
geom_point() +
labs(x = "Variable Explicatoria", y = "Variable Dependiente",
caption = "Fuente: ejemplo clase")
ols <- lm(y ~ x1 + x2 , data = data) # regresion 1
ols2 <- lm(y ~ x1 , data = data) # regresion 2
ols #opcion 1
##
## Call:
## lm(formula = y ~ x1 + x2, data = data)
##
## Coefficients:
## (Intercept) x1 x2
## -0.67636 1.99830 0.02318
summary(ols) #opcion 2
##
## Call:
## lm(formula = y ~ x1 + x2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4066 0.0151 0.5412 0.6066 1.6357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.67636 1.68903 -0.400 0.701
## x1 1.99830 0.04624 43.217 0.000000000927 ***
## x2 0.02318 0.07829 0.296 0.776
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.269 on 7 degrees of freedom
## Multiple R-squared: 0.9963, Adjusted R-squared: 0.9952
## F-statistic: 933.9 on 2 and 7 DF, p-value: 0.000000003181
stargazer(ols, type = "text") #opcion 3
##
## ===============================================
## Dependent variable:
## ---------------------------
## y
## -----------------------------------------------
## x1 1.998***
## (0.046)
##
## x2 0.023
## (0.078)
##
## Constant -0.676
## (1.689)
##
## -----------------------------------------------
## Observations 10
## R2 0.996
## Adjusted R2 0.995
## Residual Std. Error 2.269 (df = 7)
## F Statistic 933.885*** (df = 2; 7)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
ols2 #opcion 1
##
## Call:
## lm(formula = y ~ x1, data = data)
##
## Coefficients:
## (Intercept) x1
## -0.4468 1.9981
summary(ols2) #opcion 2
##
## Call:
## lm(formula = y ~ x1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4916 0.4631 0.5028 0.5448 1.5252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.44675 1.41225 -0.316 0.76
## x1 1.99813 0.04352 45.914 0.0000000000559 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.136 on 8 degrees of freedom
## Multiple R-squared: 0.9962, Adjusted R-squared: 0.9957
## F-statistic: 2108 on 1 and 8 DF, p-value: 0.00000000005594
stargazer(ols2, type = "text") #opcion 3
##
## ===============================================
## Dependent variable:
## ---------------------------
## y
## -----------------------------------------------
## x1 1.998***
## (0.044)
##
## Constant -0.447
## (1.412)
##
## -----------------------------------------------
## Observations 10
## R2 0.996
## Adjusted R2 0.996
## Residual Std. Error 2.136 (df = 8)
## F Statistic 2,108.104*** (df = 1; 8)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
m_list <- list(Regresion_Simple = ols, Regresion_Multiple = ols)
msummary(m_list)
Regresion_Simple | Regresion_Multiple | |
---|---|---|
(Intercept) | −0.676 | −0.676 |
(1.689) | (1.689) | |
x1 | 1.998 | 1.998 |
(0.046) | (0.046) | |
x2 | 0.023 | 0.023 |
(0.078) | (0.078) | |
Num.Obs. | 10 | 10 |
R2 | 0.996 | 0.996 |
R2 Adj. | 0.995 | 0.995 |
AIC | 49.2 | 49.2 |
BIC | 50.4 | 50.4 |
Log.Lik. | −20.600 | −20.600 |
F | 933.885 | 933.885 |
RMSE | 1.90 | 1.90 |
ols_test_breusch_pagan(ols) #Analizar heterocedasticidad y homocedasticidad, si se rechaza hay heterocedasticidad
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## -----------------------------
## Response : y
## Variables: fitted values of y
##
## Test Summary
## ----------------------------
## DF = 1
## Chi2 = 0.3144014
## Prob > Chi2 = 0.5749918
ols_coll_diag(ols) #Analiza si hay colinealidad
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 x1 0.9998486 1.000151
## 2 x2 0.9998486 1.000151
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept x1 x2
## 1 2.4975532 1.000000 0.02645886 0.03122997 0.05528154
## 2 0.3923799 2.522923 0.02563905 0.15600789 0.81320758
## 3 0.1100669 4.763530 0.94790209 0.81276214 0.13151088
ols_plot_resid_qq(ols) #Grafico prueba residuos qq deben estar proximos a la linea de proyeccion
ols_plot_resid_lev(ols) #Grafico para ver los residuos dentro del intervalo
ols_step_both_aic(ols) #Criterio akaika para comparar modelos
##
##
## Stepwise Summary
## ----------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ----------------------------------------------------------------------------
## x1 addition 47.324 36.492 9616.008 0.99622 0.99575
## ----------------------------------------------------------------------------
ols_test_normality(ols) #Test que tiene diferentes test de distribucion analisa el estadistico y el pvalue
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.6576 0.0003
## Kolmogorov-Smirnov 0.3293 0.1818
## Cramer-von Mises 0.1521 0.3881
## Anderson-Darling 1.4495 0.0004
## -----------------------------------------------
ols_plot_resid_hist(ols) #Grafico resiuos de la regresion con la distribucion
ols_plot_resid_stand(ols) #Muestra diferentes varainetes de los datos atipicos y como se estan distribuyendo, decirnos si la distribucion tienede a estar acumulada en una region de intervalos
ols_plot_resid_fit(ols) #sirve para detectar no linealidad, varianzas de error desiguales y valores atípicos.
ols_plot_cooksd_chart(ols) #Analizar la distribucion de cook, analizar residuos de la regresion
ols_plot_dfbetas(ols) #Panel of plo ts to detect influential observations using DFBETAs.
ols_plot_diagnostics(ols) #Panel of plots for regression diagnostics.
ols_step_both_p(ols) #Build regression model from a set of candidate predictor variables by entering and removing predictors based on p values,
##
## Stepwise Selection Summary
## -----------------------------------------------------------------------------------
## Added/ Adj.
## Step Variable Removed R-Square R-Square C(p) AIC RMSE
## -----------------------------------------------------------------------------------
## 1 x1 addition 0.996 0.996 1.0880 47.3237 2.1358
## -----------------------------------------------------------------------------------
coefplot(ols, title= "ols Regression") #function which extracts model coefficients from objects returned by modeling functions