library(rio)
enaho <- import("https://github.com/JairAlva/Estadistica-II/raw/master/enaho_2018_m85.sav")
str(enaho$P39B1)
##  num [1:37462] 5 5 6 3 3 4 1 NA 5 7 ...
##  - attr(*, "label")= chr "Si la condición económica de su hogar es medida en una escala del 1 al 10 ¿En que escalón considera se encuentr"| __truncated__
##  - attr(*, "format.spss")= chr "F2.0"
##  - attr(*, "display_width")= int 7
str(enaho$P208_02)
##  num [1:37462] 49 58 47 59 47 30 40 NA 27 61 ...
##  - attr(*, "label")= chr "¿Qué edad tiene en años cumplidos?"
##  - attr(*, "format.spss")= chr "F2.0"
##  - attr(*, "display_width")= int 9
sapply(enaho[c("P39B1", "P208_02", "P301_02", "P207_02")], class)
##     P39B1   P208_02   P301_02   P207_02 
## "numeric" "numeric" "numeric" "numeric"
modelo1=formula( P39B1~ P208_02+ P301_02)
reg1=lm(modelo1,data=enaho)
summary(reg1)
## 
## Call:
## lm(formula = modelo1, data = enaho)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8629 -1.0121 -0.2602  0.7638  7.3950 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0796829  0.0347360  31.083  < 2e-16 ***
## P208_02     0.0023984  0.0004883   4.912 9.08e-07 ***
## P301_02     0.3591330  0.0031478 114.089  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.352 on 33936 degrees of freedom
##   (3523 observations deleted due to missingness)
## Multiple R-squared:  0.2918, Adjusted R-squared:  0.2918 
## F-statistic:  6993 on 2 and 33936 DF,  p-value: < 2.2e-16
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
model1=list('Condicion economica'=reg1)
modelsummary(model1, title = "Regresion: modelo 1",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 1
Condicion economica
(Intercept) 1.080***
(0.035)
P208_02 0.002***
(0.000)
P301_02 0.359***
(0.003)
Num.Obs. 33939
R2 0.292
R2 Adj. 0.292
AIC 116784.2
BIC 116817.9
Log.Lik. -58388.104
RMSE 1.35
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
modelo2=formula( P39B1~ P208_02+ P301_02+P207_02)
reg2=lm(modelo2,data=enaho)
summary(reg2)
## 
## Call:
## lm(formula = modelo2, data = enaho)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9889 -1.0187 -0.2101  0.8085  7.3311 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.7654992  0.0447254  17.116  < 2e-16 ***
## P208_02     0.0030943  0.0004914   6.296 3.08e-10 ***
## P301_02     0.3644871  0.0031788 114.661  < 2e-16 ***
## P207_02     0.1654877  0.0148805  11.121  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.349 on 33935 degrees of freedom
##   (3523 observations deleted due to missingness)
## Multiple R-squared:  0.2944, Adjusted R-squared:  0.2944 
## F-statistic:  4720 on 3 and 33935 DF,  p-value: < 2.2e-16
library(magrittr)
library(knitr)
tanova=anova(reg1,reg2)

kable(tanova,
      caption = "Tabla ANOVA para comparar modelos")%>%kableExtra::kable_styling(full_width = FALSE)
Tabla ANOVA para comparar modelos
Res.Df RSS Df Sum of Sq F Pr(>F)
33936 62019.89 NA NA NA NA
33935 61794.68 1 225.217 123.6796 0
plot(reg1,1
    )

###homocedasticidad

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(kableExtra)
# null: modelo homocedastico
resBP=bptest(reg1)
data.frame(list('BP'=resBP$statistic,
             'df'=resBP$parameter,
             "p-value"=resBP$p.value))%>%
    kable(caption = resBP$method)%>%kable_styling(full_width = F)
studentized Breusch-Pagan test
BP df p.value
BP 170.2904 2 0

###Normalidad de residuos

plot(reg1, 2)

if (length(reg1$residuals) >= 3 & length(reg1$residuals) <= 5000) {
  resSW = shapiro.test(reg1$residuals)
  data.frame(SW = resSW$statistic,
             `p-value` = resSW$p.value) %>%
    kable(caption = resSW$method) %>%
    kable_styling(full_width = F)
} else {
  print("Shapiro-Wilk solo se aplica entre 3 y 5000 observaciones.")
}
## [1] "Shapiro-Wilk solo se aplica entre 3 y 5000 observaciones."
length(reg1$residuals)
## [1] 33939
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following objects are masked from 'package:modelsummary':
## 
##     Format, Mean, Median, N, SD, Var

###NO MULTICOLINEALIDAD

VIF(reg1) %>%kable(col.names = "VIF",caption ="Evaluando Multicolinealidad usando VIF (Variance Inflation Factors)" )%>%kable_styling(full_width = F)
Evaluando Multicolinealidad usando VIF (Variance Inflation Factors)
VIF
P208_02 1.101172
P301_02 1.101172
plot(reg1)

###Valores influyentes

plot(reg1,5)

checkReg2=as.data.frame(influence.measures(reg1)$is.inf)
checkReg2[checkReg2$cook.d & checkReg2$hat,c('cook.d','hat')]%>%kable(caption = "Valores Influyentes criticos")%>%kable_styling(full_width = F)
Valores Influyentes criticos
cook.d hat
NA NA
:—— :—

##PREGUNTA 2

library(rio)
data <- import("https://github.com/JairAlva/Estadistica-II/raw/master/iop_seguridad_2012.sav")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data$pnp <- recode(data$P7,
                                     `1` = 1,  # 1 se mantiene como 1
                                     `2` = 1,  # 2 se recodifica a 1
                                     `3` = 0,  # 3 se recodifica a 0
                                     `4` = 0,  # 4 se recodifica a 0
                                     `9` = NA_real_)  #9 como datos perdidos
sapply(data[c("VP1", "NSEGrup", "EDAD", "P2")], class)
##       VP1   NSEGrup      EDAD        P2 
## "numeric" "numeric" "numeric" "numeric"