PRACTICA_1.knit

library(rio)
enaho <- import("https://github.com/JairAlva/Estadistica-II/raw/master/enaho_2018_m85.sav")

str(enaho$P39B1)

##  num [1:37462] 5 5 6 3 3 4 1 NA 5 7 ...
##  - attr(*, "label")= chr "Si la condición económica de su hogar es medida en una escala del 1 al 10 ¿En que escalón considera se encuentr"| __truncated__
##  - attr(*, "format.spss")= chr "F2.0"
##  - attr(*, "display_width")= int 7

str(enaho$P208_02)

##  num [1:37462] 49 58 47 59 47 30 40 NA 27 61 ...
##  - attr(*, "label")= chr "¿Qué edad tiene en años cumplidos?"
##  - attr(*, "format.spss")= chr "F2.0"
##  - attr(*, "display_width")= int 9

sapply(enaho[c("P39B1", "P208_02", "P301_02", "P207_02")], class)

##     P39B1   P208_02   P301_02   P207_02 
## "numeric" "numeric" "numeric" "numeric"

modelo1=formula( P39B1~ P208_02+ P301_02)

reg1=lm(modelo1,data=enaho)
summary(reg1)

## 
## Call:
## lm(formula = modelo1, data = enaho)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8629 -1.0121 -0.2602  0.7638  7.3950 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0796829  0.0347360  31.083  < 2e-16 ***
## P208_02     0.0023984  0.0004883   4.912 9.08e-07 ***
## P301_02     0.3591330  0.0031478 114.089  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.352 on 33936 degrees of freedom
##   (3523 observations deleted due to missingness)
## Multiple R-squared:  0.2918, Adjusted R-squared:  0.2918 
## F-statistic:  6993 on 2 and 33936 DF,  p-value: < 2.2e-16

library(modelsummary)

## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)

model1=list('Condicion economica'=reg1)
modelsummary(model1, title = "Regresion: modelo 1",
             stars = TRUE,
             output = "kableExtra")

Regresion: modelo 1
	Condicion economica
(Intercept)	1.080***
	(0.035)
P208_02	0.002***
	(0.000)
P301_02	0.359***
	(0.003)
Num.Obs.	33939
R2	0.292
R2 Adj.	0.292
AIC	116784.2
BIC	116817.9
Log.Lik.	-58388.104
RMSE	1.35
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001

modelo2=formula( P39B1~ P208_02+ P301_02+P207_02)

reg2=lm(modelo2,data=enaho)
summary(reg2)

## 
## Call:
## lm(formula = modelo2, data = enaho)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9889 -1.0187 -0.2101  0.8085  7.3311 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.7654992  0.0447254  17.116  < 2e-16 ***
## P208_02     0.0030943  0.0004914   6.296 3.08e-10 ***
## P301_02     0.3644871  0.0031788 114.661  < 2e-16 ***
## P207_02     0.1654877  0.0148805  11.121  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.349 on 33935 degrees of freedom
##   (3523 observations deleted due to missingness)
## Multiple R-squared:  0.2944, Adjusted R-squared:  0.2944 
## F-statistic:  4720 on 3 and 33935 DF,  p-value: < 2.2e-16

library(magrittr)
library(knitr)
tanova=anova(reg1,reg2)

kable(tanova,
      caption = "Tabla ANOVA para comparar modelos")%>%kableExtra::kable_styling(full_width = FALSE)

Tabla ANOVA para comparar modelos
Res.Df	RSS	Df	Sum of Sq	F	Pr(>F)
33936	62019.89	NA	NA	NA	NA
33935	61794.68	1	225.217	123.6796	0

plot(reg1,1
    )

###homocedasticidad

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(kableExtra)
# null: modelo homocedastico
resBP=bptest(reg1)
data.frame(list('BP'=resBP$statistic,
             'df'=resBP$parameter,
             "p-value"=resBP$p.value))%>%
    kable(caption = resBP$method)%>%kable_styling(full_width = F)

studentized Breusch-Pagan test
	BP	df	p.value
BP	170.2904	2	0

###Normalidad de residuos

plot(reg1, 2)

if (length(reg1$residuals) >= 3 & length(reg1$residuals) <= 5000) {
  resSW = shapiro.test(reg1$residuals)
  data.frame(SW = resSW$statistic,
             `p-value` = resSW$p.value) %>%
    kable(caption = resSW$method) %>%
    kable_styling(full_width = F)
} else {
  print("Shapiro-Wilk solo se aplica entre 3 y 5000 observaciones.")
}

## [1] "Shapiro-Wilk solo se aplica entre 3 y 5000 observaciones."

length(reg1$residuals)

## [1] 33939

library(DescTools)

## 
## Attaching package: 'DescTools'

## The following objects are masked from 'package:modelsummary':
## 
##     Format, Mean, Median, N, SD, Var

###NO MULTICOLINEALIDAD

VIF(reg1) %>%kable(col.names = "VIF",caption ="Evaluando Multicolinealidad usando VIF (Variance Inflation Factors)" )%>%kable_styling(full_width = F)

Evaluando Multicolinealidad usando VIF (Variance Inflation Factors)
	VIF
P208_02	1.101172
P301_02	1.101172

plot(reg1)

###Valores influyentes

plot(reg1,5)

checkReg2=as.data.frame(influence.measures(reg1)$is.inf)
checkReg2[checkReg2$cook.d & checkReg2$hat,c('cook.d','hat')]%>%kable(caption = "Valores Influyentes criticos")%>%kable_styling(full_width = F)

Valores Influyentes criticos
cook.d	hat
NA	NA
:——	:—

##PREGUNTA 2

library(rio)
data <- import("https://github.com/JairAlva/Estadistica-II/raw/master/iop_seguridad_2012.sav")

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:kableExtra':
## 
##     group_rows

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

data$pnp <- recode(data$P7,
                                     `1` = 1,  # 1 se mantiene como 1
                                     `2` = 1,  # 2 se recodifica a 1
                                     `3` = 0,  # 3 se recodifica a 0
                                     `4` = 0,  # 4 se recodifica a 0
                                     `9` = NA_real_)  #9 como datos perdidos

sapply(data[c("VP1", "NSEGrup", "EDAD", "P2")], class)

##       VP1   NSEGrup      EDAD        P2 
## "numeric" "numeric" "numeric" "numeric"