library("stats")
library("psych")
library("readxl")
library("MASS")
library("ISLR")
library("fRegression")
library("vcd")
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:ISLR':
## 
##     Hitters
library("openxlsx")
library(readr)
# Load the raw data set; the path is relative to the current working directory.
Evidencia1 <- readr::read_csv(file = "Documents/Evidencia1.csv")
## Rows: 12510 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): ent, mun, est, pea, ocupados, informales
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#' Min-max rescale a numeric vector to the [0, 1] interval.
#'
#' @param x A numeric (or logical) vector.
#' @param na.rm If `TRUE`, missing values are ignored when computing the
#'   minimum and maximum and stay `NA` in the result. The default `FALSE`
#'   keeps the historical behaviour: a single `NA` makes every element `NA`.
#' @return A double vector the same length as `x`. A vector whose non-missing
#'   values are all equal is mapped to 0 instead of the `NaN` that 0 / 0
#'   would produce.
normalizar <- function(x, na.rm = FALSE) {
  stopifnot(is.numeric(x) || is.logical(x))
  if (length(x) == 0L) {
    # Nothing to rescale; also avoids the min()/max() warnings on empty input.
    return(numeric(0))
  }
  limites <- range(x, na.rm = na.rm)
  amplitud <- limites[2] - limites[1]
  centrado <- x - limites[1]
  if (!is.na(amplitud) && amplitud == 0) {
    # Every non-missing value equals the minimum, so `centrado` is already
    # 0 (or NA); adding 0 only forces a double result for integer input.
    return(centrado + 0)
  }
  centrado / amplitud
}

# Rescale pea, ocupados and informales with normalizar(), overwriting the
# raw columns in place.
Evidencia1[c("pea", "ocupados", "informales")] <- lapply(
  Evidencia1[c("pea", "ocupados", "informales")],
  normalizar
)

# Preview the first rows of the rescaled data.
head(Evidencia1)
## # A tibble: 6 × 6
##     ent   mun   est     pea  ocupados informales
##   <dbl> <dbl> <dbl>   <dbl>     <dbl>      <dbl>
## 1     0     0     1 0.703   0.965        0.552  
## 2     0     0     2 0.00122 0            0.00231
## 3     0     0     3 0.700   0.964        0.548  
## 4     0     0     4 0.707   0.967        0.557  
## 5     0     0     5 0.00261 0.0000300    0.00450
## 6     1     0     1 0.700   0.965        0.406
# Fit logit and probit models of pea on ocupados and informales.
#
# pea has been min-max rescaled above, so it is a continuous value in [0, 1]
# rather than a 0/1 outcome or a count of successes. family = binomial()
# warns "non-integer #successes in a binomial glm!" on such a response and
# fixes the dispersion at 1. quasibinomial() uses the same link and variance
# function (so the coefficient estimates are unchanged) but estimates the
# dispersion from the data and accepts non-integer responses without warning.
#
# NOTE(review): the summary output pasted further down in this file was
# produced with family = binomial; on re-run the standard errors, dispersion
# parameter and AIC (reported as NA for quasi families) will differ.
modelo_logit1 <- glm(
  pea ~ ocupados + informales,
  data = Evidencia1,
  family = quasibinomial(link = "logit")
)
modelo_probit1 <- glm(
  pea ~ ocupados + informales,
  data = Evidencia1,
  family = quasibinomial(link = "probit")
)
# Print the coefficient table and deviance statistics of the logit fit.
summary(modelo_logit1) 
## 
## Call:
## glm(formula = pea ~ ocupados + informales, family = binomial(link = "logit"), 
##     data = Evidencia1)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.24025    0.07139 -45.385   <2e-16 ***
## ocupados     4.51189    0.14279  31.597   <2e-16 ***
## informales  -0.40448    0.15826  -2.556   0.0106 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6004.06  on 12509  degrees of freedom
## Residual deviance:  143.96  on 12507  degrees of freedom
## AIC: 6315.5
## 
## Number of Fisher Scoring iterations: 6
# Print the coefficient table and deviance statistics of the probit fit.
summary(modelo_probit1)
## 
## Call:
## glm(formula = pea ~ ocupados + informales, family = binomial(link = "probit"), 
##     data = Evidencia1)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.80403    0.03280 -54.998   <2e-16 ***
## ocupados     2.55722    0.08006  31.939   <2e-16 ***
## informales  -0.21877    0.09559  -2.289   0.0221 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6004.06  on 12509  degrees of freedom
## Residual deviance:  143.78  on 12507  degrees of freedom
## AIC: 6307.8
## 
## Number of Fisher Scoring iterations: 6
# Plot the fitted logit model using the default plot method for glm objects.
plot(modelo_logit1)

# Same default plots for the probit model, for side-by-side comparison.
plot(modelo_probit1)