# URL of the published Google Sheets document, exported as CSV.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQWAwF5XnCfLYkPoPsWzR8Ut-h1_KSsTYQB1uC6lPxIgRe-EI9x7r5ELluLXgl_5g/pub?output=csv"
# Load the data, keeping text columns as character vectors. `FALSE` is
# spelled out because `F` is an ordinary variable that can be reassigned.
midata <- read.csv(link, stringsAsFactors = FALSE)
# Frequency count of each BECARIO value.
table(midata$BECARIO)
##
## NO SI
## 26478 40771
# Show the column names of the data set.
colnames(midata)
## [1] "REGION" "BECARIO" "GESTION" "DISTRITO" "POBREZA"
# Turn columns 1 through 5 into factors, then summarise every column.
midata[1:5] <- lapply(midata[1:5], as.factor)
summary(midata)
## REGION BECARIO GESTION DISTRITO POBREZA
## LIMA : 7639 NO:26478 Privada: 5628 Rural :26837 NoP:34339
## CUSCO : 4760 SI:40771 Publica:61621 Urbana:40412 P :32910
## PIURA : 4744
## JUNIN : 4606
## LORETO : 4403
## CAJAMARCA: 3895
## (Other) :37202
# Fix the random seed.
# NOTE(review): no statement in this section appears to draw random
# numbers -- confirm whether the seed is needed here.
set.seed(2019)
# Model 1 data: the response (BECARIO) plus a single predictor (POBREZA).
vars1 <- midata[c("BECARIO", "POBREZA")]
# Binomial GLM of BECARIO on every other column of vars1.
rlog1 <- glm(BECARIO ~ ., family = binomial, data = vars1)
# Classic summary output:
summary(rlog1)
##
## Call:
## glm(formula = BECARIO ~ ., family = binomial, data = vars1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3667 -1.3641 0.9992 1.0016 1.0016
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.428769 0.011042 38.831 <2e-16 ***
## POBREZAP 0.005903 0.015789 0.374 0.708
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 90166 on 67248 degrees of freedom
## Residual deviance: 90166 on 67247 degrees of freedom
## AIC: 90170
##
## Number of Fisher Scoring iterations: 4
### Seed
# NOTE(review): no statement in this section appears to draw random
# numbers -- confirm whether the seed is needed here.
set.seed(2019)
### Model with two predictors:
# The model data is a column subset of midata; BECARIO is the dependent
# variable. The independent variables could also be selected by column
# number instead of by name.
vars2 <- midata[c("BECARIO", "POBREZA", "GESTION")]
# Binomial GLM of BECARIO on every other column of vars2.
rlog2 <- glm(BECARIO ~ ., family = binomial, data = vars2)
# Classic summary output:
summary(rlog2)
##
## Call:
## glm(formula = BECARIO ~ ., family = binomial, data = vars2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3756 -1.3721 0.9914 0.9945 1.0846
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.230123 0.027282 8.435 < 2e-16 ***
## POBREZAP -0.007801 0.015893 -0.491 0.624
## GESTIONPublica 0.224493 0.028248 7.947 1.91e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 90166 on 67248 degrees of freedom
## Residual deviance: 90103 on 67246 degrees of freedom
## AIC: 90109
##
## Number of Fisher Scoring iterations: 4
# Model 3 data: the response (BECARIO) plus three predictors.
vars3 <- midata[c("BECARIO", "POBREZA", "GESTION", "DISTRITO")]
# Binomial GLM of BECARIO on every other column of vars3.
rlog3 <- glm(BECARIO ~ ., family = binomial, data = vars3)
# Classic summary output:
summary(rlog3)
##
## Call:
## glm(formula = BECARIO ~ ., family = binomial, data = vars3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3820 -1.3624 0.9858 1.0030 1.0966
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.933e-01 3.058e-02 6.320 2.62e-10 ***
## POBREZAP -8.051e-05 1.616e-02 -0.005 0.99602
## GESTIONPublica 2.318e-01 2.838e-02 8.167 3.16e-16 ***
## DISTRITOUrbana 4.401e-02 1.649e-02 2.669 0.00762 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 90166 on 67248 degrees of freedom
## Residual deviance: 90096 on 67245 degrees of freedom
## AIC: 90104
##
## Number of Fisher Scoring iterations: 4
## TABLES
# Two-way contingency table: REGION (rows) by BECARIO (columns).
t1 <- table(midata[["REGION"]], midata[["BECARIO"]])
t1
##
## NO SI
## AMAZONAS 894 1701
## ANCASH 1389 2146
## APURIMAC 1194 1773
## AREQUIPA 510 799
## AYACUCHO 709 1586
## CAJAMARCA 1447 2448
## CALLAO 499 914
## CUSCO 2095 2665
## HUANCAVELICA 899 1614
## HUANUCO 938 1338
## ICA 591 906
## JUNIN 1889 2717
## LA LIBERTAD 1297 2031
## LAMBAYEQUE 825 1537
## LIMA 3121 4518
## LORETO 1993 2410
## MADRE DE DIOS 204 227
## MOQUEGUA 88 224
## PASCO 824 1048
## PIURA 1699 3045
## PUNO 1512 1476
## SAN MARTIN 989 1843
## TACNA 210 472
## TUMBES 220 432
## UCAYALI 442 901
# Two-way contingency table: DISTRITO (rows) by BECARIO (columns).
t2 <- table(midata[["DISTRITO"]], midata[["BECARIO"]])
t2
##
## NO SI
## Rural 10678 16159
## Urbana 15800 24612
# Two-way contingency table: POBREZA (rows) by BECARIO (columns).
t3 <- table(midata[["POBREZA"]], midata[["BECARIO"]])
t3
##
## NO SI
## NoP 13544 20795
## P 12934 19976