library(rio)
# Read the Excel workbook into a data frame and list its column names.
dataperu <- rio::import(file = "dataPeru.xlsx")

names(dataperu)
## [1] "DEPARTAMENTO"        "UBIGEO"              "buenEstado"         
## [4] "contribuyentesSunat" "peaOcupada"          "pobUrbana"          
## [7] "PobRural"            "pobTotal"
# Divide the employed-population and SUNAT-taxpayer counts by pobTotal so both
# predictors are expressed as shares of the total population.
porPEA <- with(dataperu, peaOcupada / pobTotal)
porSUNAT <- with(dataperu, contribuyentesSunat / pobTotal)

# Linear model of buenEstado on the two shares. porPEA and porSUNAT are
# free-standing vectors, not columns of dataperu, so lm() picks them up from
# the environment in which the formula was created.
h1 <- buenEstado ~ porPEA + porSUNAT
r1 <- lm(formula = h1, data = dataperu)
summary(r1)
## 
## Call:
## lm(formula = h1, data = dataperu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0928  -4.3610   0.2575   4.4003  11.0196 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -22.61      15.96  -1.416    0.171
## porPEA        102.18      64.24   1.590    0.126
## porSUNAT       10.03      31.21   0.321    0.751
## 
## Residual standard error: 6.299 on 22 degrees of freedom
## Multiple R-squared:  0.4669, Adjusted R-squared:  0.4184 
## F-statistic: 9.633 on 2 and 22 DF,  p-value: 0.000989
# Poisson regression (log link) of the peaOcupada count on buenEstado and
# contribuyentesSunat; log(pobTotal) is supplied as an offset, i.e. a term
# whose coefficient is fixed at 1 rather than estimated.
h2 <- peaOcupada ~ buenEstado + contribuyentesSunat
r2 <- glm(
  formula = h2,
  family = poisson(link = "log"),
  data = dataperu,
  offset = log(pobTotal)
)
summary(r2)
## 
## Call:
## glm(formula = h2, family = poisson(link = "log"), data = dataperu, 
##     offset = log(pobTotal))
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.159e+00  9.827e-04 -1179.0   <2e-16 ***
## buenEstado           8.075e-03  5.023e-05   160.7   <2e-16 ***
## contribuyentesSunat  2.092e-08  1.904e-10   109.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 192616  on 24  degrees of freedom
## Residual deviance:  55608  on 22  degrees of freedom
## AIC: 55975
## 
## Number of Fisher Scoring iterations: 3
# Check the Poisson fit for overdispersion: prints TRUE when the one-sided
# test (alternative = "greater") has a p-value below 0.05.
AER::dispersiontest(r2, alternative = "greater")[["p.value"]] < 0.05
## [1] TRUE
# Negative binomial regression of peaOcupada on the same two predictors used
# in h2, plus log(pobTotal).
# NOTE(review): here log(pobTotal) is an ordinary covariate with an estimated
# coefficient, whereas the Poisson model r2 uses it as a fixed offset. Confirm
# this difference is intended; offset(log(pobTotal)) in the formula would
# mirror r2.
h2nb <- peaOcupada ~ buenEstado + contribuyentesSunat + log(pobTotal)
r2nb <- MASS::glm.nb(formula = h2nb, data = dataperu)
summary(r2nb)
## 
## Call:
## MASS::glm.nb(formula = h2nb, data = dataperu, init.theta = 140.0673113, 
##     link = log)
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -5.594e-01  3.581e-01  -1.562   0.1183    
## buenEstado           8.986e-03  2.248e-03   3.997 6.41e-05 ***
## contribuyentesSunat  4.708e-08  2.668e-08   1.764   0.0777 .  
## log(pobTotal)        9.545e-01  2.678e-02  35.649  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(140.0673) family taken to be 1)
## 
##     Null deviance: 3708.711  on 24  degrees of freedom
## Residual deviance:   25.031  on 21  degrees of freedom
## AIC: 586.32
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  140.1 
##           Std. Err.:  39.6 
## 
##  2 x log-likelihood:  -576.319