library(rio)
# Read the spreadsheet into a data frame (rio picks the reader from the
# file extension) and list its column names.
dataperu <- import(file = "dataPeru.xlsx")
names(dataperu)
## [1] "DEPARTAMENTO" "UBIGEO" "buenEstado"
## [4] "contribuyentesSunat" "peaOcupada" "pobUrbana"
## [7] "PobRural" "pobTotal"
# Ratios of peaOcupada and contribuyentesSunat to pobTotal, one value per row.
# NOTE(review): these vectors live in the calling environment, not inside
# `dataperu`; lm() resolves them through the formula's environment.
porPEA <- with(dataperu, peaOcupada / pobTotal)
porSUNAT <- with(dataperu, contribuyentesSunat / pobTotal)

# Linear model: buenEstado explained by the two ratios above.
h1 <- buenEstado ~ porPEA + porSUNAT
r1 <- lm(formula = h1, data = dataperu)
summary(r1)
##
## Call:
## lm(formula = h1, data = dataperu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0928 -4.3610 0.2575 4.4003 11.0196
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.61 15.96 -1.416 0.171
## porPEA 102.18 64.24 1.590 0.126
## porSUNAT 10.03 31.21 0.321 0.751
##
## Residual standard error: 6.299 on 22 degrees of freedom
## Multiple R-squared: 0.4669, Adjusted R-squared: 0.4184
## F-statistic: 9.633 on 2 and 22 DF, p-value: 0.000989
# Poisson regression (log link) of the peaOcupada count on buenEstado and
# contribuyentesSunat. log(pobTotal) is supplied as an offset, i.e. it enters
# the linear predictor with its coefficient fixed at 1.
h2 <- peaOcupada ~ buenEstado + contribuyentesSunat
r2 <- glm(
  formula = h2,
  family = poisson(link = "log"),
  data = dataperu,
  offset = log(pobTotal)
)
summary(r2)
##
## Call:
## glm(formula = h2, family = poisson(link = "log"), data = dataperu,
## offset = log(pobTotal))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.159e+00 9.827e-04 -1179.0 <2e-16 ***
## buenEstado 8.075e-03 5.023e-05 160.7 <2e-16 ***
## contribuyentesSunat 2.092e-08 1.904e-10 109.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 192616 on 24 degrees of freedom
## Residual deviance: 55608 on 22 degrees of freedom
## AIC: 55975
##
## Number of Fisher Scoring iterations: 3
# Check for overdispersion in the Poisson fit: prints TRUE when the one-sided
# ("greater") dispersion test has a p-value below 0.05.
AER::dispersiontest(r2, alternative = "greater")[["p.value"]] < 0.05
## [1] TRUE
# Negative binomial regression of peaOcupada on the same two predictors.
# Here log(pobTotal) is an ordinary covariate, so its coefficient is estimated
# rather than fixed.
# NOTE(review): the Poisson model r2 uses log(pobTotal) as an offset instead;
# if the two fits are meant to be directly comparable, offset(log(pobTotal))
# in the formula may be what was intended -- confirm before changing.
h2nb <- peaOcupada ~ buenEstado + contribuyentesSunat + log(pobTotal)
r2nb <- MASS::glm.nb(h2nb, data = dataperu)
summary(r2nb)
##
## Call:
## MASS::glm.nb(formula = h2nb, data = dataperu, init.theta = 140.0673113,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.594e-01 3.581e-01 -1.562 0.1183
## buenEstado 8.986e-03 2.248e-03 3.997 6.41e-05 ***
## contribuyentesSunat 4.708e-08 2.668e-08 1.764 0.0777 .
## log(pobTotal) 9.545e-01 2.678e-02 35.649 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(140.0673) family taken to be 1)
##
## Null deviance: 3708.711 on 24 degrees of freedom
## Residual deviance: 25.031 on 21 degrees of freedom
## AIC: 586.32
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 140.1
## Std. Err.: 39.6
##
## 2 x log-likelihood: -576.319