Ejercicio 1

library(rio)
datita = import("dataPeru.xlsx")
str(datita)
## 'data.frame':    25 obs. of  8 variables:
##  $ DEPARTAMENTO       : chr  "AMAZONAS" "ÁNCASH" "APURÍMAC" "AREQUIPA" ...
##  $ UBIGEO             : chr  "010000" "020000" "030000" "040000" ...
##  $ buenEstado         : num  18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
##  $ contribuyentesSunat: num  75035 302906 103981 585628 151191 ...
##  $ peaOcupada         : num  130019 387976 140341 645001 235857 ...
##  $ pobUrbana          : num  205976 806065 243354 1383694 444473 ...
##  $ PobRural           : num  211389 333050 180905 76739 206467 ...
##  $ pobTotal           : num  417365 1139115 424259 1460433 650940 ...

Regresión Gaussiana

Pasar a porcentaje

h1 = formula(buenEstado ~ contribuyentesSunat/pobTotal + peaOcupada/pobTotal) 

h1 = formula(buenEstado ~ (((contribuyentesSunat/pobTotal)100) + (peaOcupada/pobTotal)100))

reg1=lm(h1,data=datita)
summary(reg1)
## 
## Call:
## lm(formula = h1, data = datita)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.6468  -4.0593  -0.1762   3.4804  20.0829 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   1.728e+01  3.941e+00   4.384 0.000287 ***
## contribuyentesSunat           1.205e-04  5.981e-05   2.015 0.057556 .  
## peaOcupada                   -1.023e-04  6.608e-05  -1.548 0.137226    
## contribuyentesSunat:pobTotal -3.766e-11  3.644e-11  -1.033 0.313751    
## pobTotal:peaOcupada           3.817e-11  3.983e-11   0.958 0.349349    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.34 on 20 degrees of freedom
## Multiple R-squared:  0.3419, Adjusted R-squared:  0.2103 
## F-statistic: 2.598 on 4 and 20 DF,  p-value: 0.06734
h2 = formula(peaOcupada ~ contribuyentesSunat + buenEstado)
rp2=glm(h2, data = datita, 
        offset=log(pobTotal), 
        family = poisson(link = "log"))
summary(rp2)
## 
## Call:
## glm(formula = h2, family = poisson(link = "log"), data = datita, 
##     offset = log(pobTotal))
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.159e+00  9.827e-04 -1179.0   <2e-16 ***
## contribuyentesSunat  2.092e-08  1.904e-10   109.9   <2e-16 ***
## buenEstado           8.075e-03  5.023e-05   160.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 192616  on 24  degrees of freedom
## Residual deviance:  55608  on 22  degrees of freedom
## AIC: 55975
## 
## Number of Fisher Scoring iterations: 3
library(magrittr)
library(kableExtra)
overdispersion=AER::dispersiontest(rp2,alternative='greater')$ p.value<0.05
underdispersion=AER::dispersiontest(rp2,alternative='less')$ p.value<0.05
# tabla
testResult=as.data.frame(rbind(overdispersion,underdispersion))
names(testResult)='Es probable?'
testResult%>%kable(caption = "Test de Equidispersión")%>%kableExtra::kable_styling()
Test de Equidispersión
Es probable?
overdispersion TRUE
underdispersion FALSE
rq2 = glm(h2, data = datita,
          offset=log(pobTotal),
          family = quasipoisson(link = "log"))
library(MASS)
negativa=formula(peaOcupada ~ contribuyentesSunat + buenEstado + offset(log(pobTotal)))
rbn2=glm.nb(negativa,data=datita)
performance::check_overdispersion(rp2)
## # Overdispersion test
## 
##        dispersion ratio =  2526.621
##   Pearson's Chi-Squared = 55585.662
##                 p-value =   < 0.001
## Overdispersion detected.
performance::check_overdispersion(rq2)
## # Overdispersion test
## 
##        dispersion ratio =  2526.621
##   Pearson's Chi-Squared = 55585.662
##                 p-value =   < 0.001
## Overdispersion detected.
performance::check_overdispersion(rbn2)
## # Overdispersion test
## 
##  dispersion ratio = 0.140
##           p-value = 0.168
## No overdispersion detected.
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
modelsQP_BN=list('Poisson asegurados (II)'=rp2,
                 'QuasiPoisson asegurados (II)'=rq2,
                 'Binomial Negativa asegurados (II)'=rbn2)


modelsummary(modelsQP_BN,
             exponentiate = T, 
             statistic = 'conf.int',
             title = "EXP() de la Regresiones Poisson, Quasi Poisson  y Binomial Negativa",
             stars = TRUE,
             output = "kableExtra")
EXP() de la Regresiones Poisson, Quasi Poisson y Binomial Negativa
 Poisson asegurados (II)  QuasiPoisson asegurados (II)  Binomial Negativa asegurados (II)
(Intercept) 0.314*** 0.314*** 0.312***
[0.313, 0.315] [0.285, 0.346] [0.286, 0.341]
contribuyentesSunat 1.000*** 1.000* 1.000
[1.000, 1.000] [1.000, 1.000] [1.000, 1.000]
buenEstado 1.008*** 1.008** 1.009***
[1.008, 1.008] [1.003, 1.013] [1.004, 1.014]
Num.Obs. 25 25 25
AIC 55974.5 587.1
BIC 55978.2 592.0
Log.Lik. -27984.274 -289.566
F 68713.131 27.195 9.985
RMSE 30706.19 30706.19 31601.43
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Interpretación:

El mejor modelo es la BN y se encuentra que solo los locales en buen estado tienen un efecto significativo