library(rio)
MYDATA = import("dataPeru.xlsx")
str(MYDATA)
## 'data.frame':    25 obs. of  8 variables:
##  $ DEPARTAMENTO       : chr  "AMAZONAS" "ÁNCASH" "APURÍMAC" "AREQUIPA" ...
##  $ UBIGEO             : chr  "010000" "020000" "030000" "040000" ...
##  $ buenEstado         : num  18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
##  $ contribuyentesSunat: num  75035 302906 103981 585628 151191 ...
##  $ peaOcupada         : num  130019 387976 140341 645001 235857 ...
##  $ pobUrbana          : num  205976 806065 243354 1383694 444473 ...
##  $ PobRural           : num  211389 333050 180905 76739 206467 ...
##  $ pobTotal           : num  417365 1139115 424259 1460433 650940 ...
MYDATA$UBIGEO = trimws(MYDATA$UBIGEO, whitespace = "[\\h\\v]")
MYDATA$UBIGEO=as.numeric(MYDATA$UBIGEO)
str(MYDATA)
## 'data.frame':    25 obs. of  8 variables:
##  $ DEPARTAMENTO       : chr  "AMAZONAS" "ÁNCASH" "APURÍMAC" "AREQUIPA" ...
##  $ UBIGEO             : num  1e+04 2e+04 3e+04 4e+04 5e+04 6e+04 7e+04 8e+04 9e+04 1e+05 ...
##  $ buenEstado         : num  18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
##  $ contribuyentesSunat: num  75035 302906 103981 585628 151191 ...
##  $ peaOcupada         : num  130019 387976 140341 645001 235857 ...
##  $ pobUrbana          : num  205976 806065 243354 1383694 444473 ...
##  $ PobRural           : num  211389 333050 180905 76739 206467 ...
##  $ pobTotal           : num  417365 1139115 424259 1460433 650940 ...
modelo1=formula(buenEstado~contribuyentesSunat+peaOcupada)
reg1=lm(modelo1,data=MYDATA)
summary(reg1)
## 
## Call:
## lm(formula = modelo1, data = MYDATA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.589  -3.966  -1.347   1.907  21.518 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.865e+01  2.694e+00   6.922 5.98e-07 ***
## contribuyentesSunat  1.786e-05  2.060e-05   0.867    0.395    
## peaOcupada          -1.596e-05  2.241e-05  -0.712    0.484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.925 on 22 degrees of freedom
## Multiple R-squared:  0.1561, Adjusted R-squared:  0.07939 
## F-statistic: 2.035 on 2 and 22 DF,  p-value: 0.1546
library(knitr)
library(modelsummary)

hPOI=formula(buenEstado~contribuyentesSunat+peaOcupada)

rlPOI=lm(hPOI, data = MYDATA)

modelPOI=list('Locales en Buen Estado'=rlPOI)
modelsummary(modelPOI, title = "Resumen de Regresion Lineal",
             stars = TRUE,
             output = "kableExtra")
Resumen de Regresion Lineal
Locales en Buen Estado
(Intercept) 18.646***
(2.694)
contribuyentesSunat 0.000
(0.000)
peaOcupada 0.000
(0.000)
Num.Obs. 25
R2 0.156
R2 Adj. 0.079
AIC 179.3
BIC 184.1
Log.Lik. -85.626
F 2.035
RMSE 7.43
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
rpPOI=glm(hPOI, data = MYDATA, 
        offset=log(pobTotal), #exposure 
        family = poisson(link = "log"))
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.900000
# displaying results
modelslmpoi=list('Locales Buen Estado'=rlPOI,
                 'POISSON Buen Estado'=rpPOI)

modelsummary(modelslmpoi, title = "Regresiones OLS y Poisson",
             stars = TRUE,
             output = "kableExtra")
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 13.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.900000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 10.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.600000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 31.400000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.100000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.800000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.700000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.300000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.900000
Regresiones OLS y Poisson
Locales Buen Estado  POISSON Buen Estado
(Intercept) 18.646*** -9.842***
(2.694) (0.081)
contribuyentesSunat 0.000 0.000***
(0.000) (0.000)
peaOcupada 0.000 0.000***
(0.000) (0.000)
Num.Obs. 25 25
R2 0.156
R2 Adj. 0.079
AIC 179.3 226.9
BIC 184.1 230.6
Log.Lik. -85.626 -110.473
F 2.035 122.655
RMSE 7.43 9.20
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
h2 = formula(peaOcupada~contribuyentesSunat+buenEstado)
rl2=lm(h2, data = MYDATA)
rp2=glm(h2, data = MYDATA, 
        offset=log(pobTotal), #exposure 
        family = poisson(link = "log"))

# displaying results
modelslmpoi=list('PEA Ocupada'=rl2,
                 'POISSON PEA Ocupada'=rp2)

modelsummary(modelslmpoi, title = "Regresiones OLS y Poisson",
             stars = TRUE,
             output = "kableExtra")
Regresiones OLS y Poisson
PEA Ocupada  POISSON PEA Ocupada
(Intercept) 115468.390** -1.159***
(37869.931) (0.001)
contribuyentesSunat 0.921*** 0.000***
(0.017) (0.000)
buenEstado -1411.649 0.008***
(1982.682) (0.000)
Num.Obs. 25 25
R2 0.993
R2 Adj. 0.993
AIC 636.7 55974.5
BIC 641.6 55978.2
Log.Lik. -314.354 -27984.274
F 1603.130 68713.131
RMSE 69927.80 30706.19
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
library(magrittr)
overdispersion=AER::dispersiontest(rp2,alternative='greater')$ p.value<0.05
underdispersion=AER::dispersiontest(rp2,alternative='less')$ p.value<0.05
# tabla
testResult=as.data.frame(rbind(overdispersion,underdispersion))
names(testResult)='Es probable?'
testResult%>%kable(caption = "Test de Equidispersión")%>%kableExtra::kable_styling()
Test de Equidispersión
Es probable?
overdispersion TRUE
underdispersion FALSE
rqp = glm(h2, data = MYDATA,
          offset=log(pobTotal),
          family = quasipoisson(link = "log"))

modelsPQP=list('POISSON PEA oc'=rp2,'QUASIPOISSON PEA Oc (II)'=rqp)

modelsummary(modelsPQP, 
             title = "Regresiones Poisson y QuasiPoisson",
             stars = TRUE,
             output = "kableExtra")
Regresiones Poisson y QuasiPoisson
 POISSON PEA oc  QUASIPOISSON PEA Oc (II)
(Intercept) -1.159*** -1.159***
(0.001) (0.049)
contribuyentesSunat 0.000*** 0.000*
(0.000) (0.000)
buenEstado 0.008*** 0.008**
(0.000) (0.003)
Num.Obs. 25 25
AIC 55974.5
BIC 55978.2
Log.Lik. -27984.274
F 68713.131 27.195
RMSE 30706.19 30706.19
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
library(arm)
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:rio':
## 
##     factorize
## 
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is C:/2024/PUCP 24/Estadística 2.2/Bases/Bases/Bases/PARCIAL
cbind(se_Poi=se.coef(rp2),se_QuasiPoi=se.coef(rqp))
##                           se_Poi  se_QuasiPoi
## (Intercept)         9.826772e-04 4.939494e-02
## contribuyentesSunat 1.903657e-10 9.568862e-09
## buenEstado          5.023233e-05 2.524962e-03
formatoNum <- function(x) format(x, digits = 4, scientific = FALSE)

modelsummary(modelsPQP,
             fmt=formatoNum, # uso mi formula
             exponentiate = T, # exponenciar!!!!!
             statistic = 'conf.int',
             title = "Regresión Poisson - coeficientes exponenciados",
             stars = TRUE,
             output = "kableExtra")
Regresión Poisson - coeficientes exponenciados
 POISSON PEA oc  QUASIPOISSON PEA Oc (II)
(Intercept) 0.3139*** 0.3139***
[0.3133, 0.3145] [0.2849, 0.3457]
contribuyentesSunat 1.0000*** 1.0000*
[1.0000, 1.0000] [1.0000, 1.0000]
buenEstado 1.0081*** 1.0081**
[1.0080, 1.0082] [1.0031, 1.0131]
Num.Obs. 25 25
AIC 55974.5
BIC 55978.2
Log.Lik. -27984.274
F 68713.131 27.195
RMSE 30706.19 30706.19
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
modelsQPexp=list('QuasiPoisson asegurados (II) exponenciado'=rqp)


modelsummary(modelsQPexp,fmt=formatoNum,
             exponentiate = T, 
             statistic = 'conf.int',
             title = "EXP() de la Regresión Quasi Poisson (II) para Interpretación",
             stars = TRUE,
             output = "kableExtra")
EXP() de la Regresión Quasi Poisson (II) para Interpretación
 QuasiPoisson asegurados (II) exponenciado
(Intercept) 0.3139***
[0.2849, 0.3457]
contribuyentesSunat 1.0000*
[1.0000, 1.0000]
buenEstado 1.0081**
[1.0031, 1.0131]
Num.Obs. 25
F 27.195
RMSE 30706.19
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
h2off=formula(peaOcupada~contribuyentesSunat+buenEstado + offset(log(pobTotal)))

rbn=glm.nb(h2off,data=MYDATA)

modelsQP_BN=list('PoiSSON PEA OC(II)'=rp2,
                 'QuasiPoisson PEA OC (II)'=rqp,
                 'Binomial Negativa PEA OC (II)'=rbn)


modelsummary(modelsQP_BN,fmt=formatoNum,
             exponentiate = T, 
             statistic = 'conf.int',
             title = "EXP() de la Regresiones Poisson, Quasi Poisson  y Binomial Negativa",
             stars = TRUE,
             output = "kableExtra")
EXP() de la Regresiones Poisson, Quasi Poisson y Binomial Negativa
 PoiSSON PEA OC(II)  QuasiPoisson PEA OC (II)  Binomial Negativa PEA OC (II)
(Intercept) 0.3139*** 0.3139*** 0.3123***
[0.3133, 0.3145] [0.2849, 0.3457] [0.286, 0.3411]
contribuyentesSunat 1.0000*** 1.0000* 1.0000
[1.0000, 1.0000] [1.0000, 1.0000] [1.000, 1.0000]
buenEstado 1.0081*** 1.0081** 1.0091***
[1.0080, 1.0082] [1.0031, 1.0131] [1.004, 1.0138]
Num.Obs. 25 25 25
AIC 55974.5 587.1
BIC 55978.2 592.0
Log.Lik. -27984.274 -289.566
F 68713.131 27.195 9.985
RMSE 30706.19 30706.19 31601.43
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
data.frame(Model = c("poisson", "quasipoisson", "negative-binomial"),
           AIC = c(AIC(rp2), AIC(rqp), AIC(rbn)),
           BIC = c(BIC(rp2), BIC(rqp), BIC(rbn)),stringsAsFactors = FALSE
)%>%
kable(caption = "AIC / BIC para los modelos de conteos")%>%kableExtra::kable_styling(full_width = FALSE)
AIC / BIC para los modelos de conteos
Model AIC BIC
poisson 55974.5479 55978.2045
quasipoisson NA NA
negative-binomial 587.1316 592.0071