EJERCICIO UNO

DATA

library(readxl)
dataPeru <- read_excel("dataPeru.xlsx")
View(dataPeru)

#PORCENTAJE SUNAT Y PEA OCUPADA
dataPeru$pctsunat= dataPeru$contribuyentesSunat/dataPeru$pobTotal
dataPeru$pctpeaOcupada= dataPeru$peaOcupada/dataPeru$pobTotal

buenEstado ~ contribuyentesSunat + peaOcupada
## buenEstado ~ contribuyentesSunat + peaOcupada
modelo1=formula(buenEstado ~ pctsunat+ pctpeaOcupada)
regre1=lm(modelo1,data = dataPeru)
summary(regre1)
## 
## Call:
## lm(formula = modelo1, data = dataPeru)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0928  -4.3610   0.2575   4.4003  11.0196 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -22.61      15.96  -1.416    0.171
## pctsunat         10.03      31.21   0.321    0.751
## pctpeaOcupada   102.18      64.24   1.590    0.126
## 
## Residual standard error: 6.299 on 22 degrees of freedom
## Multiple R-squared:  0.4669, Adjusted R-squared:  0.4184 
## F-statistic: 9.633 on 2 and 22 DF,  p-value: 0.000989

EJERCICIO DOS

GAUSSIANA (no mide conteos)

modelo2=formula(peaOcupada ~ contribuyentesSunat + buenEstado)
regre2=lm(modelo2,data = dataPeru)
summary(regre2)
## 
## Call:
## lm(formula = modelo2, data = dataPeru)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -91867 -58573 -11166  46174 155851 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.155e+05  3.787e+04   3.049  0.00588 ** 
## contribuyentesSunat  9.206e-01  1.741e-02  52.872  < 2e-16 ***
## buenEstado          -1.412e+03  1.983e+03  -0.712  0.48395    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 74540 on 22 degrees of freedom
## Multiple R-squared:  0.9932, Adjusted R-squared:  0.9926 
## F-statistic:  1603 on 2 and 22 DF,  p-value: < 2.2e-16

POISSON (SI MIDE CONTEOS) (El gaussiano era para comparar solo importa este modelo)

library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
library(kableExtra)
h1=formula(peaOcupada ~ contribuyentesSunat + buenEstado)

rp1=glm(h1, data = dataPeru, 
        offset=log(pobTotal), #exposure 
        family = poisson(link = "log"))
summary(rp1)
## 
## Call:
## glm(formula = h1, family = poisson(link = "log"), data = dataPeru, 
##     offset = log(pobTotal))
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.159e+00  9.827e-04 -1179.0   <2e-16 ***
## contribuyentesSunat  2.092e-08  1.904e-10   109.9   <2e-16 ***
## buenEstado           8.075e-03  5.023e-05   160.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 192616  on 24  degrees of freedom
## Residual deviance:  55608  on 22  degrees of freedom
## AIC: 55975
## 
## Number of Fisher Scoring iterations: 3
# displaying results
modelslmpoi=list('OLS asegurados (II)'=regre2,
                 'POISSON asegurados'=rp1)

modelsummary(modelslmpoi, title = "Regresiones OLS y Poisson",
             stars = TRUE,
             output = "kableExtra")
Regresiones OLS y Poisson
 OLS asegurados (II)  POISSON asegurados
(Intercept) 115468.390** -1.159***
(37869.931) (0.001)
contribuyentesSunat 0.921*** 0.000***
(0.017) (0.000)
buenEstado -1411.649 0.008***
(1982.682) (0.000)
Num.Obs. 25 25
R2 0.993
R2 Adj. 0.993
AIC 636.7 55974.5
BIC 641.6 55978.2
Log.Lik. -314.354 -27984.274
F 1603.130 68713.131
RMSE 69927.80 30706.19
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Test de equidispersión

VarDep=dataPeru$peaOcupada
descris=list(min=min(VarDep),
             max=max(VarDep),
             media=round(mean(VarDep),2),
             var=round(var(VarDep),2),
             asim=round(e1071::skewness(VarDep),2),
             kurt=round(e1071::kurtosis(VarDep),2))

round(descris$var/descris$media,2)
## [1] 1518474
library(magrittr)
overdispersion=AER::dispersiontest(rp1,alternative='greater')$ p.value<0.05
underdispersion=AER::dispersiontest(rp1,alternative='less')$ p.value<0.05
# tabla
testResult=as.data.frame(rbind(overdispersion,underdispersion))
names(testResult)='Es probable?'
testResult%>%kable(caption = "Test de Equidispersión")%>%kableExtra::kable_styling()
Test de Equidispersión
Es probable?
overdispersion TRUE
underdispersion FALSE
rqp = glm(h1, data = dataPeru,
          offset=log(pobTotal),
          family = quasipoisson(link = "log"))

summary(rqp)
## 
## Call:
## glm(formula = h1, family = quasipoisson(link = "log"), data = dataPeru, 
##     offset = log(pobTotal))
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.159e+00  4.939e-02 -23.456  < 2e-16 ***
## contribuyentesSunat  2.092e-08  9.569e-09   2.186  0.03971 *  
## buenEstado           8.075e-03  2.525e-03   3.198  0.00415 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 2526.639)
## 
##     Null deviance: 192616  on 24  degrees of freedom
## Residual deviance:  55608  on 22  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 3
modelsPQP=list('POISSON asegurados (I)'=rp1,'QUASIPOISSON asegurados (I)'=rqp)

modelsummary(modelsPQP, 
             title = "Regresiones Poisson y QuasiPoisson",
             stars = TRUE,
             output = "kableExtra")
Regresiones Poisson y QuasiPoisson
 POISSON asegurados (I)  QUASIPOISSON asegurados (I)
(Intercept) -1.159*** -1.159***
(0.001) (0.049)
contribuyentesSunat 0.000*** 0.000*
(0.000) (0.000)
buenEstado 0.008*** 0.008**
(0.000) (0.003)
Num.Obs. 25 25
AIC 55974.5
BIC 55978.2
Log.Lik. -27984.274
F 68713.131 27.195
RMSE 30706.19 30706.19
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
h2off=formula(peaOcupada ~ contribuyentesSunat + buenEstado + offset(log(pobTotal)))

library(MASS)
rbn=glm.nb(h2off,data=dataPeru)

modelsQP_BN=list('Poisson asegurados (I)'=rp1,
                 'QuasiPoisson asegurados (II)'=rqp,
                 'Binomial Negativa asegurados (II)'=rbn)


formatoNum <- function(x) format(x, digits = 4, scientific = FALSE)


modelsummary(modelsQP_BN,fmt=formatoNum,
             exponentiate = T, 
             statistic = 'conf.int',
             title = "EXP() de la Regresiones Poisson, Quasi Poisson  y Binomial Negativa",
             stars = TRUE,
             output = "kableExtra")
EXP() de la Regresiones Poisson, Quasi Poisson y Binomial Negativa
 Poisson asegurados (I)  QuasiPoisson asegurados (II)  Binomial Negativa asegurados (II)
(Intercept) 0.3139*** 0.3139*** 0.3123***
[0.3133, 0.3145] [0.2849, 0.3457] [0.286, 0.3411]
contribuyentesSunat 1.0000*** 1.0000* 1.0000
[1.0000, 1.0000] [1.0000, 1.0000] [1.000, 1.0000]
buenEstado 1.0081*** 1.0081** 1.0091***
[1.0080, 1.0082] [1.0031, 1.0131] [1.004, 1.0138]
Num.Obs. 25 25 25
AIC 55974.5 587.1
BIC 55978.2 592.0
Log.Lik. -27984.274 -289.566
F 68713.131 27.195 9.985
RMSE 30706.19 30706.19 31601.43
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Interpretación

Ambas predictoras son significativos al 0.05