archivo="provinciasPeru.dbf"
library(foreign)
perudat=read.dbf(archivo,as.is = T)
names(perudat)
## [1] "SP_ID" "IDPROV" "IDDPTO" "NOMBDEP" "NOMBPROV"
## [6] "SUP_INSULA" "SUP_LACUST" "SUP_INS_D" "IDH" "esperanza"
## [11] "secundaria" "educa" "percapitaf" "IDE" "identidad"
## [16] "salud" "educacion" "saneamient" "electrific" "poblacion"
## [21] "costa" "capital" "tamano" "fecundidad" "desnutrici"
## [26] "densidadpo" "mortalidad" "analfa" "analfa1" "analfa2"
## [31] "analfa3" "analfa4" "pob" "pob_ur" "pob_rural"
## [36] "pob_h" "pob_m" "Voto_PPK" "Voto_FP"
tail(perudat)
## SP_ID IDPROV IDDPTO NOMBDEP NOMBPROV SUP_INSULA SUP_LACUST SUP_INS_D
## 191 155 2001 20 PIURA PIURA 0.00 0 0
## 192 160 2006 20 PIURA SULLANA 0.00 0 0
## 193 161 2007 20 PIURA TALARA 0.00 0 0
## 194 158 2004 20 PIURA MORROPON 0.00 0 0
## 195 159 2005 20 PIURA PAITA 0.00 0 0
## 196 67 0701 07 CALLAO CALLAO 17.63 0 0
## IDH esperanza secundaria educa percapitaf IDE identidad
## 191 0.3890754 72.92 62.89432 8.810065 361.2346 0.7426 98.5621
## 192 0.3716435 74.62 63.46141 8.271043 319.5071 0.7548 97.9271
## 193 0.4174419 73.36 65.52080 9.802832 401.1025 0.8082 99.0036
## 194 0.2891422 70.46 50.09164 5.745931 245.7949 0.6653 97.1181
## 195 0.3862217 73.54 58.79517 7.786611 387.5828 0.7154 99.2638
## 196 0.4821672 76.24 75.28926 10.368500 515.0399 0.8514 99.4939
## salud educacion saneamient electrific poblacion costa capital tamano
## 191 15.3145 89.0860 77.9517 80.1926 734437 NO SI 4
## 192 12.9206 91.7215 78.0363 88.1845 309605 NO NO 4
## 193 17.2074 97.9404 84.5123 93.9844 133148 SI NO 3
## 194 7.3392 80.9296 67.5797 74.8067 159486 NO NO 3
## 195 6.7769 85.3579 76.5216 85.2715 122725 SI NO 3
## 196 26.4218 89.8493 93.0432 99.2529 969170 SI SI 4
## fecundidad desnutrici densidadpo mortalidad analfa analfa1 analfa2
## 191 2.45 20.58573 107.22 19.0 7.43 5.97 17.14
## 192 2.45 12.79055 53.04 14.2 6.25 5.60 12.21
## 193 2.30 10.66612 46.22 17.7 1.90 1.84 4.47
## 194 2.93 26.86987 41.83 27.1 13.72 10.09 19.00
## 195 2.74 15.96835 60.80 17.2 5.51 5.16 12.85
## 196 2.48 7.50000 5965.96 10.1 1.56 1.56 NA
## analfa3 analfa4 pob pob_ur pob_rural pob_h pob_m Voto_PPK Voto_FP
## 191 4.35 10.33 665991 573139 92852 327852 338139 140940 206840
## 192 5.15 7.31 287680 258723 28957 142411 145269 55271 126417
## 193 1.36 2.43 129396 126866 2530 65002 64394 40685 38701
## 194 10.12 17.40 159693 91798 67895 80951 78742 32301 57567
## 195 3.96 7.06 108535 103615 4920 54581 53954 25790 41585
## 196 0.69 2.39 876877 876877 0 430582 446295 296562 293068
str(perudat$IDE)
## num [1:196] 0.757 0.527 0.588 0.784 0.888 ...
niveles=c("muy baja", "baja", "medio", "alta", "muy alta")
perudat$IDEcat=cut(perudat$IDE,
breaks = length(niveles),
labels = niveles,
ordered_result = T)
str(perudat$IDEcat)
## Ord.factor w/ 5 levels "muy baja"<"baja"<..: 4 2 2 4 5 4 2 2 1 3 ...
table(perudat$IDEcat)
##
## muy baja baja medio alta muy alta
## 15 68 64 34 15
prop.table(table(perudat$IDEcat))*100
##
## muy baja baja medio alta muy alta
## 7.653061 34.693878 32.653061 17.346939 7.653061
toPlot=table(perudat$IDEcat)
barplot(toPlot,
xlab = "IDE niveles",
ylab = "Conteo de Provincias",
main = "Provincias según niveles de IDE",
col = c("yellow", "red", "blue", "black", "pink"))
str(perudat$percapitaf)
## num [1:196] 343 156 178 361 512 ...
summary(perudat$percapitaf)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 107.8 166.7 212.6 242.1 299.7 556.4
boxplot(perudat$percapitaf)
boxplot(perudat$percapitaf~perudat$IDEcat)
model = aov(percapitaf ~ IDEcat, data = perudat)
summary(model) #hay diferencias?
## Df Sum Sq Mean Sq F value Pr(>F)
## IDEcat 4 1239663 309916 115.5 <2e-16 ***
## Residuals 191 512604 2684
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = percapitaf ~ IDEcat, data = perudat)
##
## $IDEcat
## diff lwr upr p adj
## baja-muy baja -9.278119 -49.97670 31.42046 0.9704319
## medio-muy baja 64.334895 23.40711 105.26268 0.0002322
## alta-muy baja 150.082475 105.85896 194.30599 0.0000000
## muy alta-muy baja 257.421004 205.32437 309.51764 0.0000000
## medio-baja 73.613014 48.76551 98.46052 0.0000000
## alta-baja 159.360594 129.39337 189.32781 0.0000000
## muy alta-baja 266.699123 226.00054 307.39770 0.0000000
## alta-medio 85.747580 55.46981 116.02535 0.0000000
## muy alta-medio 193.086109 152.15833 234.01389 0.0000000
## muy alta-alta 107.338529 63.11502 151.56204 0.0000000
str(perudat$pob_rural)
## int [1:196] 13827 28099 67638 6659 19624 13999 10896 57921 99636 3670 ...
perudat$porpobrural = (perudat$pob_rural /perudat$pob)*100
str(perudat$porpobrural)
## num [1:196] 18.2 85.19 76.15 7.06 2.42 ...
summary(perudat$porpobrural)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 29.75 49.74 47.93 69.70 88.55
hist(perudat$porpobrural)
boxplot(perudat$porpobrural)
boxplot(perudat$porpobrural~perudat$IDEcat)
model = aov(porpobrural ~ IDEcat, data = perudat)
summary(model) #hay diferencias?
## Df Sum Sq Mean Sq F value Pr(>F)
## IDEcat 4 81201 20300 96.8 <2e-16 ***
## Residuals 191 40055 210
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = porpobrural ~ IDEcat, data = perudat)
##
## $IDEcat
## diff lwr upr p adj
## baja-muy baja -6.345107 -17.72187 5.031655 0.5404337
## medio-muy baja -30.466011 -41.90684 -19.025178 0.0000000
## alta-muy baja -50.085936 -62.44805 -37.723824 0.0000000
## muy alta-muy baja -65.827243 -80.39018 -51.264302 0.0000000
## medio-baja -24.120904 -31.06670 -17.175105 0.0000000
## alta-baja -43.740829 -52.11778 -35.363879 0.0000000
## muy alta-baja -59.482136 -70.85890 -48.105374 0.0000000
## alta-medio -19.619924 -28.08368 -11.156165 0.0000000
## muy alta-medio -35.361232 -46.80206 -23.920398 0.0000000
## muy alta-alta -15.741307 -28.10342 -3.379195 0.0050705
str(perudat$costa)
## chr [1:196] "SI" "NO" "NO" "SI" "SI" "SI" "NO" "NO" "NO" "NO" "NO" ...
perudat$costa=factor(perudat$costa)
table(perudat$costa)
##
## NO SI
## 169 27
prop.table(table(perudat$costa))*100
##
## NO SI
## 86.22449 13.77551
table(perudat$IDEcat,perudat$costa)
##
## NO SI
## muy baja 15 0
## baja 68 0
## medio 60 4
## alta 20 14
## muy alta 6 9
tablaCosta = table(perudat$IDEcat,perudat$costa)
barplot(tablaCosta, beside = T)
chisq.test(tablaCosta)
## Warning in chisq.test(tablaCosta): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: tablaCosta
## X-squared = 64.787, df = 4, p-value = 2.853e-13
str(perudat$capital)
## chr [1:196] "NO" "NO" "NO" "NO" "SI" "NO" "NO" "NO" "NO" "NO" "NO" ...
perudat$capital=factor(perudat$capital)
table(perudat$capital)
##
## NO SI
## 170 26
table(perudat$IDEcat,perudat$capital)
##
## NO SI
## muy baja 15 0
## baja 68 0
## medio 59 5
## alta 23 11
## muy alta 5 10
tablaCapital = table(perudat$IDEcat,perudat$capital)
barplot(tablaCapital, beside = T)
chisq.test(tablaCapital)
## Warning in chisq.test(tablaCapital): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: tablaCapital
## X-squared = 62.292, df = 4, p-value = 9.561e-13
subdata=perudat[c(2,40,13,21,22,41,33)]
head(subdata)
## IDPROV IDEcat percapitaf costa capital porpobrural pob
## 1 1304 alta 343.0569 SI NO 18.198210 75980
## 2 1305 baja 155.5582 NO NO 85.187206 32985
## 3 1306 baja 178.3444 NO NO 76.154340 88817
## 4 1307 alta 361.2748 SI NO 7.055745 94377
## 5 1301 muy alta 511.9673 SI SI 2.416811 811979
## 6 1302 alta 396.0647 SI NO 12.044326 116229
row.names(subdata)=subdata$IDPROV
subdata=subdata[-1]
head(subdata)
## IDEcat percapitaf costa capital porpobrural pob
## 1304 alta 343.0569 SI NO 18.198210 75980
## 1305 baja 155.5582 NO NO 85.187206 32985
## 1306 baja 178.3444 NO NO 76.154340 88817
## 1307 alta 361.2748 SI NO 7.055745 94377
## 1301 muy alta 511.9673 SI SI 2.416811 811979
## 1302 alta 396.0647 SI NO 12.044326 116229
library("oglmx")
## Loading required package: maxLik
## Loading required package: miscTools
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
model1=ologit.reg(IDEcat ~ .,
data = subdata)
summary(model1)
## Ordered Logit Regression
## Log-Likelihood: -158.7017
## No. Iterations: 7
## McFadden's R2: 0.4337699
## AIC: 335.4033
## Estimate Std. error t value Pr(>|t|)
## percapitaf 1.5490e-02 3.7362e-03 4.1458 3.386e-05 ***
## costaSI 9.0915e-01 6.3017e-01 1.4427 0.149105
## capitalSI 1.9778e+00 6.6189e-01 2.9881 0.002807 **
## porpobrural -6.2514e-02 1.2517e-02 -4.9943 5.906e-07 ***
## pob 4.4713e-07 1.6651e-06 0.2685 0.788294
## ----- Threshold Parameters -----
## Estimate Std. error t value Pr(>|t|)
## Threshold (muy baja->baja) -3.68479 1.34859 -2.7323 0.006289 **
## Threshold (baja->medio) -0.26515 1.27141 -0.2085 0.834801
## Threshold (medio->alta) 3.79096 1.35729 2.7930 0.005221 **
## Threshold (alta->muy alta) 7.40548 1.54722 4.7863 1.699e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(model1m=margins.oglmx(model1,ascontinuous = TRUE))
## Marginal Effects on Pr(Outcome==muy baja)
## Marg. Eff Std. error t value Pr(>|t|)
## percapitaf -1.1485e-04 4.7737e-05 -2.4058 0.016137 *
## costaSI -6.7407e-03 5.2800e-03 -1.2767 0.201721
## capitalSI -1.4664e-02 7.3457e-03 -1.9963 0.045906 *
## porpobrural 4.6350e-04 1.7883e-04 2.5919 0.009546 **
## pob -3.3151e-09 1.2228e-08 -0.2711 0.786309
## ------------------------------------
## Marginal Effects on Pr(Outcome==baja)
## Marg. Eff Std. error t value Pr(>|t|)
## percapitaf -2.2399e-03 5.7927e-04 -3.8668 0.0001103 ***
## costaSI -1.3147e-01 8.9245e-02 -1.4731 0.1407113
## capitalSI -2.8600e-01 1.0273e-01 -2.7840 0.0053693 **
## porpobrural 9.0400e-03 2.3556e-03 3.8377 0.0001242 ***
## pob -6.4658e-08 2.3759e-07 -0.2721 0.7855142
## ------------------------------------
## Marginal Effects on Pr(Outcome==medio)
## Marg. Eff Std. error t value Pr(>|t|)
## percapitaf 1.3462e-03 5.5396e-04 2.4301 0.01510 *
## costaSI 7.9011e-02 5.9032e-02 1.3384 0.18075
## capitalSI 1.7188e-01 8.7619e-02 1.9617 0.04980 *
## porpobrural -5.4329e-03 2.3500e-03 -2.3119 0.02078 *
## pob 3.8858e-08 1.3923e-07 0.2791 0.78017
## ------------------------------------
## Marginal Effects on Pr(Outcome==alta)
## Marg. Eff Std. error t value Pr(>|t|)
## percapitaf 9.7735e-04 3.6133e-04 2.7049 0.006833 **
## costaSI 5.7364e-02 4.2363e-02 1.3541 0.175700
## capitalSI 1.2479e-01 5.0336e-02 2.4792 0.013169 *
## porpobrural -3.9444e-03 1.3127e-03 -3.0047 0.002658 **
## pob 2.8212e-08 1.0784e-07 0.2616 0.793616
## ------------------------------------
## Marginal Effects on Pr(Outcome==muy alta)
## Marg. Eff Std. error t value Pr(>|t|)
## percapitaf 3.1279e-05 1.8538e-05 1.6873 0.09154 .
## costaSI 1.8359e-03 1.5061e-03 1.2190 0.22286
## capitalSI 3.9938e-03 2.4613e-03 1.6226 0.10467
## porpobrural -1.2624e-04 7.8053e-05 -1.6173 0.10581
## pob 9.0291e-10 3.4240e-09 0.2637 0.79201
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
UN SOL MAS DE INFRESO PEECAPITA DISMINUYE TU PROBABILIDAD QUE TU FAMILIA ESTER EN LA CATEGORIA MUY BAJA EN 0.01%
-1.1706e-04*100
## [1] -0.011706
-1.5453e-02*100
## [1] -1.5453
que yo tenga un porcentaje ded pobl rural aumenta mi probabilidad en 0.04 de estar en muy baja
4.7121e-04*100
## [1] 0.047121