# Cargar librerías
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Importar los datos
data <- read.csv("DATA PROVINCIAS_CHATA - Data.csv", stringsAsFactors = FALSE)

# Limpiar y transformar variables numéricas
data <- data %>%
  mutate(across(c(PORC_GAN, POR_POB_ELEC, SERV_GAS, SERV_AGUA, EDUC_PBL, CP_MED, IND_CORR),
                ~ as.numeric(gsub(",", ".", .)))) # Reemplazar comas y convertir a numérico
# Verificar la estructura de los datos
str(data)
## 'data.frame':    196 obs. of  10 variables:
##  $ PROVINCIA   : chr  "Chachapoyas " "Bagua" "Bongará " "Condorcanqui " ...
##  $ PORC_GAN    : num  59.9 91.1 61.6 90.7 61.7 ...
##  $ POR_POB_ELEC: num  64.1 73.4 77.9 74.5 73.7 ...
##  $ MCR         : chr  "MACROREGION ORIENTE" "MACROREGION ORIENTE" "MACROREGION ORIENTE" "MACROREGION ORIENTE" ...
##  $ SERV_GAS    : num  65.3 45.9 56.1 15.4 45.3 ...
##  $ SERV_AGUA   : num  87.6 70.8 83 22.4 80.5 ...
##  $ EDUC_PBL    : num  46.8 78.1 61.5 113.3 85.5 ...
##  $ CP_MED      : num  13.8 11 13.8 15 16.1 ...
##  $ POBR_MON    : int  4 3 3 1 2 3 3 3 2 3 ...
##  $ IND_CORR    : num  62.7 36.3 27.7 30.2 33.3 21.8 57.6 33.5 41.2 21.4 ...
data <- data %>%
  mutate(across(c(PORC_GAN, POR_POB_ELEC, SERV_GAS, SERV_AGUA, EDUC_PBL, CP_MED, IND_CORR),
                ~ as.numeric(gsub(",", ".", .)))) %>%  # Reemplazar comas y convertir a numérico
  mutate(MCR = as.factor(MCR))  # Asegurar que la macroregión es categórica
colnames(data)
##  [1] "PROVINCIA"    "PORC_GAN"     "POR_POB_ELEC" "MCR"          "SERV_GAS"    
##  [6] "SERV_AGUA"    "EDUC_PBL"     "CP_MED"       "POBR_MON"     "IND_CORR"
colnames(data)[colnames(data) == "PORC_GAN "] <- "PORC_GAN"
modelo1 <- lm(PORC_GAN ~ SERV_GAS + SERV_AGUA, data = data)
summary(modelo1)
## 
## Call:
## lm(formula = PORC_GAN ~ SERV_GAS + SERV_AGUA, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.763 -11.725  -1.039  13.352  34.037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 78.19608    4.60506  16.980  < 2e-16 ***
## SERV_GAS    -0.42323    0.05674  -7.459 2.89e-12 ***
## SERV_AGUA    0.13388    0.06837   1.958   0.0516 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.32 on 193 degrees of freedom
## Multiple R-squared:  0.2267, Adjusted R-squared:  0.2187 
## F-statistic: 28.29 on 2 and 193 DF,  p-value: 1.684e-11
reg1=lm(modelo1,data=data)
summary(reg1)
## 
## Call:
## lm(formula = modelo1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.763 -11.725  -1.039  13.352  34.037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 78.19608    4.60506  16.980  < 2e-16 ***
## SERV_GAS    -0.42323    0.05674  -7.459 2.89e-12 ***
## SERV_AGUA    0.13388    0.06837   1.958   0.0516 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.32 on 193 degrees of freedom
## Multiple R-squared:  0.2267, Adjusted R-squared:  0.2187 
## F-statistic: 28.29 on 2 and 193 DF,  p-value: 1.684e-11
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
model1=list('apropiacion (I)'=reg1)
modelsummary(model1, title = "Regresion: modelo 1",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 1
 apropiacion (I)
(Intercept) 78.196***
(4.605)
SERV_GAS −0.423***
(0.057)
SERV_AGUA 0.134+
(0.068)
Num.Obs. 196
R2 0.227
R2 Adj. 0.219
AIC 1655.9
BIC 1669.0
Log.Lik. −823.943
F 28.288
RMSE 16.20
+ p
modelo2 <- lm(PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL , data = data)
summary(modelo2)
## 
## Call:
## lm(formula = PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.126 -12.240  -1.196  13.276  33.331 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 85.96018    8.56227  10.039  < 2e-16 ***
## SERV_GAS    -0.48314    0.07950  -6.077 6.46e-09 ***
## SERV_AGUA    0.11094    0.07159   1.550    0.123    
## EDUC_PBL    -0.04844    0.04504  -1.075    0.284    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.32 on 192 degrees of freedom
## Multiple R-squared:  0.2313, Adjusted R-squared:  0.2193 
## F-statistic: 19.26 on 3 and 192 DF,  p-value: 5.839e-11
reg2=lm(modelo2,data=data)
summary(reg2)
## 
## Call:
## lm(formula = modelo2, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.126 -12.240  -1.196  13.276  33.331 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 85.96018    8.56227  10.039  < 2e-16 ***
## SERV_GAS    -0.48314    0.07950  -6.077 6.46e-09 ***
## SERV_AGUA    0.11094    0.07159   1.550    0.123    
## EDUC_PBL    -0.04844    0.04504  -1.075    0.284    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.32 on 192 degrees of freedom
## Multiple R-squared:  0.2313, Adjusted R-squared:  0.2193 
## F-statistic: 19.26 on 3 and 192 DF,  p-value: 5.839e-11
model2=list('apropiacion (II)'=reg2)
modelsummary(model2, title = "Regresion: modelo 2",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 2
 apropiacion (II)
(Intercept) 85.960***
(8.562)
SERV_GAS −0.483***
(0.080)
SERV_AGUA 0.111
(0.072)
EDUC_PBL −0.048
(0.045)
Num.Obs. 196
R2 0.231
R2 Adj. 0.219
AIC 1656.7
BIC 1673.1
Log.Lik. −823.354
F 19.259
RMSE 16.15
+ p
modelo3 <- lm(PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL + CP_MED , data = data)
summary(modelo3)
## 
## Call:
## lm(formula = PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL + CP_MED, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.914 -11.981  -0.503  13.350  33.144 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 87.58302    9.05060   9.677  < 2e-16 ***
## SERV_GAS    -0.50122    0.08590  -5.835 2.27e-08 ***
## SERV_AGUA    0.10209    0.07343   1.390    0.166    
## EDUC_PBL    -0.07525    0.06566  -1.146    0.253    
## CP_MED       0.19592    0.34863   0.562    0.575    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.35 on 191 degrees of freedom
## Multiple R-squared:  0.2326, Adjusted R-squared:  0.2165 
## F-statistic: 14.47 on 4 and 191 DF,  p-value: 2.432e-10
reg3=lm(modelo3,data=data)
summary(reg3)
## 
## Call:
## lm(formula = modelo3, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.914 -11.981  -0.503  13.350  33.144 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 87.58302    9.05060   9.677  < 2e-16 ***
## SERV_GAS    -0.50122    0.08590  -5.835 2.27e-08 ***
## SERV_AGUA    0.10209    0.07343   1.390    0.166    
## EDUC_PBL    -0.07525    0.06566  -1.146    0.253    
## CP_MED       0.19592    0.34863   0.562    0.575    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.35 on 191 degrees of freedom
## Multiple R-squared:  0.2326, Adjusted R-squared:  0.2165 
## F-statistic: 14.47 on 4 and 191 DF,  p-value: 2.432e-10
model3=list('apropiacion (III)'=reg3)
modelsummary(model3, title = "Regresion: modelo 3",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 3
 apropiacion (III)
(Intercept) 87.583***
(9.051)
SERV_GAS −0.501***
(0.086)
SERV_AGUA 0.102
(0.073)
EDUC_PBL −0.075
(0.066)
CP_MED 0.196
(0.349)
Num.Obs. 196
R2 0.233
R2 Adj. 0.217
AIC 1658.4
BIC 1678.1
Log.Lik. −823.192
F 14.472
RMSE 16.14
+ p
modelo4 <- lm(PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL + CP_MED + IND_CORR , data = data)
summary(modelo4)
## 
## Call:
## lm(formula = PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL + CP_MED + 
##     IND_CORR, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.899 -11.821  -0.009  13.439  33.966 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 98.43629   11.23794   8.759 1.08e-15 ***
## SERV_GAS    -0.47972    0.08656  -5.542 9.89e-08 ***
## SERV_AGUA    0.08975    0.07351   1.221    0.224    
## EDUC_PBL    -0.09846    0.06694  -1.471    0.143    
## CP_MED       0.17941    0.34731   0.517    0.606    
## IND_CORR    -0.21956    0.13581  -1.617    0.108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.28 on 190 degrees of freedom
## Multiple R-squared:  0.243,  Adjusted R-squared:  0.2231 
## F-statistic:  12.2 on 5 and 190 DF,  p-value: 2.917e-10
reg4=lm(modelo4,data=data)
summary(reg4)
## 
## Call:
## lm(formula = modelo4, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.899 -11.821  -0.009  13.439  33.966 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 98.43629   11.23794   8.759 1.08e-15 ***
## SERV_GAS    -0.47972    0.08656  -5.542 9.89e-08 ***
## SERV_AGUA    0.08975    0.07351   1.221    0.224    
## EDUC_PBL    -0.09846    0.06694  -1.471    0.143    
## CP_MED       0.17941    0.34731   0.517    0.606    
## IND_CORR    -0.21956    0.13581  -1.617    0.108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.28 on 190 degrees of freedom
## Multiple R-squared:  0.243,  Adjusted R-squared:  0.2231 
## F-statistic:  12.2 on 5 and 190 DF,  p-value: 2.917e-10
model4=list('apropiacion (IV)'=reg4)
modelsummary(model4, title = "Regresion: modelo 4",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 4
 apropiacion (IV)
(Intercept) 98.436***
(11.238)
SERV_GAS −0.480***
(0.087)
SERV_AGUA 0.090
(0.074)
EDUC_PBL −0.098
(0.067)
CP_MED 0.179
(0.347)
IND_CORR −0.220
(0.136)
Num.Obs. 196
R2 0.243
R2 Adj. 0.223
AIC 1657.7
BIC 1680.7
Log.Lik. −821.853
F 12.198
RMSE 16.03
+ p
models=list('apropiacion (I)'=reg1,
            'apropiacion (II)'=reg2,
            'apropiacion (III)'=reg3,
            'apropiacion (IV)' = reg4)
modelsummary(models, title = "Resultados de todos los modelos",
             stars = TRUE,
             output = "kableExtra")
Resultados de todos los modelos
 apropiacion (I)  apropiacion (II)  apropiacion (III)  apropiacion (IV)
(Intercept) 78.196*** 85.960*** 87.583*** 98.436***
(4.605) (8.562) (9.051) (11.238)
SERV_GAS −0.423*** −0.483*** −0.501*** −0.480***
(0.057) (0.080) (0.086) (0.087)
SERV_AGUA 0.134+ 0.111 0.102 0.090
(0.068) (0.072) (0.073) (0.074)
EDUC_PBL −0.048 −0.075 −0.098
(0.045) (0.066) (0.067)
CP_MED 0.196 0.179
(0.349) (0.347)
IND_CORR −0.220
(0.136)
Num.Obs. 196 196 196 196
R2 0.227 0.231 0.233 0.243
R2 Adj. 0.219 0.219 0.217 0.223
AIC 1655.9 1656.7 1658.4 1657.7
BIC 1669.0 1673.1 1678.1 1680.7
Log.Lik. −823.943 −823.354 −823.192 −821.853
F 28.288 19.259 14.472 12.198
RMSE 16.20 16.15 16.14 16.03
+ p
models=list('apropiacion (I)'=reg1,
            'apropiacion (II)'=reg2,
            'apropiacion (III)'=reg3,
            'apropiacion (IV)'=reg4)
modelsummary(models, title = "Resultados de todos los modelos",statistic = "conf.int",
             stars = TRUE,
             output = "kableExtra")
Resultados de todos los modelos
 apropiacion (I)  apropiacion (II)  apropiacion (III)  apropiacion (IV)
(Intercept) 78.196*** 85.960*** 87.583*** 98.436***
[69.113, 87.279] [69.072, 102.848] [69.731, 105.435] [76.269, 120.603]
SERV_GAS −0.423*** −0.483*** −0.501*** −0.480***
[−0.535, −0.311] [−0.640, −0.326] [−0.671, −0.332] [−0.650, −0.309]
SERV_AGUA 0.134+ 0.111 0.102 0.090
[−0.001, 0.269] [−0.030, 0.252] [−0.043, 0.247] [−0.055, 0.235]
EDUC_PBL −0.048 −0.075 −0.098
[−0.137, 0.040] [−0.205, 0.054] [−0.231, 0.034]
CP_MED 0.196 0.179
[−0.492, 0.884] [−0.506, 0.864]
IND_CORR −0.220
[−0.487, 0.048]
Num.Obs. 196 196 196 196
R2 0.227 0.231 0.233 0.243
R2 Adj. 0.219 0.219 0.217 0.223
AIC 1655.9 1656.7 1658.4 1657.7
BIC 1669.0 1673.1 1678.1 1680.7
Log.Lik. −823.943 −823.354 −823.192 −821.853
F 28.288 19.259 14.472 12.198
RMSE 16.20 16.15 16.14 16.03
+ p
library(ggplot2)
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
plot_models(reg1,reg2,reg3, reg4, vline.color = "black",m.labels=c("Modelo 1","Modelo 2","Modelo 3", "Modelo 4"),dot.size = 1,line.size = 0.6)

modelo5_c <- lm(PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL + CP_MED + IND_CORR + POR_POB_ELEC + MCR, data = data)
summary(modelo5_c)
## 
## Call:
## lm(formula = PORC_GAN ~ SERV_GAS + SERV_AGUA + EDUC_PBL + CP_MED + 
##     IND_CORR + POR_POB_ELEC + MCR, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.810  -6.446  -0.136   6.730  31.594 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            69.36970   10.34884   6.703 2.39e-10 ***
## SERV_GAS               -0.62261    0.05878 -10.592  < 2e-16 ***
## SERV_AGUA               0.23087    0.05287   4.366 2.10e-05 ***
## EDUC_PBL               -0.07761    0.04487  -1.730   0.0854 .  
## CP_MED                 -0.02291    0.23678  -0.097   0.9230    
## IND_CORR                0.07253    0.09021   0.804   0.4224    
## POR_POB_ELEC            0.02800    0.07932   0.353   0.7245    
## MCRMACROREGION CENTRO  16.01045    3.61759   4.426 1.64e-05 ***
## MCRMACROREGION NORTE   -1.09847    3.69987  -0.297   0.7669    
## MCRMACROREGION ORIENTE  4.03745    3.94642   1.023   0.3076    
## MCRMACROREGION SUR     32.22452    3.68252   8.751 1.30e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.54 on 185 degrees of freedom
## Multiple R-squared:  0.6907, Adjusted R-squared:  0.674 
## F-statistic: 41.31 on 10 and 185 DF,  p-value: < 2.2e-16
reg5=lm(modelo5_c,data=data)
summary(reg5)
## 
## Call:
## lm(formula = modelo5_c, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.810  -6.446  -0.136   6.730  31.594 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            69.36970   10.34884   6.703 2.39e-10 ***
## SERV_GAS               -0.62261    0.05878 -10.592  < 2e-16 ***
## SERV_AGUA               0.23087    0.05287   4.366 2.10e-05 ***
## EDUC_PBL               -0.07761    0.04487  -1.730   0.0854 .  
## CP_MED                 -0.02291    0.23678  -0.097   0.9230    
## IND_CORR                0.07253    0.09021   0.804   0.4224    
## POR_POB_ELEC            0.02800    0.07932   0.353   0.7245    
## MCRMACROREGION CENTRO  16.01045    3.61759   4.426 1.64e-05 ***
## MCRMACROREGION NORTE   -1.09847    3.69987  -0.297   0.7669    
## MCRMACROREGION ORIENTE  4.03745    3.94642   1.023   0.3076    
## MCRMACROREGION SUR     32.22452    3.68252   8.751 1.30e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.54 on 185 degrees of freedom
## Multiple R-squared:  0.6907, Adjusted R-squared:  0.674 
## F-statistic: 41.31 on 10 and 185 DF,  p-value: < 2.2e-16
model5_c=list('apropiacion (V)'=reg5)
modelsummary(model5_c, title = "Regresion: modelo 5 Control",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 5 Control
 apropiacion (V)
(Intercept) 69.370***
(10.349)
SERV_GAS −0.623***
(0.059)
SERV_AGUA 0.231***
(0.053)
EDUC_PBL −0.078+
(0.045)
CP_MED −0.023
(0.237)
IND_CORR 0.073
(0.090)
POR_POB_ELEC 0.028
(0.079)
MCRMACROREGION CENTRO 16.010***
(3.618)
MCRMACROREGION NORTE −1.098
(3.700)
MCRMACROREGION ORIENTE 4.037
(3.946)
MCRMACROREGION SUR 32.225***
(3.683)
Num.Obs. 196
R2 0.691
R2 Adj. 0.674
AIC 1492.3
BIC 1531.6
Log.Lik. −734.144
F 41.309
RMSE 10.24
+ p
models=list('apropiacion (I)'=reg1,
            'apropiacion (II)'=reg2,
            'apropiacion (III)'=reg3,
            'apropiacion (IV)' = reg4,
            'apropiacion (V)' = reg5)
modelsummary(models, title = "Resultados de todos los modelos",
             stars = TRUE,
             output = "kableExtra")
Resultados de todos los modelos
 apropiacion (I)  apropiacion (II)  apropiacion (III)  apropiacion (IV)  apropiacion (V)
(Intercept) 78.196*** 85.960*** 87.583*** 98.436*** 69.370***
(4.605) (8.562) (9.051) (11.238) (10.349)
SERV_GAS −0.423*** −0.483*** −0.501*** −0.480*** −0.623***
(0.057) (0.080) (0.086) (0.087) (0.059)
SERV_AGUA 0.134+ 0.111 0.102 0.090 0.231***
(0.068) (0.072) (0.073) (0.074) (0.053)
EDUC_PBL −0.048 −0.075 −0.098 −0.078+
(0.045) (0.066) (0.067) (0.045)
CP_MED 0.196 0.179 −0.023
(0.349) (0.347) (0.237)
IND_CORR −0.220 0.073
(0.136) (0.090)
POR_POB_ELEC 0.028
(0.079)
MCRMACROREGION CENTRO 16.010***
(3.618)
MCRMACROREGION NORTE −1.098
(3.700)
MCRMACROREGION ORIENTE 4.037
(3.946)
MCRMACROREGION SUR 32.225***
(3.683)
Num.Obs. 196 196 196 196 196
R2 0.227 0.231 0.233 0.243 0.691
R2 Adj. 0.219 0.219 0.217 0.223 0.674
AIC 1655.9 1656.7 1658.4 1657.7 1492.3
BIC 1669.0 1673.1 1678.1 1680.7 1531.6
Log.Lik. −823.943 −823.354 −823.192 −821.853 −734.144
F 28.288 19.259 14.472 12.198 41.309
RMSE 16.20 16.15 16.14 16.03 10.24
+ p
models=list('apropiacion (I)'=reg1,
            'apropiacion (II)'=reg2,
            'apropiacion (III)'=reg3,
            'apropiacion (IV)'=reg4,
            'apropiacion (V)'=reg5)
modelsummary(models, title = "Resultados de todos los modelos",statistic = "conf.int",
             stars = TRUE,
             output = "kableExtra")
Resultados de todos los modelos
 apropiacion (I)  apropiacion (II)  apropiacion (III)  apropiacion (IV)  apropiacion (V)
(Intercept) 78.196*** 85.960*** 87.583*** 98.436*** 69.370***
[69.113, 87.279] [69.072, 102.848] [69.731, 105.435] [76.269, 120.603] [48.953, 89.787]
SERV_GAS −0.423*** −0.483*** −0.501*** −0.480*** −0.623***
[−0.535, −0.311] [−0.640, −0.326] [−0.671, −0.332] [−0.650, −0.309] [−0.739, −0.507]
SERV_AGUA 0.134+ 0.111 0.102 0.090 0.231***
[−0.001, 0.269] [−0.030, 0.252] [−0.043, 0.247] [−0.055, 0.235] [0.127, 0.335]
EDUC_PBL −0.048 −0.075 −0.098 −0.078+
[−0.137, 0.040] [−0.205, 0.054] [−0.231, 0.034] [−0.166, 0.011]
CP_MED 0.196 0.179 −0.023
[−0.492, 0.884] [−0.506, 0.864] [−0.490, 0.444]
IND_CORR −0.220 0.073
[−0.487, 0.048] [−0.105, 0.250]
POR_POB_ELEC 0.028
[−0.128, 0.184]
MCRMACROREGION CENTRO 16.010***
[8.873, 23.147]
MCRMACROREGION NORTE −1.098
[−8.398, 6.201]
MCRMACROREGION ORIENTE 4.037
[−3.748, 11.823]
MCRMACROREGION SUR 32.225***
[24.959, 39.490]
Num.Obs. 196 196 196 196 196
R2 0.227 0.231 0.233 0.243 0.691
R2 Adj. 0.219 0.219 0.217 0.223 0.674
AIC 1655.9 1656.7 1658.4 1657.7 1492.3
BIC 1669.0 1673.1 1678.1 1680.7 1531.6
Log.Lik. −823.943 −823.354 −823.192 −821.853 −734.144
F 28.288 19.259 14.472 12.198 41.309
RMSE 16.20 16.15 16.14 16.03 10.24
+ p
library(ggplot2)
library(sjPlot)


plot_models(reg1,reg2,reg3, reg4, reg5, vline.color = "black",m.labels=c("Modelo 1","Modelo 2","Modelo 3", "Modelo 4", "Modelo 5 Control"),dot.size = 1,line.size = 0.6)

library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(knitr)
tanova=anova(reg1,reg2,reg3, reg4, reg5)

kable(tanova,
      caption = "Comparación de modelos")%>%kableExtra::kable_styling(full_width = FALSE)
Comparación de modelos
Res.Df RSS Df Sum of Sq F Pr(>F)
193 51421.63 NA NA NA NA
192 51113.74 1 307.88484 2.7692677 0.0977830
191 51029.36 1 84.38012 0.7589563 0.3847844
190 50336.86 1 692.50283 6.2287112 0.0134445
185 20568.14 5 29768.71750 53.5508988 0.0000000

#Analisis factorial

names(data)
##  [1] "PROVINCIA"    "PORC_GAN"     "POR_POB_ELEC" "MCR"          "SERV_GAS"    
##  [6] "SERV_AGUA"    "EDUC_PBL"     "CP_MED"       "POBR_MON"     "IND_CORR"
# Cargar librerías
library(magrittr)
library(dplyr)


# Definir columnas a excluir (puedes personalizar según tu archivo)
dontselect <- c("PROVINCIA", "MCR", "POBR_MON")  # Excluye columnas categóricas o irrelevantes

# Seleccionar columnas numéricas
select <- setdiff(names(data), dontselect)
theData <- data[, select]

# Limpiar las columnas numéricas (si hay comas en lugar de puntos)
theData <- theData %>%
  mutate(across(everything(), ~ as.numeric(gsub(",", ".", .))))

# Mostrar las primeras filas como tabla
library(rmarkdown)
head(theData, 10) %>%
  rmarkdown::paged_table()
library(polycor)
corMatrix=polycor::hetcor(theData)$correlations
round(corMatrix,2)
##              PORC_GAN POR_POB_ELEC SERV_GAS SERV_AGUA EDUC_PBL CP_MED IND_CORR
## PORC_GAN         1.00         0.17    -0.46     -0.06     0.28   0.13    -0.29
## POR_POB_ELEC     0.17         1.00    -0.27     -0.10     0.36   0.21    -0.24
## SERV_GAS        -0.46        -0.27     1.00      0.38    -0.75  -0.35     0.50
## SERV_AGUA       -0.06        -0.10     0.38      1.00    -0.47  -0.19     0.18
## EDUC_PBL         0.28         0.36    -0.75     -0.47     1.00   0.71    -0.55
## CP_MED           0.13         0.21    -0.35     -0.19     0.71   1.00    -0.39
## IND_CORR        -0.29        -0.24     0.50      0.18    -0.55  -0.39     1.00
library(ggcorrplot)

ggcorrplot(corMatrix)

library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
## 
##     polyserial
## The following object is masked from 'package:modelsummary':
## 
##     SD
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
psych::KMO(corMatrix) 
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.67
## MSA for each item = 
##     PORC_GAN POR_POB_ELEC     SERV_GAS    SERV_AGUA     EDUC_PBL       CP_MED 
##         0.70         0.81         0.66         0.70         0.61         0.56 
##     IND_CORR 
##         0.92
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)

is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

##SI BIEN ES CIERTO AQUI ME SALE 2 FACTORES, cada vez que lo corro en otro archivo, salen 3 por lo que no supe muy bien como cambiarlo en mi dashboard

library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
resfa <- fa(theData,
            nfactors = 2,
            cor = 'mixed',
            rotate = "varimax", #oblimin?
            fm="minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
print(resfa$loadings)
## 
## Loadings:
##              MR1    MR2   
## PORC_GAN            -0.536
## POR_POB_ELEC  0.272 -0.228
## SERV_GAS     -0.452  0.793
## SERV_AGUA    -0.353  0.199
## EDUC_PBL      0.939 -0.391
## CP_MED        0.694       
## IND_CORR     -0.439  0.400
## 
##                  MR1   MR2
## SS loadings    1.966 1.329
## Proportion Var 0.281 0.190
## Cumulative Var 0.281 0.471
print(resfa$loadings,cutoff = 0.5)
## 
## Loadings:
##              MR1    MR2   
## PORC_GAN            -0.536
## POR_POB_ELEC              
## SERV_GAS             0.793
## SERV_AGUA                 
## EDUC_PBL      0.939       
## CP_MED        0.694       
## IND_CORR                  
## 
##                  MR1   MR2
## SS loadings    1.966 1.329
## Proportion Var 0.281 0.190
## Cumulative Var 0.281 0.471
fa.diagram(resfa,main = "Resultados")

sort(resfa$communality)
## POR_POB_ELEC    SERV_AGUA     PORC_GAN     IND_CORR       CP_MED     SERV_GAS 
##    0.1258413    0.1638421    0.2952804    0.3527498    0.4893836    0.8339456 
##     EDUC_PBL 
##    1.0345268
sort(resfa$complexity)
##       CP_MED     PORC_GAN     EDUC_PBL    SERV_AGUA     SERV_GAS POR_POB_ELEC 
##     1.032948     1.052794     1.337492     1.575692     1.588159     1.939394 
##     IND_CORR 
##     1.983142

#TUCKER LEWIS

resfa$TLI
## [1] 0.8967672

no es mayor

#RMS CERCA A CERO

resfa$rms
## [1] 0.03902024

SI

#RMSEA cerca a cero

resfa$RMSEA
##      RMSEA      lower      upper confidence 
## 0.11343509 0.07008286 0.16082036 0.90000000

#BIC

resfa$BIC
## [1] -14.00762

#indices

as.data.frame(resfa$scores)%>%head()
##          MR1        MR2
## 1 -0.2368571  0.9245780
## 2  0.2398385 -0.1773225
## 3 -0.2017179  0.2070784
## 4  0.2661158 -1.2595331
## 5  0.4263647  0.0973378
## 6 -0.3153642 -0.4514099
library(psych)
library(ggplot2)

# Seleccionar las variables relevantes
variables <- data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON")]

# Escalar las variables
variables_scaled <- scale(variables)

# Realizar el análisis factorial exploratorio (EFA) con 2 factores
resfa <- fa(variables_scaled, nfactors = 2, rotate = "varimax", fm = "ml")

# Asignar puntajes factoriales al dataset original
data$Factor1 <- resfa$scores[,1]  # Puntajes del primer factor
data$Factor2 <- resfa$scores[,2]  # Puntajes del segundo factor

# Generar el gráfico comparando una variable original con el primer puntaje factorial
ggplot(data = data, aes(x = SERV_GAS, y = Factor1)) +
  geom_point() +
  theme_minimal() +
  labs(
    x = "Servicio de Gas (original)",
    y = "Puntaje Factorial 1 "
  )

# normalizando
library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff
## The following object is masked from 'package:base':
## 
##     isFALSE

#Analisis de conglomerados

# Seleccionar las variables relevantes
variables <- data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON" )]

# Estandarizar las variables (si las escalas son diferentes)
variables_scaled <- scale(variables)

# Visualizar la distribución de las variables con boxplots
boxplot(variables_scaled, horizontal = FALSE, las = 2, cex.axis = 0.7)

library(BBmisc)
# Cargar librería
library(scales)
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Seleccionar y normalizar variables relevantes
variables <- data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON")]

# Normalizar variables al rango 0-10
variables_normalized <- as.data.frame(lapply(variables, rescale, to = c(0, 10)))

# Visualizar la distribución con boxplot
boxplot(variables_normalized, horizontal = FALSE, las = 2, cex.axis = 0.7,
        main = "Distribución de Variables Normalizadas (Rango 0-10)")

# Normalizar o estandarizar las variables (opcional, dependiendo de los rangos)
variables_scaled <- scale(variables)

# Calcular la matriz de correlación
cor_matrix <- cor(variables_scaled, use = "pairwise.complete.obs")

# Mostrar la matriz de correlación
print(cor_matrix)
##            SERV_GAS SERV_AGUA  IND_CORR  POBR_MON
## SERV_GAS  1.0000000 0.3827218 0.4957952 0.8070535
## SERV_AGUA 0.3827218 1.0000000 0.1827262 0.3168633
## IND_CORR  0.4957952 0.1827262 1.0000000 0.3739093
## POBR_MON  0.8070535 0.3168633 0.3739093 1.0000000
# Seleccionar variables relevantes
dataClus <- data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON")]

# Escalar las variables
dataClus <- scale(dataClus)

# Establecer nombres de fila (opcional, si tienes identificadores únicos como provincias)
row.names(dataClus) <- data$PROVINCIA
# Calcular matriz de distancias
dist_matrix <- dist(dataClus, method = "euclidean")
# Librerías necesarias
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Escalar las variables seleccionadas
variables <- scale(data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON")])

# Calcular el número óptimo de clusters con Gap Statistic
fviz_nbclust(
  variables,        # Variables escaladas
  FUN = hcut,       # Función de clusterización jerárquica
  method = "gap_stat",  # Método de evaluación
  k.max = 10,       # Máximo número de clusters evaluados
  verbose = FALSE   # Desactiva mensajes en consola
)

library(factoextra)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Escalar las variables seleccionadas
dataClus <- scale(data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON")])

# Realizar la clusterización jerárquica aglomerativa (AGNES) con linkage ward.D
set.seed(123)
res.agnes <- hcut(
  dataClus,                # Datos escalados
  k = 3,                   # Número de clusters deseados
  hc_func = "agnes",       # Método aglomerativo
  hc_method = "ward.D"     # Linkage ward
)

# Agregar los clusters al dataset original
data$agnes_cluster <- res.agnes$cluster

# Mostrar las primeras filas con los resultados
head(data, 15) %>% 
  kbl() %>% 
  kable_styling(full_width = FALSE)
PROVINCIA PORC_GAN POR_POB_ELEC MCR SERV_GAS SERV_AGUA EDUC_PBL CP_MED POBR_MON IND_CORR Factor1 Factor2 agnes_cluster
Chachapoyas 59.94 64.10 MACROREGION ORIENTE 65.28 87.62 46.84 13.77 4 62.7 0.4330506 0.7543148 1
Bagua 91.14 73.39 MACROREGION ORIENTE 45.91 70.80 78.07 10.98 3 36.3 0.0208357 -0.1343857 2
Bongará 61.65 77.93 MACROREGION ORIENTE 56.13 83.00 61.50 13.79 3 27.7 0.4429481 0.0579086 2
Condorcanqui 90.65 74.54 MACROREGION ORIENTE 15.36 22.38 113.35 15.00 1 30.2 -1.2329723 -0.7684571 3
Luya 61.66 73.69 MACROREGION ORIENTE 45.28 80.50 85.52 16.10 2 33.3 -0.1435424 0.0129080 2
Rodríguez de Mendoza 59.20 68.08 MACROREGION ORIENTE 38.98 73.38 69.54 12.48 3 21.8 0.0282620 -0.6428235 2
Utcubamba 65.53 72.27 MACROREGION ORIENTE 48.57 77.71 62.87 9.72 3 57.6 -0.1835367 0.3241129 1
Aija 62.20 93.35 MACROREGION NORTE 22.76 76.92 149.23 13.99 3 33.5 -0.5790006 -1.0234500 2
Antonio Raymondi 86.52 78.80 MACROREGION NORTE 18.93 91.15 98.92 8.60 2 41.2 -0.9712663 -0.8101144 2
Asunción 77.57 77.92 MACROREGION NORTE 31.48 90.62 97.28 3.89 3 21.4 -0.1488779 -0.9492707 2
Bolognesi 65.82 78.67 MACROREGION NORTE 31.27 79.33 68.30 10.00 4 40.3 -0.2267558 -0.8433548 1
Carhuaz 68.62 77.41 MACROREGION NORTE 25.27 86.42 40.39 5.20 3 41.0 -0.5957927 -0.8183556 2
Carlos F. Fitzcarrald 79.20 76.07 MACROREGION NORTE 15.34 85.79 82.72 7.03 2 41.6 -1.0855929 -0.9219609 2
Casma 47.15 65.25 MACROREGION NORTE 61.48 64.82 23.40 2.45 4 52.5 0.4294802 0.4792701 1
Corongo 59.82 70.92 MACROREGION NORTE 23.18 79.60 104.78 12.47 2 30.0 -0.7182899 -0.8348002 2
fviz_silhouette(res.agnes,print.summary = F)

# Escalar las variables seleccionadas
dataClus <- scale(data[, c("SERV_GAS", "SERV_AGUA", "IND_CORR", "POBR_MON")])

# Realizar la clusterización jerárquica aglomerativa (AGNES)
set.seed(123)
res.agnes <- hcut(
  dataClus,                # Datos escalados
  k = 3,                   # Número de clusters deseados
  hc_func = "agnes",       # Método aglomerativo
  hc_method = "ward.D"     # Linkage ward
)

# Visualizar el dendrograma en formato horizontal
fviz_dend(
  res.agnes,               # Objeto del resultado de hcut
  cex = 0.7,               # Tamaño del texto en el dendrograma
  horiz = TRUE,            # Dendrograma horizontal
  main = "Dendrograma de Clusterización Jerárquica (AGNES)"
)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Crear un data frame con la información de las siluetas
silAGNES <- data.frame(res.agnes$silinfo$widths)

# Agregar las etiquetas de las filas (observaciones, por ejemplo, provincias)
silAGNES$observation <- row.names(silAGNES)

# Identificar las observaciones con un ancho de silueta negativo
poorAGNES <- silAGNES[silAGNES$sil_width < 0, 'observation'] %>% sort()

# Mostrar las observaciones con valores de silueta negativos
poorAGNES
##  [1] "11"  "137" "153" "156" "157" "170" "21"  "24"  "46"  "48"  "61"  "71" 
## [13] "79"  "89"

#ANALISIS UNIVARIADO

table(data$MCR)
## 
##       LIMA Y CALLAO  MACROREGION CENTRO   MACROREGION NORTE MACROREGION ORIENTE 
##                  11                  53                  59                  29 
##     MACROREGION SUR 
##                  44
median(data$PORC_GAN,na.rm=T)
## [1] 68.025
median(data$POR_POB_ELEC,na.rm=T)
## [1] 79.135
median(data$SERV_GAS,na.rm=T)
## [1] 45.08
median(data$SERV_AGUA,na.rm=T)
## [1] 74.17
median(data$EDUC_PBL,na.rm=T)
## [1] 68.005
median(data$CP_MED,na.rm=T)
## [1] 7.05
median(data$IND_CORR,na.rm=T)
## [1] 41.15
mean(data$PORC_GAN,na.rm=T)
## [1] 67.4046
data %>%                      ## Paso 1: DATA 
  summarize(Promedio=mean(PORC_GAN),Mediana=median(PORC_GAN))  ## Paso 2: Resumir 
##   Promedio Mediana
## 1  67.4046  68.025
data %>%                      ## Paso 1: DATA 
  group_by(MCR) %>%    ## Paso 2: group_by
  summarize(Promedio=mean(PORC_GAN),Mediana=median(PORC_GAN)) ## Paso 2:summarize
## # A tibble: 5 × 3
##   MCR                 Promedio Mediana
##   <fct>                  <dbl>   <dbl>
## 1 LIMA Y CALLAO           46.5    45.3
## 2 MACROREGION CENTRO      73.1    79.9
## 3 MACROREGION NORTE       58.7    59.0
## 4 MACROREGION ORIENTE     57.3    59.2
## 5 MACROREGION SUR         84.0    87.1

HISTOGRAMA

library(ggplot2)
ggplot(data, aes(x=PORC_GAN)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data = data, aes(x = MCR)) +
  geom_bar(fill = 'red')

sd(data$PORC_GAN)
## [1] 18.46623
var(data$PORC_GAN)
## [1] 341.0015
max(data$PORC_GAN)
## [1] 99.03
min(data$PORC_GAN)
## [1] 25.93
range(data$PORC_GAN)
## [1] 25.93 99.03
quantile(data$PORC_GAN)
##      0%     25%     50%     75%    100% 
## 25.9300 52.9650 68.0250 84.0975 99.0300
IQR(data$PORC_GAN)
## [1] 31.1325
ggplot(data, aes(y = PORC_GAN )) + 
  stat_boxplot(geom = "errorbar", # Error bars
               width = 0.25) +    # Bars width
  geom_boxplot()

library(ggplot2)
ggplot(data, aes(x=MCR,y = PORC_GAN)) + 
  geom_boxplot()

library(ggplot2)
ggplot(data, aes(x=MCR,y = PORC_GAN,fill=MCR)) + 
  geom_boxplot()

ggplot(data.frame(data), aes(x = PORC_GAN)) +
       geom_histogram(aes(y = ..density..),
                      color = "gray", fill = "white") +
       geom_density(fill = "black", alpha = 0.2)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data, aes(x = PORC_GAN, fill = MCR , colour = MCR)) + 
  geom_histogram(alpha = 0.5, position = "identity") + 
  theme(legend.position = "left") # Izquierda
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(dplyr)
data %>%
  ggplot(aes(x = PORC_GAN, group = MCR)) +
  geom_histogram() +
  facet_wrap(~ MCR) +
  labs(x = "Macroregiones", y = "porcentaje en que gano PEDRO CASTILLO")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.