library(rio)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the dataset from the Excel workbook with rio::import().
dataparcial <- import("dataPeru.xlsx")

Hipótesis 1: Buen estado de locales escolares = % de población contribuyente + % de PEA ocupada

# List the columns available in the imported data.
names(dataparcial)
## [1] "DEPARTAMENTO"        "UBIGEO"              "buenEstado"         
## [4] "contribuyentesSunat" "peaOcupada"          "pobUrbana"          
## [7] "PobRural"            "pobTotal"
# Check the storage type of each variable used in the models below.
str(dataparcial[["buenEstado"]])
##  num [1:25] 18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
str(dataparcial[["contribuyentesSunat"]])
##  num [1:25] 75035 302906 103981 585628 151191 ...
str(dataparcial[["peaOcupada"]])
##  num [1:25] 130019 387976 140341 645001 235857 ...
# Gaussian (linear) regression, because the dependent variable is continuous
# numeric.
# NOTE(review): the stated hypothesis refers to percentages, but both
# predictors look like raw counts in the str() output -- confirm whether they
# should be expressed relative to pobTotal.
h1 <- buenEstado ~ contribuyentesSunat + peaOcupada
rg1 <- lm(h1, data = dataparcial)
summary(rg1)
## 
## Call:
## lm(formula = h1, data = dataparcial)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.589  -3.966  -1.347   1.907  21.518 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.865e+01  2.694e+00   6.922 5.98e-07 ***
## contribuyentesSunat  1.786e-05  2.060e-05   0.867    0.395    
## peaOcupada          -1.596e-05  2.241e-05  -0.712    0.484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.925 on 22 degrees of freedom
## Multiple R-squared:  0.1561, Adjusted R-squared:  0.07939 
## F-statistic: 2.035 on 2 and 22 DF,  p-value: 0.1546

Ninguna variable muestra significancia, por lo que se procede a estandarizar (escalar) las variables.

# Refit the first model with the response and both predictors passed through
# scale().
regscal1 <- lm(
  scale(buenEstado) ~ scale(contribuyentesSunat) + scale(peaOcupada),
  data = dataparcial
)
summary(regscal1)
## 
## Call:
## lm(formula = scale(buenEstado) ~ scale(contribuyentesSunat) + 
##     scale(peaOcupada), data = dataparcial)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2821 -0.4802 -0.1631  0.2309  2.6052 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)
## (Intercept)                 3.313e-16  1.919e-01   0.000    1.000
## scale(contribuyentesSunat)  2.034e+00  2.346e+00   0.867    0.395
## scale(peaOcupada)          -1.670e+00  2.346e+00  -0.712    0.484
## 
## Residual standard error: 0.9595 on 22 degrees of freedom
## Multiple R-squared:  0.1561, Adjusted R-squared:  0.07939 
## F-statistic: 2.035 on 2 and 22 DF,  p-value: 0.1546

No hay mejora en el ajuste del modelo ni en la significancia de las variables.

library(lm.beta)
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
# Wrap the lm.beta() version of rg1 in a named list and render it as a
# one-model summary table.
rg1_esc <- list("regresión escalada" = lm.beta(rg1))
modelsummary(
  rg1_esc,
  title = "Reg. escalada 1",
  stars = TRUE,
  output = "kableExtra"
)
Reg. escalada 1
regresión escalada
contribuyentesSunat 2.034
(0.000)
peaOcupada -1.670
(0.000)
Num.Obs. 25
R2 0.156
R2 Adj. 0.079
AIC 179.3
BIC 184.2
Log.Lik. -85.639
RMSE 7.43
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Se concluye que ninguna de las variables es significativa; la hipótesis se descarta.

Hipótesis 2: % de PEA ocupada = % de población contribuyente + Buen estado de locales escolares

# Second model: peaOcupada regressed on contribuyentesSunat and buenEstado.
h2 <- peaOcupada ~ contribuyentesSunat + buenEstado
rg2 <- lm(h2, data = dataparcial)
summary(rg2)
## 
## Call:
## lm(formula = h2, data = dataparcial)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -91867 -58573 -11166  46174 155851 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.155e+05  3.787e+04   3.049  0.00588 ** 
## contribuyentesSunat  9.206e-01  1.741e-02  52.872  < 2e-16 ***
## buenEstado          -1.412e+03  1.983e+03  -0.712  0.48395    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 74540 on 22 degrees of freedom
## Multiple R-squared:  0.9932, Adjusted R-squared:  0.9926 
## F-statistic:  1603 on 2 and 22 DF,  p-value: < 2.2e-16

Se escalan las variables para corroborar el resultado.

# Refit the second model with the response and both predictors passed through
# scale().
regscal2 <- lm(
  scale(peaOcupada) ~ scale(contribuyentesSunat) + scale(buenEstado),
  data = dataparcial
)
summary(regscal2)
## 
## Call:
## lm(formula = scale(peaOcupada) ~ scale(contribuyentesSunat) + 
##     scale(buenEstado), data = dataparcial)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10626 -0.06775 -0.01292  0.05341  0.18027 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 8.043e-17  1.724e-02   0.000    1.000    
## scale(contribuyentesSunat)  1.001e+00  1.894e-02  52.872   <2e-16 ***
## scale(buenEstado)          -1.349e-02  1.894e-02  -0.712    0.484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08622 on 22 degrees of freedom
## Multiple R-squared:  0.9932, Adjusted R-squared:  0.9926 
## F-statistic:  1603 on 2 and 22 DF,  p-value: < 2.2e-16

Se concluye que solo la variable de contribuyentes genera un efecto significativo en la VD.