rm(list = ls()) # limpiar el working environment

#carga de data
linkToData='https://docs.google.com/spreadsheets/d/e/2PACX-1vSRpCC8gKIxMxpK0wjgLcl-GQWdw6sAeB16Sixkq6kZXeTfMS8_n70twbEbQ2tMcGp8tm-6x8qf8ieo/pub?gid=234740779&single=true&output=csv'
pavi=read.csv(linkToData)
str(pavi)
## 'data.frame':    1096 obs. of  9 variables:
##  $ municipio       : chr  "m1" "m2" "m3" "m4" ...
##  $ apropiaciondolar: num  102.17 4.19 1.59 0 0 ...
##  $ consejocomunal  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ejecucion       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ uribista        : int  0 1 1 1 1 1 1 1 1 NA ...
##  $ pctopo          : num  14.82 14.51 15.08 6.15 47.31 ...
##  $ priorizado      : int  0 1 0 0 0 0 0 0 1 0 ...
##  $ poblacioncienmil: num  20.9155 0.2406 0.0398 0.0558 0.2723 ...
##  $ nbi             : num  12.2 33.8 28.5 33.1 27.1 ...
pavi=pavi[complete.cases(pavi),]
seleccion=c("consejocomunal","ejecucion","uribista","priorizado")
pavi[,seleccion]=lapply(pavi[,seleccion],as.factor)
paviStats=summary(pavi[,-1])
paviStats
##  apropiaciondolar  consejocomunal ejecucion uribista     pctopo      
##  Min.   :  0.000   0:827          0:844     0:325    Min.   : 0.000  
##  1st Qu.:  0.000   1: 49          1: 32     1:551    1st Qu.: 6.285  
##  Median :  0.000                                     Median :19.607  
##  Mean   :  8.926                                     Mean   :27.306  
##  3rd Qu.: 11.436                                     3rd Qu.:44.455  
##  Max.   :132.643                                     Max.   :99.419  
##  priorizado poblacioncienmil        nbi       
##  0:639      Min.   : 0.00629   Min.   : 6.84  
##  1:237      1st Qu.: 0.07460   1st Qu.:27.33  
##             Median : 0.14593   Median :40.26  
##             Mean   : 0.45639   Mean   :41.72  
##             3rd Qu.: 0.27433   3rd Qu.:53.78  
##             Max.   :69.26836   Max.   :97.32
library(ggcorrplot)
## Loading required package: ggplot2
colNums=names(pavi)[c(2,6,8,9)]
numXs=pavi[,colNums]
ggcorrplot(cor(numXs),lab = T,show.diag = F)

library(magrittr)
library(kableExtra)

colCats=setdiff(names(pavi), colNums)[-1]
diffPara=c()
diffNoPara=c()

for (col in colCats){
    diffPara=c(diffPara,t.test(pavi[,"apropiaciondolar"]~pavi[,col])['p.value']<=0.05)
    diffNoPara=c(diffNoPara,wilcox.test(pavi[,"apropiaciondolar"]~pavi[,col])['p.value']<=0.05)
}
data.frame(cbind(colCats,diffPara,diffNoPara),
           row.names = 1:length(colCats))%>%
           kable(caption = "Diferencia de 'VD:Apropiacion' por Grupo")%>%
            kableExtra::kable_styling(full_width = FALSE)
Diferencia de ‘VD:Apropiacion’ por Grupo
colCats diffPara diffNoPara
consejocomunal TRUE TRUE
ejecucion FALSE FALSE
uribista TRUE FALSE
priorizado FALSE TRUE
par(mfrow = c(2, 2))  

for (col in colCats) { 
  boxplot(pavi$apropiaciondolar~pavi[,col],
       main = col,xlab="",ylab = "")
}

# hipotesis en R
modelo1=formula(apropiaciondolar~pctopo+poblacioncienmil)
reg1=lm(modelo1,data=pavi)
summary(reg1)
## 
## Call:
## lm(formula = modelo1, data = pavi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.809  -8.671  -7.448   2.602  90.426 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       8.80319    0.79976  11.007   <2e-16 ***
## pctopo           -0.03146    0.02150  -1.463    0.144    
## poblacioncienmil  2.15125    0.20073  10.717   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.85 on 873 degrees of freedom
## Multiple R-squared:  0.1181, Adjusted R-squared:  0.1161 
## F-statistic: 58.45 on 2 and 873 DF,  p-value: < 2.2e-16
# modelo 1 mas una nueva independiente
modelo2=formula(apropiaciondolar~pctopo+consejocomunal+poblacioncienmil)
reg2=lm(modelo2,data=pavi)
summary(reg2)
## 
## Call:
## lm(formula = modelo2, data = pavi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.378  -7.933  -6.834   1.573  91.247 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       8.04119    0.79129  10.162  < 2e-16 ***
## pctopo           -0.02987    0.02103  -1.420    0.156    
## consejocomunal1  14.79601    2.32127   6.374 2.98e-10 ***
## poblacioncienmil  1.91236    0.19987   9.568  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.51 on 872 degrees of freedom
## Multiple R-squared:  0.1573, Adjusted R-squared:  0.1545 
## F-statistic: 54.28 on 3 and 872 DF,  p-value: < 2.2e-16
# modelo 2 mas 'uribista'
modelo3=formula(apropiaciondolar~pctopo+consejocomunal+uribista+poblacioncienmil)

reg3=lm(modelo3,data=pavi)
summary(reg3)
## 
## Call:
## lm(formula = modelo3, data = pavi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.004  -7.546  -6.482   1.847  91.958 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       9.80542    1.11705   8.778  < 2e-16 ***
## pctopo           -0.03753    0.02126  -1.765   0.0779 .  
## consejocomunal1  14.73795    2.31613   6.363 3.19e-10 ***
## uribista1        -2.45126    1.09801  -2.232   0.0258 *  
## poblacioncienmil  1.89052    0.19965   9.469  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.47 on 871 degrees of freedom
## Multiple R-squared:  0.1621, Adjusted R-squared:  0.1583 
## F-statistic: 42.14 on 4 and 871 DF,  p-value: < 2.2e-16
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
modelo3_st=formula(scale(apropiaciondolar)~scale(pctopo)+scale(as.numeric(consejocomunal))+scale(as.numeric(uribista))+scale(poblacioncienmil))

modelo3_st=lm(modelo3_st,data=pavi)

modelo3_st=list('apropiacion (III_st)'=modelo3_st)
modelsummary(modelo3_st, title = "Regresion: modelo 3 con \ncoeficientes estandarizados",
             stars = TRUE,
             output = "kableExtra")
Regresion: modelo 3 con coeficientes estandarizados
 apropiacion (III_st)
(Intercept) 0.000
(0.031)
scale(pctopo) -0.055+
(0.031)
scale(as.numeric(consejocomunal)) 0.201***
(0.032)
scale(as.numeric(uribista)) -0.070*
(0.031)
scale(poblacioncienmil) 0.299***
(0.032)
Num.Obs. 876
R2 0.162
R2 Adj. 0.158
AIC 2342.0
BIC 2370.7
Log.Lik. -1165.004
F 42.140
RMSE 0.91
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001