# Published Google Sheets CSV export that holds the data set.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTLwlJ0dQJCuiidZvNH121tKRKCkpecEpdPolQO60oQ0SuAO66bVSZwTLtFJgXYeUiTSQJ5ripcwEv9/pub?gid=0&single=true&output=csv"
# Read the CSV and keep text columns as character vectors. FALSE is spelled
# out because `F` is an ordinary variable that can be reassigned.
paviData <- read.csv(link, stringsAsFactors = FALSE)
Revisando estructura
# Compact listing of the data frame: one line per column with type and values.
str(object = paviData)
## 'data.frame': 1096 obs. of 8 variables:
## $ apropiaciondolar : num 102.2 62.8 0 0 0 ...
## $ priorizado : chr "no" "no" "si" "no" ...
## $ pctopo : num 14.8 33.6 55.4 63.4 74.5 ...
## $ uribista : chr "no" "no" "no" "no" ...
## $ con_consejocomunal: chr "no" "no" "no" "no" ...
## $ ejecucion : chr "no" "no" "no" "no" ...
## $ poblacioncienmil : num 20.92 0.39 0.11 0.17 0.08 ...
## $ nbi : num 12.2 21.6 31.2 45.3 63.9 ...
Conversión a categóricas
# Columns 2 and 4 to 6 (priorizado, uribista, con_consejocomunal, ejecucion)
# are read as text; turn each of them into a factor.
paviData[c(2, 4:6)] <- lapply(paviData[c(2, 4:6)], as.factor)
Eliminar valores perdidos
# Keep only the rows that have no NA in any column.
# NOTE(review): blank strings are not NA, so rows whose uribista is "" survive
# this filter (the summary output still lists them as an unnamed level).
paviData <- paviData[complete.cases(paviData), ]
# Per-column summary of the filtered data.
print(summary(paviData))
## apropiaciondolar priorizado pctopo uribista con_consejocomunal
## Min. : 0.000 no:790 Min. : 0.00 :184 no:1001
## 1st Qu.: 0.000 si:270 1st Qu.: 6.24 no:325 si: 59
## Median : 0.000 Median :20.39 si:551
## Mean : 8.448 Mean :27.97
## 3rd Qu.: 10.560 3rd Qu.:45.70
## Max. :132.640 Max. :99.42
## ejecucion poblacioncienmil nbi
## no:1021 Min. : 0.0000 Min. : 5.36
## si: 39 1st Qu.: 0.0700 1st Qu.:28.30
## Median : 0.1400 Median :41.27
## Mean : 0.4143 Mean :42.94
## 3rd Qu.: 0.2700 3rd Qu.:55.49
## Max. :69.2700 Max. :98.81
Convertir las cadenas vacías de uribista en valores perdidos (NA)
# Inspect the levels of uribista; the empty string shows up as its own level.
levels(paviData$uribista)
## [1] "" "no" "si"
# Recode the empty-string level as missing.
# NOTE(review): this runs after the complete.cases() filter, so the affected
# rows remain in paviData with uribista set to NA.
levels(paviData$uribista)[levels(paviData$uribista) == ""] <- NA_character_
# Shapiro-Wilk normality test on the appropriation variable.
shapiro.test(x = paviData$apropiaciondolar)
##
## Shapiro-Wilk normality test
##
## data: paviData$apropiaciondolar
## W = 0.595, p-value < 2.2e-16
# DescTools supplies Skew().
library(DescTools)
# Skewness of the appropriation variable.
Skew(x = paviData$apropiaciondolar)
## [1] 2.708773
Histograma
library(ggplot2)
# Histogram of apropiaciondolar using 20 bins.
base <- ggplot(paviData, mapping = aes(x = apropiaciondolar))
base + geom_histogram(bins = 20)
Diagrama de cajas
# Single box plot of apropiaciondolar, flipped so the box runs horizontally.
base <- ggplot(paviData, mapping = aes(y = apropiaciondolar))
base + geom_boxplot() + coord_flip()
# Hypothesis Ha: apropiaciondolar explained by priorizado.
Ha <- apropiaciondolar ~ priorizado
# Median of apropiaciondolar within each level of priorizado.
aggregate(Ha, data = paviData, FUN = median)
## priorizado apropiaciondolar
## 1 no 0.000
## 2 si 0.655
Gráfico
# Box plots of apropiaciondolar for each level of priorizado, flipped so the
# boxes run horizontally.
base <- ggplot(
  paviData,
  mapping = aes(x = priorizado, y = apropiaciondolar)
)
base + geom_boxplot() + coord_flip()
# Two-sided Wilcoxon rank-sum test for Ha.
wilcox.test(Ha, data = paviData, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: apropiaciondolar by priorizado
## W = 96581, p-value = 0.01152
## alternative hypothesis: true location shift is not equal to 0
# Hypothesis Hb1: association between apropiaciondolar and pctopo.
Hb1 <- ~ apropiaciondolar + pctopo
# Spearman rank correlation test. The method name is written in full instead
# of relying on partial matching of "spearm", and FALSE replaces the
# reassignable `F`. exact = FALSE requests the approximate p-value.
cor.test(Hb1, data = paviData, method = "spearman", exact = FALSE)
##
## Spearman's rank correlation rho
##
## data: apropiaciondolar and pctopo
## S = 205100315, p-value = 0.2796
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.03323799
Gráfico
# ggpubr supplies ggscatter().
library(ggpubr)
# Scatter plot of apropiaciondolar against pctopo with a regression line, its
# confidence band, and the Spearman coefficient printed on the plot.
ggscatter(
  data = paviData,
  x = "pctopo",
  y = "apropiaciondolar",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
## `geom_smooth()` using formula 'y ~ x'
# Hypothesis Hb2: apropiaciondolar explained by uribista.
Hb2 <- apropiaciondolar ~ uribista
# Median of apropiaciondolar within each level of uribista.
aggregate(Hb2, data = paviData, FUN = median)
## uribista apropiaciondolar
## 1 no 0
## 2 si 0
¿Serían distribuciones iguales?
# Two-sided Wilcoxon rank-sum test for Hb2.
wilcox.test(Hb2, data = paviData, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: apropiaciondolar by uribista
## W = 95048, p-value = 0.09883
## alternative hypothesis: true location shift is not equal to 0
Gráfico
# Box plots of apropiaciondolar for each level of uribista, flipped so the
# boxes run horizontally.
base <- ggplot(
  paviData,
  mapping = aes(x = uribista, y = apropiaciondolar)
)
base + geom_boxplot() + coord_flip()
### Hc1: “apropiación” tiene relación directa con ‘consejocomunal’
# Hypothesis Hc1: apropiaciondolar explained by con_consejocomunal.
Hc1 <- apropiaciondolar ~ con_consejocomunal
# Median of apropiaciondolar within each level of con_consejocomunal.
aggregate(Hc1, data = paviData, FUN = median)
## con_consejocomunal apropiaciondolar
## 1 no 0.00
## 2 si 16.92
Parece que hay diferencias:
# Two-sided Wilcoxon rank-sum test for Hc1.
wilcox.test(Hc1, data = paviData, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: apropiaciondolar by con_consejocomunal
## W = 14004, p-value = 1.32e-13
## alternative hypothesis: true location shift is not equal to 0
Gráfico
# Box plots of apropiaciondolar for each level of con_consejocomunal, flipped
# so the boxes run horizontally.
base <- ggplot(
  paviData,
  mapping = aes(x = con_consejocomunal, y = apropiaciondolar)
)
base + geom_boxplot() + coord_flip()
# Hypothesis Hc2: apropiaciondolar explained by ejecucion.
Hc2 <- apropiaciondolar ~ ejecucion
# Median of apropiaciondolar within each level of ejecucion.
aggregate(Hc2, data = paviData, FUN = median)
## ejecucion apropiaciondolar
## 1 no 0.00
## 2 si 0.99
Gráfico
# Box plots of apropiaciondolar for each level of ejecucion, flipped so the
# boxes run horizontally.
base <- ggplot(
  paviData,
  mapping = aes(x = ejecucion, y = apropiaciondolar)
)
base + geom_boxplot() + coord_flip()
# Two-sided Wilcoxon rank-sum test for Hc2.
wilcox.test(Hc2, data = paviData, alternative = "two.sided")
##
## Wilcoxon rank sum test with continuity correction
##
## data: apropiaciondolar by ejecucion
## W = 17497, p-value = 0.1612
## alternative hypothesis: true location shift is not equal to 0
# List the column names of the data frame.
colnames(paviData)
## [1] "apropiaciondolar" "priorizado" "pctopo"
## [4] "uribista" "con_consejocomunal" "ejecucion"
## [7] "poblacioncienmil" "nbi"
# Ordinary linear model of apropiaciondolar on every other column.
# NOTE(review): rows with NA (the recoded blank uribista values) are dropped
# by lm(); the recorded output reports 184 observations deleted.
paviModeloOrd <- lm(formula = apropiaciondolar ~ ., data = paviData)
# Coefficient table and fit statistics.
summary(object = paviModeloOrd)
##
## Call:
## lm(formula = apropiaciondolar ~ ., data = paviData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.200 -8.208 -5.877 2.512 92.495
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.13107 1.67647 8.429 < 2e-16 ***
## priorizadosi -2.21440 1.18713 -1.865 0.06247 .
## pctopo -0.03096 0.02126 -1.456 0.14562
## uribistasi -2.57304 1.09185 -2.357 0.01867 *
## con_consejocomunalsi 14.05085 2.32445 6.045 2.22e-09 ***
## ejecucionsi 2.95645 2.80751 1.053 0.29261
## poblacioncienmil 1.83856 0.20023 9.182 < 2e-16 ***
## nbi -0.09290 0.02941 -3.159 0.00164 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 868 degrees of freedom
## (184 observations deleted due to missingness)
## Multiple R-squared: 0.1751, Adjusted R-squared: 0.1684
## F-statistic: 26.32 on 7 and 868 DF, p-value: < 2.2e-16