REPASO DE R - estadística descriptiva Diego Lara
# Install devtools only when it is missing, so re-running the script does not
# reinstall it (and hit the network) on every execution.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library("devtools")
## Loading required package: usethis
# Build papeR from GitHub only when no papeR installation exists yet; the
# GitHub build is slow and needs network access.
if (!requireNamespace("papeR", quietly = TRUE)) {
  install_github("hofnerb/papeR")
}
## Downloading GitHub repo hofnerb/papeR@master
##
##
## checking for file ‘/tmp/RtmpegAQ1q/remotes108365fefa2f/hofnerb-papeR-ec5fd87/DESCRIPTION’ ...
## ✓ checking for file ‘/tmp/RtmpegAQ1q/remotes108365fefa2f/hofnerb-papeR-ec5fd87/DESCRIPTION’
##
## ─ preparing ‘papeR’:
##
## checking DESCRIPTION meta-information ...
## ✓ checking DESCRIPTION meta-information
##
## ─ checking for LF line-endings in source and make files and shell scripts
##
## ─ checking for empty or unneeded directories
## Removed empty directory ‘papeR/tests/Examples’
##
## ─ building ‘papeR_1.0-4.tar.gz’
##
##
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
# Install the CRAN release only when papeR is not available yet. An
# unconditional install.packages() here would replace the GitHub build
# installed earlier in the script and reinstall on every run.
if (!requireNamespace("papeR", quietly = TRUE)) {
  install.packages("papeR")
}
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
# Attach papeR; its summarize() is used for the descriptive tables below.
library(papeR)
## Loading required package: car
## Loading required package: carData
## Loading required package: xtable
## Registered S3 method overwritten by 'papeR':
## method from
## Anova.lme car
##
## Attaching package: 'papeR'
## The following object is masked from 'package:utils':
##
## toLatex
# URL of the Google Sheets document published as CSV.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vRq_z7dcVQae82F92aErK8S6f3Tv0E0hI_8tiNFRqRy3Lp4gyj0Tf8HCcBSWeboQgCquoB4D56DhOdx/pub?output=csv"
# Read the sheet into a data frame, keeping text columns as character.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
midata <- read.csv(link, stringsAsFactors = FALSE)
# Quick per-column overview (quartiles, mean, NA counts).
summary(midata)
## poblacioncienmil nbi consejocomunal priorizado
## Min. : 0.00158 Min. : 5.36 Min. :0.00000 Min. :0.0000
## 1st Qu.: 0.07422 1st Qu.:28.35 1st Qu.:0.00000 1st Qu.:0.0000
## Median : 0.13998 Median :41.30 Median :0.00000 Median :0.0000
## Mean : 0.40470 Mean :42.96 Mean :0.05474 Mean :0.2518
## 3rd Qu.: 0.26255 3rd Qu.:55.48 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :69.26836 Max. :98.81 Max. :1.00000 Max. :1.0000
## NA's :30
## uribista ejecucion apropiaciondolar pctopo
## Min. :0.0000 Min. :0.00000 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.: 0.000 1st Qu.: 5.922
## Median :1.0000 Median :0.00000 Median : 0.000 Median :20.308
## Mean :0.6278 Mean :0.03741 Mean : 8.276 Mean :27.874
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.: 9.385 3rd Qu.:45.711
## Max. :1.0000 Max. :1.00000 Max. :132.643 Max. :99.419
## NA's :204 NA's :7
VARIABLES NUMÉRICAS
# Structure and five-number summary of apropiaciondolar.
str(midata$apropiaciondolar)
## num [1:1096] 102.17 4.19 1.59 0 0 ...
summary(midata$apropiaciondolar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 8.276 9.385 132.643
# The recorded summary shows no NA's for this column, so the result is
# unchanged; na.rm = TRUE keeps sd() from returning NA if the sheet later
# gains missing values, and matches the sd() calls for pctopo and nbi.
sd(midata$apropiaciondolar, na.rm = TRUE)
## [1] 16.04451
# Structure, summary and standard deviation of pctopo. The column contains
# missing values (the summary reports NA's), so sd() drops them explicitly.
str(midata[["pctopo"]])
## num [1:1096] 14.82 14.51 15.08 6.15 47.31 ...
summary(midata[["pctopo"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.922 20.308 27.874 45.711 99.419 7
sd(midata[["pctopo"]], na.rm = TRUE)
## [1] 25.58515
# Structure and five-number summary of poblacioncienmil.
str(midata$poblacioncienmil)
## num [1:1096] 20.9155 0.2406 0.0398 0.0558 0.2723 ...
summary(midata$poblacioncienmil)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00158 0.07422 0.13999 0.40470 0.26256 69.26836
# The recorded summary shows no NA's for this column, so the result is
# unchanged; na.rm = TRUE keeps sd() from returning NA if the sheet later
# gains missing values, and matches the sd() calls for pctopo and nbi.
sd(midata$poblacioncienmil, na.rm = TRUE)
## [1] 2.393711
# Structure, summary and standard deviation of nbi. The column contains
# missing values (the summary reports NA's), so sd() drops them explicitly.
str(midata[["nbi"]])
## num [1:1096] 12.2 33.8 28.5 33.1 27.1 ...
summary(midata[["nbi"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5.36 28.35 41.30 42.96 55.48 98.81 30
sd(midata[["nbi"]], na.rm = TRUE)
## [1] 18.69776
VARIABLES CATEGÓRICAS
# Inspect the raw coding before converting (numeric 0/1 with some NA).
str(midata$uribista)
## num [1:1096] 0 1 1 1 1 1 1 1 1 NA ...
# Map the 0/1 codes to labels explicitly. Without `levels`, factor() pairs the
# labels with whatever sorted unique values are present, so a column missing
# one code (or using other codes) would be mislabelled or raise an error.
# The is.factor() guard keeps a second run from turning every value into NA.
if (!is.factor(midata$uribista)) {
  midata$uribista <- factor(
    midata$uribista,
    levels = c(0, 1),
    labels = c("no", "si")
  )
}
# Absolute frequencies; table() drops NA by default.
table(midata$uribista)
##
## no si
## 332 560
# Percentages are therefore computed over the non-missing rows only.
prop.table(table(midata$uribista)) * 100
##
## no si
## 37.21973 62.78027
# Inspect the raw coding before converting (numeric 0/1).
str(midata$priorizado)
## num [1:1096] 0 1 0 0 0 0 0 0 1 0 ...
# Map the 0/1 codes to labels explicitly. Without `levels`, factor() pairs the
# labels with whatever sorted unique values are present, so a column missing
# one code (or using other codes) would be mislabelled or raise an error.
# The is.factor() guard keeps a second run from turning every value into NA.
if (!is.factor(midata$priorizado)) {
  midata$priorizado <- factor(
    midata$priorizado,
    levels = c(0, 1),
    labels = c("no", "si")
  )
}
# Absolute frequencies per category.
table(midata$priorizado)
##
## no si
## 820 276
# Same frequencies expressed as percentages.
prop.table(table(midata$priorizado)) * 100
##
## no si
## 74.81752 25.18248
# Inspect the raw coding before converting (numeric 0/1).
str(midata$consejocomunal)
## num [1:1096] 0 0 0 0 0 0 0 0 0 0 ...
# Map the 0/1 codes to labels explicitly. Without `levels`, factor() pairs the
# labels with whatever sorted unique values are present, so a column missing
# one code (or using other codes) would be mislabelled or raise an error.
# The is.factor() guard keeps a second run from turning every value into NA.
if (!is.factor(midata$consejocomunal)) {
  midata$consejocomunal <- factor(
    midata$consejocomunal,
    levels = c(0, 1),
    labels = c("no", "si")
  )
}
# Absolute frequencies per category.
table(midata$consejocomunal)
##
## no si
## 1036 60
# Same frequencies expressed as percentages.
prop.table(table(midata$consejocomunal)) * 100
##
## no si
## 94.525547 5.474453
# Inspect the raw coding before converting (numeric 0/1).
str(midata$ejecucion)
## num [1:1096] 0 0 0 0 0 0 0 0 0 0 ...
# Map the 0/1 codes to labels explicitly. Without `levels`, factor() pairs the
# labels with whatever sorted unique values are present, so a column missing
# one code (or using other codes) would be mislabelled or raise an error.
# The is.factor() guard keeps a second run from turning every value into NA.
if (!is.factor(midata$ejecucion)) {
  midata$ejecucion <- factor(
    midata$ejecucion,
    levels = c(0, 1),
    labels = c("no", "si")
  )
}
# Absolute frequencies per category.
table(midata$ejecucion)
##
## no si
## 1055 41
# Same frequencies expressed as percentages.
prop.table(table(midata$ejecucion)) * 100
##
## no si
## 96.259124 3.740876
CUADRO ESTADÍSTICO DESCRIPTIVO DE VARIABLES NUMÉRICAS
# Descriptive table (N, missing, mean, SD, quartiles) for the numeric columns;
# factor columns are dropped. The papeR:: prefix ensures papeR's summarize()
# is called even if another attached package (e.g. dplyr) masks the name.
papeR::summarize(midata, type = "numeric")
## Factors are dropped from the summary
## N Missing Mean SD Min Q1 Median Q3 Max
## 1 poblacioncienmil 1096 0 0.40 2.39 0.00 0.07 0.14 0.26 69.27
## 2 nbi 1066 30 42.96 18.70 5.36 28.34 41.30 55.49 98.81
## 3 apropiaciondolar 1096 0 8.28 16.04 0.00 0.00 0.00 9.40 132.64
## 4 pctopo 1089 7 27.87 25.59 0.00 5.92 20.31 45.71 99.42
CUADRO ESTADÍSTICO DESCRIPTIVO DE VARIABLES CATEGÓRICAS
# Frequency table (level, N, %) for the factor columns; non-factor columns are
# dropped, so this relies on the 0/1 columns having been converted to factors.
# Unlike table(), missing values get their own "<Missing>" row and count
# toward the percentages. The papeR:: prefix ensures papeR's summarize() is
# called even if another attached package (e.g. dplyr) masks the name.
papeR::summarize(midata, type = "factor")
## Non-factors are dropped from the summary
## Level N %
## 1 consejocomunal no 1036 94.5
## 2 si 60 5.5
## 3 priorizado no 820 74.8
## 4 si 276 25.2
## 5 uribista no 332 30.3
## 6 si 560 51.1
## 7 <Missing> 204 18.6
## 8 ejecucion no 1055 96.3
## 9 si 41 3.7