REPASO DE R - estadística descriptiva - Diego Lara

# Install devtools only when it is not already available, so that re-running
# the script does not reinstall it (and need network access) every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
  ## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
  ## (as 'lib' is unspecified)
}
library("devtools")
## Loading required package: usethis
# Build papeR from its GitHub repository only when no copy is installed yet;
# an unconditional install_github() rebuilds the package on every run.
if (!requireNamespace("papeR", quietly = TRUE)) {
  install_github("hofnerb/papeR")
}
## Downloading GitHub repo hofnerb/papeR@master
## 
##   
   checking for file ‘/tmp/RtmpegAQ1q/remotes108365fefa2f/hofnerb-papeR-ec5fd87/DESCRIPTION’ ...
  
✓  checking for file ‘/tmp/RtmpegAQ1q/remotes108365fefa2f/hofnerb-papeR-ec5fd87/DESCRIPTION’
## 
  
─  preparing ‘papeR’:
## 
  
   checking DESCRIPTION meta-information ...
  
✓  checking DESCRIPTION meta-information
## 
  
─  checking for LF line-endings in source and make files and shell scripts
## 
  
─  checking for empty or unneeded directories
##    Removed empty directory ‘papeR/tests/Examples’
## 
  
─  building ‘papeR_1.0-4.tar.gz’
## 
  
   
## 
## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
# Install papeR from CRAN only when no copy can be loaded. An unconditional
# install.packages() re-downloads the package on every run and replaces
# whatever build of papeR is already in the library.
if (!requireNamespace("papeR", quietly = TRUE)) {
  install.packages("papeR")
  ## Installing package into '/home/rstudio-user/R/x86_64-pc-linux-gnu-library/4.0'
  ## (as 'lib' is unspecified)
}
# Attach papeR; its summarize() builds the descriptive tables further down.
library(papeR)
## Loading required package: car
## Loading required package: carData
## Loading required package: xtable
## Registered S3 method overwritten by 'papeR':
##   method    from
##   Anova.lme car
## 
## Attaching package: 'papeR'
## The following object is masked from 'package:utils':
## 
##     toLatex
# URL of the published Google Sheets document, exported as CSV.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vRq_z7dcVQae82F92aErK8S6f3Tv0E0hI_8tiNFRqRy3Lp4gyj0Tf8HCcBSWeboQgCquoB4D56DhOdx/pub?output=csv"
# Read the sheet into a data frame. FALSE is spelled out because the shorthand
# F is an ordinary variable that can be reassigned.
midata <- read.csv(link, stringsAsFactors = FALSE)

# Per-column overview: range, quartiles, mean, and number of missing values.
summary(midata)
##  poblacioncienmil        nbi        consejocomunal      priorizado    
##  Min.   : 0.00158   Min.   : 5.36   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.: 0.07422   1st Qu.:28.35   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median : 0.13998   Median :41.30   Median :0.00000   Median :0.0000  
##  Mean   : 0.40470   Mean   :42.96   Mean   :0.05474   Mean   :0.2518  
##  3rd Qu.: 0.26255   3rd Qu.:55.48   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :69.26836   Max.   :98.81   Max.   :1.00000   Max.   :1.0000  
##                     NA's   :30                                        
##     uribista        ejecucion       apropiaciondolar      pctopo      
##  Min.   :0.0000   Min.   :0.00000   Min.   :  0.000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:  0.000   1st Qu.: 5.922  
##  Median :1.0000   Median :0.00000   Median :  0.000   Median :20.308  
##  Mean   :0.6278   Mean   :0.03741   Mean   :  8.276   Mean   :27.874  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:  9.385   3rd Qu.:45.711  
##  Max.   :1.0000   Max.   :1.00000   Max.   :132.643   Max.   :99.419  
##  NA's   :204                                          NA's   :7

VARIABLES NUMÉRICAS

# Describe every numeric variable the same way: structure, summary statistics
# (with NA count when present), and standard deviation.
numeric_vars <- c("apropiaciondolar", "pctopo", "poblacioncienmil", "nbi")
for (var_name in numeric_vars) {
  values <- midata[[var_name]]
  str(values)
  # print() is required inside a loop; results are not auto-printed there.
  print(summary(values))
  # Always drop NAs: pctopo and nbi already contain missing values, and any
  # NA in the other columns would otherwise make sd() return NA.
  print(sd(values, na.rm = TRUE))
}
##  num [1:1096] 102.17 4.19 1.59 0 0 ...
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   8.276   9.385 132.643
## [1] 16.04451
##  num [1:1096] 14.82 14.51 15.08 6.15 47.31 ...
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.922  20.308  27.874  45.711  99.419       7
## [1] 25.58515
##  num [1:1096] 20.9155 0.2406 0.0398 0.0558 0.2723 ...
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00158  0.07422  0.13999  0.40470  0.26256 69.26836
## [1] 2.393711
##  num [1:1096] 12.2 33.8 28.5 33.1 27.1 ...
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    5.36   28.35   41.30   42.96   55.48   98.81      30
## [1] 18.69776

VARIABLES CATEGÓRICAS

# Convert a 0/1 indicator into a factor labelled "no"/"si".
#
# x: numeric vector holding 0, 1 or NA (an existing factor is returned as is,
#    so running this section twice does not wipe the data).
# Returns a factor with levels "no" (0) and "si" (1); NAs stay NA.
# Stops with an error when x holds any other value.
as_no_si <- function(x) {
  if (is.factor(x)) {
    return(x)
  }
  if (!all(x %in% c(0, 1, NA))) {
    stop("expected only 0, 1 or NA in a yes/no indicator", call. = FALSE)
  }
  # Explicit levels tie each label to its code instead of to the sorted order
  # of whatever values happen to be present in the column.
  factor(x, levels = c(0, 1), labels = c("no", "si"))
}

# For every binary variable: show its raw structure, recode it as a factor,
# then print absolute counts and percentages (NAs are left out of both).
binary_vars <- c("uribista", "priorizado", "consejocomunal", "ejecucion")
for (var_name in binary_vars) {
  str(midata[[var_name]])
  midata[[var_name]] <- as_no_si(midata[[var_name]])
  # table() is called on the extraction expression, not on a named variable,
  # so the printed table carries no variable-name header line.
  counts <- table(midata[[var_name]])
  print(counts)
  print(prop.table(counts) * 100)
}
##  num [1:1096] 0 1 1 1 1 1 1 1 1 NA ...
## 
##  no  si 
## 332 560
## 
##       no       si 
## 37.21973 62.78027
##  num [1:1096] 0 1 0 0 0 0 0 0 1 0 ...
## 
##  no  si 
## 820 276
## 
##       no       si 
## 74.81752 25.18248
##  num [1:1096] 0 0 0 0 0 0 0 0 0 0 ...
## 
##   no   si 
## 1036   60
## 
##        no        si 
## 94.525547  5.474453
##  num [1:1096] 0 0 0 0 0 0 0 0 0 0 ...
## 
##   no   si 
## 1055   41
## 
##        no        si 
## 96.259124  3.740876

CUADRO ESTADÍSTICO DESCRIPTIVO DE VARIABLES NUMÉRICAS

# Descriptive table of the numeric columns (N, missing, mean, SD, quartiles).
# The papeR:: prefix states explicitly which package's summarize() is called.
papeR::summarize(midata, type = "numeric")
## Factors are dropped from the summary
##                       N Missing    Mean    SD    Min    Q1 Median    Q3    Max
## 1 poblacioncienmil 1096       0    0.40  2.39   0.00  0.07   0.14  0.26  69.27
## 2              nbi 1066      30   42.96 18.70   5.36 28.34  41.30 55.49  98.81
## 3 apropiaciondolar 1096       0    8.28 16.04   0.00  0.00   0.00  9.40 132.64
## 4           pctopo 1089       7   27.87 25.59   0.00  5.92  20.31 45.71  99.42

CUADRO ESTADÍSTICO DESCRIPTIVO DE VARIABLES CATEGÓRICAS

# Frequency table of the factor columns (count and percentage per level,
# including a <Missing> row where a column has NAs).
# The papeR:: prefix states explicitly which package's summarize() is called.
papeR::summarize(midata, type = "factor")
## Non-factors are dropped from the summary
##                      Level      N    %
## 1 consejocomunal        no   1036 94.5
## 2                       si     60  5.5
## 3     priorizado        no    820 74.8
## 4                       si    276 25.2
## 5       uribista        no    332 30.3
## 6                       si    560 51.1
## 7                <Missing>    204 18.6
## 8      ejecucion        no   1055 96.3
## 9                       si     41  3.7