Carga la data:

# Load the results table and keep only the rows whose `costa` flag is
# non-negative.
# The global environment is deliberately not wiped with rm(list = ls()):
# every object used below is (re)created by this script, and clearing the
# workspace would silently destroy the caller's objects when this file is
# sourced. Restart R if a clean session is needed.
info <- file.path("DataFiles", "vuelta2.csv")
laData <- read.csv(info)
# which() discards rows where `costa` is NA; a bare logical index would
# instead insert all-NA rows into the result for those positions.
laData <- laData[which(laData$costa >= 0), ]
# Quick numeric overview of each candidate's column: min, quartiles, median,
# mean and max (see the echoed output under each call).
summary(laData$Castillo)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41464  154223  241491  348948  392224 2195770
summary(laData$Fujimori)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23372   58307  130240  342861  285375 4014342
# Boxplots of the 2nd and 3rd columns, selected by position.
# NOTE(review): presumably these are Castillo and Fujimori - confirm against
# the CSV header; a column reorder in the file would silently change this plot.
boxplot(laData[,c(2,3)])

# Histograms of both columns; the same y-axis limits (0 to 30) are passed to
# each call. The argument expressions are left as written because hist()
# builds its default title and x label from them.
hist(laData$Castillo,ylim = c(0,30))

hist(laData$Fujimori,ylim = c(0,30))

# Full univariate description of Castillo: counts, mean with 95% CI,
# quantiles, dispersion, skewness and kurtosis, as echoed below.
# The argument is kept exactly as written because the report header prints it.
DescTools::Desc(laData$Castillo)
## ------------------------------------------------------------------------------ 
## laData$Castillo (integer)
## 
##         length           n         NAs      unique          0s        mean'
##             25          25           0         = n           0  348'948.36
##                     100.0%        0.0%                    0.0%            
##                                                                           
##            .05         .10         .25      median         .75         .90
##      62'013.60   81'452.40  154'223.00  241'491.00  392'224.00  589'546.60
##                                                                           
##          range          sd       vcoef         mad         IQR        skew
##   2'154'306.00  421'369.23        1.21  178'519.87  238'001.00        3.32
##                                                                           
##       meanCI
##   175'015.69
##   522'881.03
##             
##          .95
##   638'754.60
##             
##         kurt
##        11.83
##             
## lowest : 41'464, 57'387, 80'520, 82'851, 121'081
## highest: 509'790, 558'085, 610'521, 645'813, 2'195'770
## 
## ' 95%-CI (classic)

# Full univariate description of Fujimori: counts, mean with 95% CI,
# quantiles, dispersion, skewness and kurtosis, as echoed below.
# The argument is kept exactly as written because the report header prints it.
DescTools::Desc(laData$Fujimori)
## ------------------------------------------------------------------------------ 
## laData$Fujimori (integer)
## 
##         length           n        NAs      unique          0s        mean'
##             25          25          0         = n           0  342'861.16
##                     100.0%       0.0%                    0.0%            
##                                                                          
##            .05         .10        .25      median         .75         .90
##      29'618.80   33'735.60  58'307.00  130'240.00  285'375.00  515'094.40
##                                                                          
##          range          sd      vcoef         mad         IQR        skew
##   3'990'970.00  782'433.97       2.28  134'285.01  227'068.00        4.11
##                                                                          
##       meanCI
##    19'888.29
##   665'834.03
##             
##          .95
##   587'650.40
##             
##         kurt
##        16.40
##             
## lowest : 23'372, 29'578, 29'782, 39'666, 43'922
## highest: 403'216, 410'860, 584'584, 588'417, 4'014'342
## 
## ' 95%-CI (classic)

# Castillo described separately for each level of `costa` (0 and 1 in the
# echoed output), followed by a Kruskal-Wallis rank sum test across groups.
# The formula is kept exactly as written because the report header prints it.
DescTools::Desc(laData$Castillo~laData$costa)
## ------------------------------------------------------------------------------ 
## laData$Castillo ~ laData$costa
## 
## Summary: 
## n pairs: 25, valid: 25 (100.0%), missings: 0 (0.0%), groups: 2
## 
##                                 
##                   0            1
## mean    274'533.000  443'658.818
## median  217'628.000  289'784.000
## sd      192'501.471  600'275.953
## IQR     231'713.250  214'199.500
## n                14           11
## np          56.000%      44.000%
## NAs               0            0
## 0s                0            0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 0.19181, df = 1, p-value = 0.6614

# Coefficient of variation and skewness of Castillo, computed separately for
# the rows with costa == 1 and the rows with costa == 0.
DescTools::CoefVar(with(laData, Castillo[costa == 1]))
## [1] 1.353013
DescTools::CoefVar(with(laData, Castillo[costa == 0]))
## [1] 0.7011961
DescTools::Skew(with(laData, Castillo[costa == 1]))
## [1] 2.191514
DescTools::Skew(with(laData, Castillo[costa == 0]))
## [1] 0.7670194
# Fujimori described separately for each level of `costa` (0 and 1 in the
# echoed output), followed by a Kruskal-Wallis rank sum test across groups.
# The formula is kept exactly as written because the report header prints it.
DescTools::Desc(laData$Fujimori~laData$costa)
## ------------------------------------------------------------------------------ 
## laData$Fujimori ~ laData$costa
## 
## Summary: 
## n pairs: 25, valid: 25 (100.0%), missings: 0 (0.0%), groups: 2
## 
##                                     
##                     0              1
## mean      113'876.500    634'296.182
## median     99'819.000    302'622.000
## sd         81'286.706  1'137'480.486
## IQR       127'228.750    333'876.000
## n                  14             11
## np            56.000%        44.000%
## NAs                 0              0
## 0s                  0              0
## 
## Kruskal-Wallis rank sum test:
##   Kruskal-Wallis chi-squared = 6.3417, df = 1, p-value = 0.01179

# Coefficient of variation and skewness of Fujimori, computed separately for
# the rows with costa == 1 and the rows with costa == 0.
DescTools::CoefVar(with(laData, Fujimori[costa == 1]))
## [1] 1.793295
DescTools::CoefVar(with(laData, Fujimori[costa == 0]))
## [1] 0.7138146
DescTools::Skew(with(laData, Fujimori[costa == 1]))
## [1] 2.338732
DescTools::Skew(with(laData, Fujimori[costa == 0]))
## [1] 0.6114445

Usemos el formato largo de la data:

# Long-format table: keep only the Castillo and Fujimori rows, drop the
# "Voto extranjero" entry, then draw one panel per candidate.
library(ggplot2)

vueltaLong <- read.csv(file.path("DataFiles", "vuelta2Long.csv"))
dataCandis <- vueltaLong[vueltaLong$variable %in% c("Castillo", "Fujimori"), ]
dataCandis <- dataCandis[dataCandis$Departamento != "Voto extranjero", ]

# Shared plot base; layers are added per chart below.
base <- ggplot(data = dataCandis)

# Histogram of `valor`, faceted into columns by `variable`.
base +
  geom_histogram(aes(x = valor)) +
  facet_grid(~variable)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of `valor`, faceted into columns by `variable`.
base +
  geom_boxplot(aes(y = valor)) +
  facet_grid(~variable)

# Second long-format table: keep rows with costa greater than -1, then only
# the Castillo and Fujimori rows, and draw a costa-by-candidate panel grid.
# The two filters are applied in this order on purpose; they are not merged
# into a single mask.
library(ggplot2)

vueltaLong2 <- read.csv(file.path("DataFiles", "vuelta2Long2.csv"))
dataDonde <- vueltaLong2[vueltaLong2$costa > -1, ]
dataDonde <- dataDonde[dataDonde$variable %in% c("Castillo", "Fujimori"), ]

# Shared plot base; layers are added per chart below.
base <- ggplot(data = dataDonde)

# Histogram of `valor`: rows of panels by `costa`, columns by `variable`.
base +
  geom_histogram(aes(x = valor)) +
  facet_grid(costa ~ variable)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of `valor`: rows of panels by `costa`, columns by `variable`.
base +
  geom_boxplot(aes(y = valor)) +
  facet_grid(costa ~ variable)