Carga la data:
# Read DataFiles/vuelta2.csv and keep only the rows whose `costa` flag is
# non-negative.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects without resetting attached packages or
# options, so it does not give a clean session. Restart R for a clean slate.
info <- file.path("DataFiles", "vuelta2.csv")
laData <- read.csv(info)
# which() drops rows where `costa` is NA; a bare logical index would turn
# each NA into an all-NA row. Presumably negative values mark rows without a
# coast classification (e.g. the foreign vote) -- TODO confirm in the CSV.
laData <- laData[which(laData$costa >= 0), ]
# Min / quartiles / mean / max of the Castillo column.
summary(laData[["Castillo"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 41464 154223 241491 348948 392224 2195770
# Same summary for the Fujimori column.
summary(laData[["Fujimori"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23372 58307 130240 342861 285375 4014342
# Side-by-side boxplots of the 2nd and 3rd columns of laData
# (presumably Castillo and Fujimori -- confirm against the CSV header).
boxplot(laData[, 2:3])
# One histogram per candidate, both with the frequency axis fixed to 0-30.
# The data argument stays as laData$<column> because hist() builds its
# default title and x label from that expression.
hist(laData$Castillo, ylim = c(0, 30))
hist(laData$Fujimori, ylim = c(0, 30))
# Detailed univariate description of the Castillo column: counts and NAs,
# quantiles, spread (sd, vcoef, mad, IQR), skewness, kurtosis, the classic
# 95% CI of the mean, and the lowest/highest values (see the output below).
DescTools::Desc(laData$Castillo)
## ------------------------------------------------------------------------------
## laData$Castillo (integer)
##
## length n NAs unique 0s mean'
## 25 25 0 = n 0 348'948.36
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 62'013.60 81'452.40 154'223.00 241'491.00 392'224.00 589'546.60
##
## range sd vcoef mad IQR skew
## 2'154'306.00 421'369.23 1.21 178'519.87 238'001.00 3.32
##
## meanCI
## 175'015.69
## 522'881.03
##
## .95
## 638'754.60
##
## kurt
## 11.83
##
## lowest : 41'464, 57'387, 80'520, 82'851, 121'081
## highest: 509'790, 558'085, 610'521, 645'813, 2'195'770
##
## ' 95%-CI (classic)
# Detailed univariate description of the Fujimori column: counts and NAs,
# quantiles, spread (sd, vcoef, mad, IQR), skewness, kurtosis, the classic
# 95% CI of the mean, and the lowest/highest values (see the output below).
DescTools::Desc(laData$Fujimori)
## ------------------------------------------------------------------------------
## laData$Fujimori (integer)
##
## length n NAs unique 0s mean'
## 25 25 0 = n 0 342'861.16
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 29'618.80 33'735.60 58'307.00 130'240.00 285'375.00 515'094.40
##
## range sd vcoef mad IQR skew
## 3'990'970.00 782'433.97 2.28 134'285.01 227'068.00 4.11
##
## meanCI
## 19'888.29
## 665'834.03
##
## .95
## 587'650.40
##
## kurt
## 16.40
##
## lowest : 23'372, 29'578, 29'782, 39'666, 43'922
## highest: 403'216, 410'860, 584'584, 588'417, 4'014'342
##
## ' 95%-CI (classic)
# Describe Castillo within each level of `costa` (groups 0 and 1): per-group
# mean, median, sd, IQR and n, followed by a Kruskal-Wallis rank sum test
# comparing the groups (see the output below).
DescTools::Desc(laData$Castillo~laData$costa)
## ------------------------------------------------------------------------------
## laData$Castillo ~ laData$costa
##
## Summary:
## n pairs: 25, valid: 25 (100.0%), missings: 0 (0.0%), groups: 2
##
##
## 0 1
## mean 274'533.000 443'658.818
## median 217'628.000 289'784.000
## sd 192'501.471 600'275.953
## IQR 231'713.250 214'199.500
## n 14 11
## np 56.000% 44.000%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 0.19181, df = 1, p-value = 0.6614
# Coefficient of variation of Castillo, first for costa == 1, then costa == 0.
DescTools::CoefVar(laData$Castillo[laData$costa == 1])
## [1] 1.353013
DescTools::CoefVar(laData$Castillo[laData$costa == 0])
## [1] 0.7011961
# Skewness of Castillo for the same two groups, in the same order.
DescTools::Skew(laData$Castillo[laData$costa == 1])
## [1] 2.191514
DescTools::Skew(laData$Castillo[laData$costa == 0])
## [1] 0.7670194
# Describe Fujimori within each level of `costa` (groups 0 and 1): per-group
# mean, median, sd, IQR and n, followed by a Kruskal-Wallis rank sum test
# comparing the groups (see the output below).
DescTools::Desc(laData$Fujimori~laData$costa)
## ------------------------------------------------------------------------------
## laData$Fujimori ~ laData$costa
##
## Summary:
## n pairs: 25, valid: 25 (100.0%), missings: 0 (0.0%), groups: 2
##
##
## 0 1
## mean 113'876.500 634'296.182
## median 99'819.000 302'622.000
## sd 81'286.706 1'137'480.486
## IQR 127'228.750 333'876.000
## n 14 11
## np 56.000% 44.000%
## NAs 0 0
## 0s 0 0
##
## Kruskal-Wallis rank sum test:
## Kruskal-Wallis chi-squared = 6.3417, df = 1, p-value = 0.01179
# Coefficient of variation of Fujimori, first for costa == 1, then costa == 0.
DescTools::CoefVar(laData$Fujimori[laData$costa == 1])
## [1] 1.793295
DescTools::CoefVar(laData$Fujimori[laData$costa == 0])
## [1] 0.7138146
# Skewness of Fujimori for the same two groups, in the same order.
DescTools::Skew(laData$Fujimori[laData$costa == 1])
## [1] 2.338732
DescTools::Skew(laData$Fujimori[laData$costa == 0])
## [1] 0.6114445
Usemos el formato largo de la data:
# Read the long-format file and plot the distribution of `valor` for the
# Castillo and Fujimori rows, one facet per candidate.
vueltaLong <- read.csv(file = file.path("DataFiles", "vuelta2Long.csv"))
# Keep the two candidates and drop the "Voto extranjero" entry in one step.
# %in% never returns NA, so a missing Departamento cannot inject all-NA rows
# the way a bare `!=` comparison inside a logical row index would.
dataCandis <- vueltaLong[
  vueltaLong$variable %in% c("Castillo", "Fujimori") &
    !(vueltaLong$Departamento %in% "Voto extranjero"),
]
library(ggplot2)
base <- ggplot(data = dataCandis)
# bins = 30 is the value stat_bin() falls back to when none is given; stating
# it keeps the same histogram and avoids the "Pick better value" message.
base + geom_histogram(aes(x = valor), bins = 30) + facet_grid(~variable)
base + geom_boxplot(aes(y = valor)) + facet_grid(~variable)
# Read the second long-format file and plot the distribution of `valor` for
# the Castillo and Fujimori rows, faceted by `costa` (rows) and candidate
# (columns).
vueltaLong2 <- read.csv(file = file.path("DataFiles", "vuelta2Long2.csv"))
# Keep rows with costa > -1 that belong to one of the two candidates.
# which() discards rows where `costa` is NA; a bare logical index would turn
# each NA into an all-NA row.
dataDonde <- vueltaLong2[
  which(
    vueltaLong2$costa > -1 &
      vueltaLong2$variable %in% c("Castillo", "Fujimori")
  ),
]
# ggplot2 is already attached earlier in this script, so it is not loaded
# again here.
base <- ggplot(data = dataDonde)
# bins = 30 is the value stat_bin() falls back to when none is given; stating
# it keeps the same histogram and avoids the "Pick better value" message.
base + geom_histogram(aes(x = valor), bins = 30) + facet_grid(costa ~ variable)
base + geom_boxplot(aes(y = valor)) + facet_grid(costa ~ variable)