library(htmltab)
# Source 1: Spanish Wikipedia article on the global happiness index.
# `linkfelicidad` is the XPath of the table to extract, not a URL.
link <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
linkfelicidad <- '//*[@id="mw-content-text"]/div/table'
felicidad <- htmltab(link, which = linkfelicidad)

# Source 2: Spanish Wikipedia annex listing countries; `linkmapa` is the
# XPath of the first table in the article body.
link2 <- "https://es.wikipedia.org/wiki/Anexo:Pa%C3%ADses"
linkmapa <- '//*[@id="mw-content-text"]/div/table[1]/tbody'
mapa <- htmltab(link2, which = linkmapa)
## Warning: Columns [Ubicación] seem to have no data and are removed. Use
## rm_nodata_cols = F to suppress this behavior
# Keep three columns of the country table, selected by position; per the
# str() listing they end up as country, form of government and continent.
mapa <- mapa[, c(2, 3, 5)]

# Give the country column the same name as in `felicidad` so it can be used
# as the join key. The literal must be the correctly encoded "País": a
# mis-encoded name matches nothing and merge() would silently fall back to a
# Cartesian product of both tables.
names(mapa)[1] <- "País"

# Inner join on the country name. Naming the key explicitly makes a missing
# or misspelled column an error instead of a silent cross join.
tabla <- merge(felicidad, mapa, by = "País")

# Drop the second column of the merged table (presumably the ranking column
# of the happiness table -- TODO confirm) and keep columns 1 and 3 to 11.
tabla <- tabla[, c(1, 3:11)]

# Strip leading/trailing horizontal and vertical whitespace from every
# column; `tabla[] <-` keeps the data.frame structure.
tabla[] <- lapply(tabla, trimws, whitespace = "[\\h\\v]")
str(tabla)
## 'data.frame': 137 obs. of 10 variables:
## $ País : chr "Afganistán" "Albania" "Alemania" "Angola" ...
## $ Puntuación : chr "3.632" "4.586" "6.965" "3.795" ...
## $ PIB per cápita : chr "0.332" "0.916" "1.340" "0.730" ...
## $ Apoyo social : chr "0.537" "0.817" "1.474" "1.125" ...
## $ Esperanza de años de vida saludable : chr "0.255" "0.790" "0.861" "0.269" ...
## $ Libertad para tomar decisiones vitales: chr "0.085" "0.419" "0.586" "0.000" ...
## $ Generosidad : chr "0.191" "0.149" "0.273" "0.079" ...
## $ Percepción de la corrupción : chr "0.036" "0.032" "0.280" "0.061" ...
## $ Forma de gobierno : chr "República islámica presidencialista" "República parlamentaria" "República parlamentaria" "República presidencialista" ...
## $ Continente : chr "Asia" "Europa" "Europa" "África" ...
# Replace the long indicator headers with short names. Positions follow the
# column order shown by str(tabla); columns 1, 2, 7 and 10 keep their names.
names(tabla)[c(3, 4, 5, 6, 8, 9)] <- c(
  "PBI", "ApoyoSocial", "EsperanzaVida",
  "Libertad", "Corrupcion", "FormaGobierno"
)

# Fold transcontinental labels into a single continent. The last pattern
# must be the correctly encoded "África-Asia"; a mis-encoded pattern never
# matches and would leave that label as a separate level.
tabla$Continente <- gsub("Asia-Europa", "Europa", tabla$Continente)
tabla$Continente <- gsub("Europa-Asia", "Europa", tabla$Continente)
tabla$Continente <- gsub("África-Asia", "África", tabla$Continente)

# Columns 2 to 8 hold the score and its components as text; parse them into
# numbers.
library(readr)
tabla[2:8] <- lapply(tabla[2:8], parse_number)

# The two grouping variables become factors.
tabla$Continente <- as.factor(tabla$Continente)
tabla$FormaGobierno <- as.factor(tabla$FormaGobierno)

# Print any row that still has a missing value (none expected).
tabla[!complete.cases(tabla), ]
## [1] País Puntuación PBI ApoyoSocial EsperanzaVida
## [6] Libertad Generosidad Corrupcion FormaGobierno Continente
## <0 rows> (or 0-length row.names)
# Per-column overview of the cleaned table: quartiles and mean for the
# numeric columns, level counts for the factor columns.
summary(tabla)
## País Puntuación PBI ApoyoSocial
## Length:137 Min. :2.905 Min. :0.0000 Min. :0.000
## Class :character 1st Qu.:4.433 1st Qu.:0.5920 1st Qu.:1.048
## Mode :character Median :5.358 Median :0.9180 Median :1.265
## Mean :5.363 Mean :0.8779 Mean :1.216
## 3rd Qu.:6.167 3rd Qu.:1.1760 3rd Qu.:1.469
## Max. :7.633 Max. :2.0960 Max. :1.644
##
## EsperanzaVida Libertad Generosidad Corrupcion
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.4040 1st Qu.:0.3550 1st Qu.:0.1110 1st Qu.:0.0510
## Median :0.6430 Median :0.4680 Median :0.1750 Median :0.0820
## Mean :0.5915 Mean :0.4512 Mean :0.1797 Mean :0.1125
## 3rd Qu.:0.7810 3rd Qu.:0.5830 3rd Qu.:0.2380 3rd Qu.:0.1340
## Max. :1.0080 Max. :0.7240 Max. :0.4840 Max. :0.4570
##
## FormaGobierno Continente
## República presidencialista :44 África :41
## República parlamentaria :35 América:24
## República semipresidencialista :24 Asia :31
## Monarquía constitucional :11 Europa :40
## Monarquía parlamentaria : 9 Oceanía: 1
## Estado socialista unipartidista: 2
## (Other) :12
# Detailed per-variable description (n, missing, distinct values, quantiles,
# extremes). Called through `::`, so Hmisc is not attached.
Hmisc::describe(tabla)
## tabla
##
## 10 Variables 137 Observations
## --------------------------------------------------------------------------------
## País
## n missing distinct
## 137 0 136
##
## lowest : Afganistán Albania Alemania Angola Argelia
## highest: Venezuela Vietnam Yemen Zambia Zimbabue
## --------------------------------------------------------------------------------
## Puntuación
## n missing distinct Info Mean Gmd .05 .10
## 137 0 136 1 5.363 1.311 3.488 3.803
## .25 .50 .75 .90 .95
## 4.433 5.358 6.167 6.942 7.317
##
## lowest : 2.905 3.083 3.254 3.303 3.355, highest: 7.487 7.495 7.555 7.560 7.633
## --------------------------------------------------------------------------------
## PBI
## n missing distinct Info Mean Gmd .05 .10
## 137 0 131 1 0.8779 0.4475 0.2444 0.3280
## .25 .50 .75 .90 .95
## 0.5920 0.9180 1.1760 1.3404 1.4024
##
## lowest : 0.000 0.024 0.069 0.076 0.091, highest: 1.456 1.474 1.529 1.576 2.096
## --------------------------------------------------------------------------------
## ApoyoSocial
## n missing distinct Info Mean Gmd .05 .10
## 137 0 129 1 1.216 0.3377 0.6232 0.7950
## .25 .50 .75 .90 .95
## 1.0480 1.2650 1.4690 1.5320 1.5748
##
## lowest : 0.000 0.372 0.382 0.474 0.537, highest: 1.584 1.590 1.592 1.595 1.644
## --------------------------------------------------------------------------------
## EsperanzaVida
## n missing distinct Info Mean Gmd .05 .10
## 137 0 130 1 0.5915 0.2855 0.1390 0.2306
## .25 .50 .75 .90 .95
## 0.4040 0.6430 0.7810 0.8922 0.9132
##
## lowest : 0.000 0.010 0.048 0.053 0.079, highest: 0.946 0.955 0.965 0.988 1.008
## --------------------------------------------------------------------------------
## Libertad
## n missing distinct Info Mean Gmd .05 .10
## 137 0 121 1 0.4512 0.189 0.1072 0.2276
## .25 .50 .75 .90 .95
## 0.3550 0.4680 0.5830 0.6400 0.6746
##
## lowest : 0.000 0.016 0.025 0.065 0.077, highest: 0.683 0.685 0.686 0.696 0.724
## --------------------------------------------------------------------------------
## Generosidad
## n missing distinct Info Mean Gmd .05 .10
## 137 0 109 1 0.1797 0.1074 0.0396 0.0616
## .25 .50 .75 .90 .95
## 0.1110 0.1750 0.2380 0.3086 0.3554
##
## lowest : 0.000 0.026 0.029 0.031 0.032, highest: 0.361 0.364 0.376 0.392 0.484
## --------------------------------------------------------------------------------
## Corrupcion
## n missing distinct Info Mean Gmd .05 .10
## 137 0 103 1 0.1125 0.09693 0.0188 0.0296
## .25 .50 .75 .90 .95
## 0.0510 0.0820 0.1340 0.2752 0.3282
##
## lowest : 0.000 0.006 0.009 0.011 0.014, highest: 0.393 0.408 0.410 0.444 0.457
## --------------------------------------------------------------------------------
## FormaGobierno
## n missing distinct
## 137 0 15
##
## lowest : Estado socialista Estado socialista unipartidista Gobierno provisional Monarquía constitucional Monarquía constitucional electiva
## highest: República presidencial República presidencialista República presidencialista (bajo junta militar) República presidencialista (Gobierno provisional) República semipresidencialista
## --------------------------------------------------------------------------------
## Continente
## n missing distinct
## 137 0 5
##
## lowest : África América Asia Europa Oceanía
## highest: África América Asia Europa Oceanía
##
## Value África América Asia Europa Oceanía
## Frequency 41 24 31 40 1
## Proportion 0.299 0.175 0.226 0.292 0.007
## --------------------------------------------------------------------------------
1) Analizando la relación entre Puntaje y Continente
Determinando el tipo de relación: Numérica-Categórica
Determinando si la variable numérica se comporta de manera normal:
# Numeric ~ categorical relationship: happiness score explained by continent.
f1 <- Puntuación ~ Continente

# Mean score within each continent.
aggregate(f1, data = tabla, FUN = mean)
## Continente Puntuación
## 1 África 4.296073
## 2 América 6.046542
## 3 Asia 5.235355
## 4 Europa 6.098350
## 5 Oceanía 7.272000
Haremos un gráfico de cuantiles teóricos para conocer la normalidad de la variable.
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Normal Q-Q plot of the score, one panel per continent (panels in columns).
ggqqplot(tabla, x = "Puntuación") +
  facet_grid(. ~ Continente)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
library(ggplot2)
# Base plot object mapping the score to the x axis.
base <- ggplot(tabla, aes(x = Puntuación))

# Histogram of the score over all countries, using 20 bins.
base + geom_histogram(bins = 20)
**2) Analizando la relación entre Forma de Gobierno