library(htmltab)
# Page to scrape and the XPath that locates the table inside it.
linkPage <- "https://en.wikipedia.org/wiki/World_Happiness_Report"
linkPath <- '//*[@id="mw-content-text"]/div/table'

# Read the table into a data frame and show its structure.
feliz <- htmltab(doc = linkPage, which = linkPath)
str(feliz)
## 'data.frame': 156 obs. of 9 variables:
## $ Overall rank : chr "1" "2" "3" "4" ...
## $ Country or region : chr "Ā Finland" "Ā Denmark" "Ā Norway" "Ā Iceland" ...
## $ Score : chr "7.769" "7.600" "7.554" "7.494" ...
## $ GDP per capita : chr "1.340" "1.383" "1.488" "1.380" ...
## $ Social support : chr "1.587" "1.573" "1.582" "1.624" ...
## $ Healthy life expectancy : chr "0.986" "0.996" "1.028" "1.026" ...
## $ Freedom to make life choices: chr "0.596" "0.592" "0.603" "0.591" ...
## $ Generosity : chr "0.153" "0.252" "0.271" "0.354" ...
## $ Perceptions of corruption : chr "0.393" "0.410" "0.341" "0.118" ...
library(stringr)
library(magrittr)
# Drop the first column (the overall rank).
feliz[[1]] <- NULL
# Strip leading/trailing horizontal and vertical whitespace from every cell.
feliz[] <- lapply(feliz, trimws, whitespace = "[\\h\\v]")
# Use the country name as the row label.
row.names(feliz) <- feliz[["Country or region"]]
# Every column except the first (the country name) is converted to numeric.
feliz[-1] <- lapply(feliz[-1], as.numeric)
str(feliz)
## 'data.frame': 156 obs. of 8 variables:
## $ Country or region : chr "Finland" "Denmark" "Norway" "Iceland" ...
## $ Score : num 7.77 7.6 7.55 7.49 7.49 ...
## $ GDP per capita : num 1.34 1.38 1.49 1.38 1.4 ...
## $ Social support : num 1.59 1.57 1.58 1.62 1.52 ...
## $ Healthy life expectancy : num 0.986 0.996 1.028 1.026 0.999 ...
## $ Freedom to make life choices: num 0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
## $ Generosity : num 0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
## $ Perceptions of corruption : num 0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
# Show the rows that contain at least one missing value.
feliz[which(!complete.cases(feliz)), ]
## [1] Country or region Score
## [3] GDP per capita Social support
## [5] Healthy life expectancy Freedom to make life choices
## [7] Generosity Perceptions of corruption
## <0 rows> (or 0-length row.names)
Ninguna fila tiene valores perdidos.
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
# Pairwise Gower dissimilarities computed on columns 3 to 8 (the six
# indicator columns; the country name and the overall score are left out).
g.dist <- daisy(feliz[, 3:8], metric = "gower")
# Partition around medoids into 3 clusters, keeping the full pam object
# (cluster.only = FALSE) rather than only the assignment vector.
pam.resultado1 <- pam(g.dist, 3, cluster.only = FALSE)
# Store the cluster label of every row. The component is called `clustering`;
# writing the full name avoids depending on partial matching by `$`.
feliz$clusterPT1 <- pam.resultado1$clustering
library(plyr) # para funcion "each"..
# Per-cluster median (MD) and mean (Media) of the six indicator columns.
# The left-hand side of the formula must be a matrix so that all columns
# are summarised in a single call.
agg1 <- aggregate(
  as.matrix(feliz[, 3:8]) ~ clusterPT1,
  data = feliz,
  FUN = plyr::each(MD = median, Media = mean)
)
# Convert to a data frame and transpose: one column per cluster,
# one row per statistic.
tablaResumen1 <- t(as.data.frame(agg1))
tablaResumen1
## [,1] [,2] [,3]
## clusterPT1 1.0000000 2.00000000 3.0000000
## GDP per capita.MD 1.3815000 1.04300000 0.3800000
## GDP per capita.Media 1.3937083 1.03589412 0.4096383
## Social support.MD 1.5125000 1.35100000 0.9160000
## Social support.Media 1.4896250 1.31363529 0.8777660
## Healthy life expectancy.MD 0.9990000 0.81800000 0.4400000
## Healthy life expectancy.Media 0.9855000 0.81600000 0.4286383
## Freedom to make life choices.MD 0.5570000 0.41000000 0.3520000
## Freedom to make life choices.Media 0.5473750 0.38450588 0.3294681
## Generosity.MD 0.2690000 0.12600000 0.2090000
## Generosity.Media 0.2740417 0.14434118 0.2171702
## Perceptions of corruption.MD 0.2885000 0.05700000 0.0890000
## Perceptions of corruption.Media 0.2740417 0.06875294 0.1013617
# Plot the clusterPT1 assignment using text labels only, with a convex
# outline drawn around each cluster.
fviz_cluster(
  object = list(data = g.dist, cluster = feliz$clusterPT1),
  geom = "text",
  ellipse.type = "convex"
)
# Partition around medoids again on the same dissimilarities, now with
# 5 clusters; cluster.only = FALSE keeps the full pam object.
pam.resultado2 <- pam(g.dist, 5, cluster.only = FALSE)
# Store the cluster label of every row. The component is called `clustering`;
# writing the full name avoids depending on partial matching by `$`.
feliz$clusterPT2 <- pam.resultado2$clustering
library(plyr) # para funcion "each"..
# Per-cluster median (MD) and mean (Media) of the six indicator columns,
# grouped by the clusterPT2 assignment. The left-hand side of the formula
# must be a matrix so that all columns are summarised in a single call.
agg2 <- aggregate(
  as.matrix(feliz[, 3:8]) ~ clusterPT2,
  data = feliz,
  FUN = plyr::each(MD = median, Media = mean)
)
# Convert to a data frame and transpose: one column per cluster,
# one row per statistic.
tablaResumen2 <- t(as.data.frame(agg2))
tablaResumen2
## [,1] [,2] [,3] [,4]
## clusterPT2 1.00000 2.00000000 3.0000000 4.00000000
## GDP per capita.MD 1.38150 1.22100000 1.0430000 0.79700000
## GDP per capita.Media 1.38070 1.20222857 1.0302571 0.77126923
## Social support.MD 1.52400 1.43100000 1.3280000 1.23400000
## Social support.Media 1.50545 1.40125714 1.2663714 1.20323077
## Healthy life expectancy.MD 1.01050 0.88100000 0.8080000 0.72050000
## Healthy life expectancy.Media 1.00250 0.89665714 0.7903143 0.70642308
## Freedom to make life choices.MD 0.56800 0.47300000 0.2640000 0.49450000
## Freedom to make life choices.Media 0.55595 0.47145714 0.2623143 0.48280769
## Generosity.MD 0.27100 0.12700000 0.1030000 0.19200000
## Generosity.Media 0.28310 0.15362857 0.1238286 0.20838462
## Perceptions of corruption.MD 0.30300 0.07300000 0.0550000 0.08250000
## Perceptions of corruption.Media 0.29810 0.08117143 0.0590000 0.09276923
## [,5]
## clusterPT2 5.000000
## GDP per capita.MD 0.350000
## GDP per capita.Media 0.373725
## Social support.MD 0.885500
## Social support.Media 0.847625
## Healthy life expectancy.MD 0.418000
## Healthy life expectancy.Media 0.392425
## Freedom to make life choices.MD 0.329500
## Freedom to make life choices.Media 0.298775
## Generosity.MD 0.204500
## Generosity.Media 0.206550
## Perceptions of corruption.MD 0.087500
## Perceptions of corruption.Media 0.097625
# Plot the clusterPT2 assignment using text labels only, with a convex
# outline drawn around each cluster.
fviz_cluster(
  object = list(data = g.dist, cluster = feliz$clusterPT2),
  geom = "text",
  ellipse.type = "convex"
)
Usemos una tabla de contingencia para verificar la asignación:
# Cross-tabulate the two cluster assignments (rows: clusterPT1,
# columns: clusterPT2).
table(clusterPT1 = feliz$clusterPT1, clusterPT2 = feliz$clusterPT2)
## clusterPT2
## clusterPT1 1 2 3 4 5
## 1 20 4 0 0 0
## 2 0 31 35 19 0
## 3 0 0 0 7 40
# Countries labelled 1 in both cluster assignments.
feliz[["Country or region"]][feliz$clusterPT1 == 1 & feliz$clusterPT2 == 1]
## [1] "Finland" "Denmark" "Norway"
## [4] "Iceland" "Netherlands" "Switzerland"
## [7] "Sweden" "New Zealand" "Canada"
## [10] "Austria" "Australia" "Luxembourg"
## [13] "United Kingdom" "Ireland" "Germany"
## [16] "United Arab Emirates" "Malta" "Singapore"
## [19] "Uzbekistan" "Hong Kong"