some libraries needed
library(magrittr)
library(polycor)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
library(matrixcalc)
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
read data
# Empty the global environment before loading the data.
# NOTE(review): wiping the workspace from inside a script is generally
# discouraged; running in a fresh session achieves the same thing.
rm(list = ls())
# Location of the prepared CSV file.
dataPreparada <- "https://github.com/Estadistica-AnalisisPolitico/Sesion6/raw/main/idhdemo.csv"
# Read the CSV into a data frame.
idhdemo <- read.csv(file = dataPreparada)
# Show the column names of the imported data frame.
names(idhdemo)
## [1] "country" "hdiRanking" "hdi" "hdiLife"
## [5] "hdiSchoolExpec" "hdiMeanEduc" "hdiGni" "ideRanking"
## [9] "ideRegime" "ide" "ideElectoral" "ideFunctioning"
## [13] "ideParticipation" "ideCulture" "ideLiberties"
# Compact overview of the column types and first values.
utils::str(idhdemo)
## 'data.frame': 165 obs. of 15 variables:
## $ country : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ hdiRanking : int 182 74 93 150 48 76 10 22 89 34 ...
## $ hdi : num 0.462 0.789 0.745 0.591 0.849 0.786 0.946 0.926 0.76 0.888 ...
## $ hdiLife : num 62.9 76.8 77.1 61.9 76.1 ...
## $ hdiSchoolExpec : num 10.7 14.5 15.5 12.2 19 ...
## $ hdiMeanEduc : num 2.51 10.12 6.99 5.84 11.14 ...
## $ hdiGni : num 1335 15293 10978 5328 22048 ...
## $ ideRanking : int 167 66 110 107 54 84 15 19 130 139 ...
## $ ideRegime : chr "Authoritarian regime" "Flawed democracy" "Authoritarian regime" "Hybrid regime" ...
## $ ide : num 0.26 6.28 3.66 4.18 6.62 5.42 8.66 8.28 2.8 2.52 ...
## $ ideElectoral : num 0 7 3.08 4.5 9.17 7.92 10 9.58 0.5 0.42 ...
## $ ideFunctioning : num 0.07 6.07 2.5 3.21 5 4.64 8.57 7.5 2.5 2.71 ...
## $ ideParticipation: num 0 5 3.89 5.56 7.22 6.11 7.22 8.89 3.33 3.33 ...
## $ ideCulture : num 1.25 6.25 5 5 3.75 3.13 7.5 6.88 5 4.38 ...
## $ ideLiberties : num 0 7.06 3.82 2.65 7.94 5.29 10 8.53 2.65 1.76 ...
data for factor analysis
# Columns that are left out of the factor analysis input.
dontselect <- c(
  "country", "hdiRanking", "hdi",
  "ideRanking", "ideRegime", "ide"
)
# Every other column of idhdemo, in its original order.
select <- setdiff(names(idhdemo), dontselect)
theData <- idhdemo[, select]
# Preview the first ten rows of the selected columns.
head(theData, n = 10)
## hdiLife hdiSchoolExpec hdiMeanEduc hdiGni ideElectoral ideFunctioning
## 1 62.879 10.70538 2.514790 1335.206 0.00 0.07
## 2 76.833 14.48747 10.121144 15293.327 7.00 6.07
## 3 77.129 15.48788 6.987444 10978.406 3.08 2.50
## 4 61.929 12.16760 5.844292 5327.788 4.50 3.21
## 5 76.064 18.97951 11.144080 22047.971 9.17 5.00
## 6 73.372 14.40561 11.330300 15388.300 7.92 4.64
## 7 83.579 21.08002 12.726820 49257.135 10.00 8.57
## 8 82.412 16.36746 12.305714 56529.663 9.58 7.50
## 9 73.488 12.71063 10.556130 15018.054 0.50 2.50
## 10 79.246 16.29968 11.046590 48731.446 0.42 2.71
## ideParticipation ideCulture ideLiberties
## 1 0.00 1.25 0.00
## 2 5.00 6.25 7.06
## 3 3.89 5.00 3.82
## 4 5.56 5.00 2.65
## 5 7.22 3.75 7.94
## 6 6.11 3.13 5.29
## 7 7.22 7.50 10.00
## 8 8.89 6.88 8.53
## 9 3.33 5.00 2.65
## 10 3.33 4.38 1.76
correlations
# Correlation matrix of the selected variables, extracted from the
# hetcor() result.
corMatrix <- polycor::hetcor(theData)[["correlations"]]
preliminary checks
# Kaiser-Meyer-Olkin factor adequacy (overall and per item).
KMO(r = corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = corMatrix)
## Overall MSA = 0.9
## MSA for each item =
## hdiLife hdiSchoolExpec hdiMeanEduc hdiGni
## 0.90 0.92 0.91 0.88
## ideElectoral ideFunctioning ideParticipation ideCulture
## 0.84 0.93 0.96 0.87
## ideLiberties
## 0.88
# Bartlett test on the correlation matrix; prints TRUE only when p > 0.05.
cortest.bartlett(corMatrix, n = nrow(theData))[["p.value"]] > 0.05
## [1] FALSE
# Check whether the correlation matrix is singular.
matrixcalc::is.singular.matrix(corMatrix)
## [1] FALSE
# Parallel analysis to suggest the number of factors (factor solution only,
# plotting disabled). TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
# NOTE(review): psych documents `correct` as a numeric continuity correction
# (default 0.5); confirm that a logical TRUE is what is intended here.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
run factor analysis
# Exploratory factor analysis with two factors, minimum-residual extraction
# ("minres"), the "mixed" correlation option and varimax rotation.
# NOTE(review): varimax is an orthogonal rotation, so the two score columns
# are expected to be close to uncorrelated; an oblique rotation such as
# "oblimin" would let the factors correlate. Confirm the choice before one
# factor score is regressed on the other.
resfa <- fa(
  r = theData,
  nfactors = 2,
  cor = "mixed",
  rotate = "varimax", # oblimin?
  fm = "minres"
)
see results
# Print the rotated loadings together with the variance summary.
print(resfa[["loadings"]])
##
## Loadings:
## MR1 MR2
## hdiLife 0.307 0.836
## hdiSchoolExpec 0.368 0.811
## hdiMeanEduc 0.316 0.782
## hdiGni 0.283 0.813
## ideElectoral 0.906 0.249
## ideFunctioning 0.801 0.470
## ideParticipation 0.768 0.331
## ideCulture 0.498 0.382
## ideLiberties 0.901 0.334
##
## MR1 MR2
## SS loadings 3.522 3.280
## Proportion Var 0.391 0.364
## Cumulative Var 0.391 0.756
# Draw the diagram of the factor solution with a custom title.
fa.diagram(resfa, main = "Resultados del EFA")
testing results
# Which variables contributed most to the factors? (communalities, ascending)
sort(resfa[["communality"]])
## ideCulture ideParticipation hdiMeanEduc hdiGni
## 0.3941682 0.6993302 0.7119301 0.7405067
## hdiLife hdiSchoolExpec ideFunctioning ideElectoral
## 0.7933223 0.7936103 0.8629055 0.8835971
## ideLiberties
## 0.9226321
# Which variables contribute to building more than one factor?
# (item complexity, ascending)
sort(resfa[["complexity"]])
## ideElectoral hdiGni hdiLife ideLiberties
## 1.150198 1.238563 1.265339 1.270410
## hdiMeanEduc ideParticipation hdiSchoolExpec ideFunctioning
## 1.317113 1.358907 1.395523 1.614353
## ideCulture
## 1.874466
# Is the Tucker-Lewis index above 0.9?
resfa[["TLI"]]
## [1] 0.9263416
PREPARE FOR REGRESSION: see scores
# Print the factor scores: one row per observation, one column per factor.
resfa[["scores"]]
## MR1 MR2
## [1,] -1.68034008 -0.83207114
## [2,] 0.42190255 0.17017645
## [3,] -0.84997358 0.38447261
## [4,] -0.33126116 -0.83527332
## [5,] 0.61373479 0.55689007
## [6,] 0.14046830 0.08167183
## [7,] 1.02570884 1.51779769
## [8,] 0.88103717 1.08945988
## [9,] -1.29937276 0.44149238
## [10,] -1.83242173 1.64245537
## [11,] 0.31844737 -0.45412168
## [12,] -1.82081368 0.79127982
## [13,] 0.64419446 1.39022124
## [14,] 0.16694597 -1.25237953
## [15,] 0.32315566 -0.51282672
## [16,] 0.00637766 -0.23230765
## [17,] 0.07023144 0.02710679
## [18,] 1.35156515 -0.85226593
## [19,] 0.79590933 -0.19926374
## [20,] 0.72857529 -0.06394465
## [21,] -0.44112718 -1.43580407
## [22,] -1.11960332 -1.02757486
## [23,] -0.98973591 -0.28683067
## [24,] -1.05515920 -0.43828325
## [25,] 1.02809170 1.03254119
## [26,] 1.36601176 -0.83504903
## [27,] -0.92660701 -1.63788522
## [28,] -0.85058018 -1.70714696
## [29,] 1.10074591 0.49090905
## [30,] -1.75324072 0.98181493
## [31,] 0.89149677 -0.25215019
## [32,] -0.68611998 -0.52775631
## [33,] 1.39058539 0.04560051
## [34,] 0.40424125 0.66961046
## [35,] -1.34796274 0.76993161
## [36,] 0.65938234 0.82403644
## [37,] 0.92959429 0.67342523
## [38,] -1.26521201 -0.91609497
## [39,] 1.09624142 1.42337757
## [40,] -0.84752864 -1.10397042
## [41,] 0.83269487 -0.26618059
## [42,] 0.31945248 0.03380673
## [43,] -1.23959580 0.24370604
## [44,] 0.08754890 -0.57562448
## [45,] -1.41858602 -0.32833524
## [46,] -1.43844169 -0.81564407
## [47,] 0.95635314 0.72603920
## [48,] -0.90652875 -0.54467311
## [49,] -0.77178090 -0.86909663
## [50,] 0.12193568 -0.20526226
## [51,] 1.17845964 1.31639064
## [52,] 0.85060877 0.95067281
## [53,] -1.20082421 -0.11501435
## [54,] 0.08388692 -1.31568124
## [55,] -0.07550962 0.37877055
## [56,] 1.04614268 1.22898977
## [57,] 0.81037133 -1.16018427
## [58,] 0.88180311 0.97570438
## [59,] 0.32464021 -0.89745926
## [60,] -0.91240495 -1.23329895
## [61,] -0.69869116 -1.29860155
## [62,] 0.66934453 -0.31331835
## [63,] -0.72921268 -0.80520382
## [64,] 0.42679596 -0.98709643
## [65,] -0.53110948 1.83620281
## [66,] 0.43564552 0.48096168
## [67,] 1.19015878 1.44089595
## [68,] 1.01117908 -0.81774982
## [69,] 0.69240046 -0.36715512
## [70,] -1.64011323 0.74063141
## [71,] -1.00642518 -0.33713784
## [72,] 0.91245412 1.74043238
## [73,] 0.42565343 1.01075176
## [74,] 0.56281540 0.99178963
## [75,] -0.04724191 -1.35390947
## [76,] 1.15298623 -0.64154921
## [77,] 0.98435915 0.98932362
## [78,] -0.93624247 0.22755893
## [79,] -1.17866700 0.70225223
## [80,] 0.07211009 -0.78474871
## [81,] -1.14485678 1.26317399
## [82,] -0.43718196 -0.11729826
## [83,] -1.50791210 -0.26465704
## [84,] 0.80853693 0.55650340
## [85,] -0.67699074 -0.06929200
## [86,] 0.96845703 -1.74864012
## [87,] 0.61739872 -1.53294001
## [88,] -1.60821323 0.39833519
## [89,] 0.86802246 0.54127424
## [90,] 1.10844177 1.16492380
## [91,] 0.34811360 -1.37260980
## [92,] 0.69213366 -1.28562507
## [93,] 0.68999413 0.09447743
## [94,] -0.53538920 -1.66944809
## [95,] 0.73363037 0.96262662
## [96,] 0.09007308 -1.27923062
## [97,] 1.19187463 -0.04203392
## [98,] 0.15252997 0.10314696
## [99,] 0.55645050 -0.11646111
## [100,] 0.56582788 -0.20474328
## [101,] 0.64460702 0.39036249
## [102,] -0.19982650 -0.03549452
## [103,] -0.36582852 -1.24591778
## [104,] -1.88196390 -0.18598228
## [105,] 1.16225441 -1.32316529
## [106,] 0.22088559 -0.64168296
## [107,] 1.05161036 1.35430511
## [108,] 1.31958694 1.27220288
## [109,] -1.28723728 0.14980294
## [110,] -0.43770633 -1.59021551
## [111,] 0.18526936 -1.39531504
## [112,] 0.67802441 -0.19858766
## [113,] 1.16625850 1.57561068
## [114,] -1.15508917 0.81660202
## [115,] -0.18769735 -1.12736741
## [116,] -0.93037337 0.12674599
## [117,] 0.90761603 0.05775454
## [118,] 0.92365922 -1.19448337
## [119,] 0.84738911 -0.46055488
## [120,] 0.50880090 -0.09662182
## [121,] 0.90776879 -0.59654267
## [122,] 0.58393308 0.62081318
## [123,] 0.90296128 0.64806451
## [124,] -1.35888218 1.86397715
## [125,] -0.85789755 -0.39255596
## [126,] 0.68451204 0.19873132
## [127,] -1.60987764 0.90784741
## [128,] -0.67172313 -0.55253321
## [129,] -1.86400962 1.56576204
## [130,] 0.62997911 -1.35300459
## [131,] 0.63364308 0.10998868
## [132,] 0.23472276 -1.60108407
## [133,] -0.34169237 2.05198437
## [134,] 0.65819755 0.49790260
## [135,] 0.76665827 1.03062327
## [136,] 1.04324030 -0.52313894
## [137,] 0.93406100 1.07075094
## [138,] 0.82806821 0.99269003
## [139,] 0.22963133 0.12519213
## [140,] -1.41184347 -0.88452233
## [141,] 1.13805315 -0.87425890
## [142,] 1.07859340 1.47228748
## [143,] 0.97865206 1.49507107
## [144,] -1.69232803 -0.53922351
## [145,] -1.63969879 0.18148776
## [146,] 0.29593755 -1.14584429
## [147,] 0.29927451 0.39067918
## [148,] 1.09772120 -0.95649747
## [149,] -0.82941307 -0.61605772
## [150,] 0.84567513 0.07177427
## [151,] 0.02478126 -0.01607318
## [152,] -1.10663821 1.42920505
## [153,] -1.95052165 0.46723473
## [154,] -0.11580869 -0.86878372
## [155,] -0.16884568 -0.15748957
## [156,] -1.83594691 2.23546357
## [157,] 0.92596913 1.12762895
## [158,] 0.74853985 1.11710041
## [159,] 1.48400307 0.26012748
## [160,] -1.70970304 0.39503784
## [161,] -1.35736632 0.23791400
## [162,] -1.20266956 0.38847886
## [163,] -1.22944766 -1.15385709
## [164,] 0.54816162 -1.25718564
## [165,] -0.75561568 -0.67787241
# Attach the factor scores to the main data frame: the first score column
# becomes ide_efa and the second becomes idh_efa.
idhdemo[["ide_efa"]] <- resfa$scores[, 1]
idhdemo[["idh_efa"]] <- resfa$scores[, 2]
NORMALIZING
# Rescale each score column to the interval from 0 to 10.
efa_scores_norm <- normalize(
  resfa$scores,
  method = "range",
  margin = 2, # column-wise
  range = c(0, 10)
)
# Attach the rescaled scores, in the same column order as the raw scores.
idhdemo[["ide_efa_norm"]] <- efa_scores_norm[, 1]
idhdemo[["idh_efa_norm"]] <- efa_scores_norm[, 2]
REGRESSING
# Linear regression of the rescaled ide score on the rescaled idh score.
summary(
  lm(formula = ide_efa_norm ~ idh_efa_norm, data = idhdemo)
)
##
## Call:
## lm(formula = ide_efa_norm ~ idh_efa_norm, data = idhdemo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7673 -2.4238 0.9237 2.3597 4.2718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.34932 0.46289 11.556 <2e-16 ***
## idh_efa_norm 0.07515 0.09288 0.809 0.42
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.817 on 163 degrees of freedom
## Multiple R-squared: 0.004001, Adjusted R-squared: -0.00211
## F-statistic: 0.6547 on 1 and 163 DF, p-value: 0.4196
# Same regression using the original ide and hdi columns, for comparison.
summary(
  lm(formula = ide ~ hdi, data = idhdemo)
)
##
## Call:
## lm(formula = ide ~ hdi, data = idhdemo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6195 -1.0091 0.4006 1.2527 3.3699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.8224 0.6613 -2.756 0.00652 **
## hdi 9.7393 0.8923 10.915 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.817 on 163 degrees of freedom
## Multiple R-squared: 0.4223, Adjusted R-squared: 0.4187
## F-statistic: 119.1 on 1 and 163 DF, p-value: < 2.2e-16
CLUSTERING data
# Work on a copy of the factor-analysis variables for the clustering.
dataClus <- theData
# Preview the first rows.
head(dataClus)
## hdiLife hdiSchoolExpec hdiMeanEduc hdiGni ideElectoral ideFunctioning
## 1 62.879 10.70538 2.514790 1335.206 0.00 0.07
## 2 76.833 14.48747 10.121144 15293.327 7.00 6.07
## 3 77.129 15.48788 6.987444 10978.406 3.08 2.50
## 4 61.929 12.16760 5.844292 5327.788 4.50 3.21
## 5 76.064 18.97951 11.144080 22047.971 9.17 5.00
## 6 73.372 14.40561 11.330300 15388.300 7.92 4.64
## ideParticipation ideCulture ideLiberties
## 1 0.00 1.25 0.00
## 2 5.00 6.25 7.06
## 3 3.89 5.00 3.82
## 4 5.56 5.00 2.65
## 5 7.22 3.75 7.94
## 6 6.11 3.13 5.29
# Label each row with its country name.
row.names(dataClus) <- idhdemo[["country"]]
distances
library(cluster)
# Pairwise dissimilarities between rows, using the Gower metric.
g.dist <- daisy(dataClus, metric = "gower")
suggestions
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Gap-statistic plot for PAM with up to 10 clusters, used to suggest how many
# clusters to keep. FALSE is spelled out because F is a reassignable variable.
# NOTE(review): confirm that `diss` is honoured when method = "gap_stat";
# the gap statistic may be computed from `dataClus` directly.
fviz_nbclust(
  dataClus,
  pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)
# Number of clusters requested from the clustering calls that follow.
K <- 3
pam
# Partitioning around medoids on the Gower dissimilarities, with K clusters.
set.seed(123)
res.pam <- pam(g.dist, k = K, cluster.only = FALSE)
# New column with the PAM cluster assigned to each row. The component is named
# `clustering`; the previous `$cluster` only worked through partial matching.
dataClus$pam <- res.pam$clustering
head(dataClus)
## hdiLife hdiSchoolExpec hdiMeanEduc hdiGni ideElectoral
## Afghanistan 62.879 10.70538 2.514790 1335.206 0.00
## Albania 76.833 14.48747 10.121144 15293.327 7.00
## Algeria 77.129 15.48788 6.987444 10978.406 3.08
## Angola 61.929 12.16760 5.844292 5327.788 4.50
## Argentina 76.064 18.97951 11.144080 22047.971 9.17
## Armenia 73.372 14.40561 11.330300 15388.300 7.92
## ideFunctioning ideParticipation ideCulture ideLiberties pam
## Afghanistan 0.07 0.00 1.25 0.00 1
## Albania 6.07 5.00 6.25 7.06 2
## Algeria 2.50 3.89 5.00 3.82 1
## Angola 3.21 5.56 5.00 2.65 1
## Argentina 5.00 7.22 3.75 7.94 2
## Armenia 4.64 6.11 3.13 5.29 2
agnes
# Hierarchical clustering via agnes on the Gower dissimilarities, cut into K
# groups; the assigned cluster is stored as a new column.
res.agnes <- hcut(
  g.dist,
  k = K,
  hc_func = "agnes",
  hc_method = "ward.D"
)
dataClus$agnes <- res.agnes$cluster
diana
# Hierarchical clustering via diana on the Gower dissimilarities, cut into K
# groups; the assigned cluster is stored as a new column.
res.diana <- hcut(
  g.dist,
  k = K,
  hc_func = "diana"
)
dataClus$diana <- res.diana$cluster
Evaluate
# Silhouette plot for each of the three clustering solutions, without the
# printed summary. FALSE is spelled out because F is a reassignable variable.
fviz_silhouette(res.pam, print.summary = FALSE)
fviz_silhouette(res.agnes, print.summary = FALSE)
fviz_silhouette(res.diana, print.summary = FALSE)
#knitr::spin('simpleCode_lastsessions.R', precious=TRUE)