Paola Nieto (20150967)
library(readxl)
# Read the raw HDI annex sheet as-is. The str() output that follows shows the
# first rows holding title/header text rather than data, so rows and columns
# are trimmed by position in the later steps.
IDH <- read_excel("2018_Statistical_Annex_Table_1.xlsx")
## New names:
## * `` -> ...1
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
str(IDH)
## tibble [265 × 15] (S3: tbl_df/tbl/data.frame)
## $ ...1 : chr [1:265] NA NA "HDI rank" NA ...
## $ Table 1. Human Development Index and its components: chr [1:265] NA NA "Country" NA ...
## $ ...3 : chr [1:265] NA "Human Development Index (HDI)" "Value" "2017" ...
## $ ...4 : logi [1:265] NA NA NA NA NA NA ...
## $ ...5 : chr [1:265] "SDG 3" "Life expectancy at birth" "(years)" "2017" ...
## $ ...6 : chr [1:265] NA NA NA NA ...
## $ ...7 : chr [1:265] "SDG 4.3" "Expected years of schooling" "(years)" "2017" ...
## $ ...8 : chr [1:265] NA NA NA "a" ...
## $ ...9 : chr [1:265] "SDG 4.6" "Mean years of schooling" "(years)" "2017" ...
## $ ...10 : chr [1:265] NA NA NA "a" ...
## $ ...11 : chr [1:265] "SDG 8.5" "Gross national income (GNI) per capita" "(2011 PPP $)" "2017" ...
## $ ...12 : chr [1:265] NA NA NA NA ...
## $ ...13 : chr [1:265] NA "GNI per capita rank minus HDI rank" NA "2017" ...
## $ ...14 : logi [1:265] NA NA NA NA NA NA ...
## $ ...15 : chr [1:265] NA "HDI rank" NA "2016" ...
# Discard, by position, the leading rank column and the columns that the
# str() output shows as all-NA or as NA/marker-only, leaving the eight
# columns that are named next.
IDH <- IDH[, -c(1, 4, 6, 8, 10, 12, 14)]
names(IDH) <- c(
  "country",
  "hdi",
  "Life expectancy",
  "Expected years of schooling",
  "Mean years of schooling",
  "GNI per capita",
  "GNI per capita rank minus HDI rank",
  "rank"
)
# Each development tier occupies a fixed row span of the sheet. Every span is
# tagged with its tier label and then loses its first row (presumably the
# tier heading row -- verify against the spreadsheet).
tier_rows <- list(
  "Very High" = 5:64,
  "High" = 65:118,
  "Medium" = 119:158,
  "Low" = 159:197,
  "Other" = 198:204
)
tier_frames <- list()
for (HumanDevelopment in names(tier_rows)) {
  # data.frame() names the added column after the variable passed to it, so
  # the label column is called HumanDevelopment; it also converts the spaces
  # in the other column names to dots.
  tagged <- data.frame(
    IDH[tier_rows[[HumanDevelopment]], ],
    HumanDevelopment,
    stringsAsFactors = FALSE
  )
  tier_frames[[HumanDevelopment]] <- tagged[-1, ]
}
VeryHigh <- tier_frames[["Very High"]]
High <- tier_frames[["High"]]
Medium <- tier_frames[["Medium"]]
Low <- tier_frames[["Low"]]
Other <- tier_frames[["Other"]]

# Stack the tiers, then drop columns 7 and 8 (the two rank columns).
IDH2 <- rbind(VeryHigh, High, Medium, Low, Other)
IDH2 <- IDH2[, -c(7, 8)]
str(IDH2)
## 'data.frame': 195 obs. of 7 variables:
## $ country : chr "Norway" "Switzerland" "Australia" "Ireland" ...
## $ hdi : chr "0.95252201967581829" "0.94399757027811748" "0.9386312851065749" "0.93841005899505603" ...
## $ Life.expectancy : chr "82.328000000000003" "83.472999999999999" "83.067999999999998" "81.643000000000001" ...
## $ Expected.years.of.schooling: chr "17.852060000000002" "16.208819999999999" "22.921250000000001" "19.61374" ...
## $ Mean.years.of.schooling : chr "12.56682" "13.407999999999999" "12.855040000000001" "12.526289999999999" ...
## $ GNI.per.capita : chr "68012.492920000004" "57625.069710000003" "43560.057739999997" "53754.186260000002" ...
## $ HumanDevelopment : chr "Very High" "Very High" "Very High" "Very High" ...
library(readr)
# Columns 2 to 6 were read as text; convert them to numeric. Entries that are
# not numbers become NA, which is what the coercion warnings report.
IDH2[2:6] <- lapply(IDH2[2:6], as.numeric)
## Warning in lapply(IDH2[, 2:6], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH2[, 2:6], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH2[, 2:6], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH2[, 2:6], as.numeric): NAs introduced by coercion
## Warning in lapply(IDH2[, 2:6], as.numeric): NAs introduced by coercion
str(IDH2)
## 'data.frame': 195 obs. of 7 variables:
## $ country : chr "Norway" "Switzerland" "Australia" "Ireland" ...
## $ hdi : num 0.953 0.944 0.939 0.938 0.936 ...
## $ Life.expectancy : num 82.3 83.5 83.1 81.6 81.2 ...
## $ Expected.years.of.schooling: num 17.9 16.2 22.9 19.6 17 ...
## $ Mean.years.of.schooling : num 12.6 13.4 12.9 12.5 14.1 ...
## $ GNI.per.capita : num 68012 57625 43560 53754 46136 ...
## $ HumanDevelopment : chr "Very High" "Very High" "Very High" "Very High" ...
# Load the EPI country snapshot and drop the unused columns in one step:
# positions 1, 2, 4 and 8 of the sheet as read.
EPI <- read_excel("2018-epi.xlsx", sheet = "2018EPI_CountrySnapshot")
EPI <- EPI[, -c(1, 2, 4, 8)]
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
# Recode the -9999 placeholder in SDGI to NA so it is treated as missing.
EPI$SDGI[which(EPI$SDGI == -9999)] <- NA
str(EPI)
## tibble [180 × 8] (S3: tbl_df/tbl/data.frame)
## $ country : chr [1:180] "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ PopGrowthRate: num [1:180] 2.69 -0.16 1.83 3.37 1.04 ...
## $ PopDensity : num [1:180] 53.1 105 17 23.1 229.5 ...
## $ GDP : num [1:180] 60.29 32.67 565.28 172.44 2.12 ...
## $ GDPpc : num [1:180] 1740 11359 13921 5985 21013 ...
## $ SDGI : num [1:180] 46.8 68.9 68.8 50.2 NA 72.5 71.7 75.9 81.4 70.8 ...
## $ EPI2018Score : num [1:180] 37.7 65.5 57.2 37.4 59.2 ...
## $ EPI2018Rank : num [1:180] 168 40 88 170 76 74 63 21 8 59 ...
# Inner join on the only shared column, to count countries whose names
# already match in both tables.
nrow(merge(IDH2, EPI, by = "country"))
## [1] 162
# Full outer join: keeps unmatched rows from both tables for inspection.
datajunta0 <- merge(IDH2, EPI, by = "country", all = TRUE)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# List every row of the outer join that has at least one missing value; this
# is where country-name mismatches between the two sources show up.
# kable() takes the output type through `format`; the previous `type`
# argument is not a kable() parameter and was silently absorbed by `...`.
kable(datajunta0[!complete.cases(datajunta0), ], format = "html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    font_size = 10
  )
| country | hdi | Life.expectancy | Expected.years.of.schooling | Mean.years.of.schooling | GNI.per.capita | HumanDevelopment | PopGrowthRate | PopDensity | GDP | GDPpc | SDGI | EPI2018Score | EPI2018Rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | Andorra | 0.8576836 | 81.663 | 13.52402 | 10.155450 | 47573.8701 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 6 | Antigua and Barbuda | 0.7795359 | 76.519 | 13.24949 | 9.236180 | 20763.6443 | High | 1.035422e+00 | 229.461364 | 2.121537e+00 | 21013.0176 | NA | 59.18 | 76 |
| 12 | Bahamas | 0.8071263 | 75.823 | 12.82263 | 11.079900 | 26680.6245 | Very High | 1.129473e+00 | 39.084116 | 8.168482e+00 | 20878.8695 | NA | 54.99 | 98 |
| 21 | Bolivia | NA | NA | NA | NA | NA | NA | 1.510047e+00 | 10.050662 | 7.303544e+01 | 6707.9562 | 64.7 | 55.98 | 92 |
| 22 | Bolivia (Plurinational State of) | 0.6925370 | 69.473 | 14.02423 | 8.915070 | 6714.0272 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 26 | Brunei Darussalam | 0.8532670 | 77.374 | 14.46502 | 9.060000 | 76427.2103 | Very High | 1.345029e+00 | 80.302846 | 3.038073e+01 | 71788.7832 | NA | 63.57 | 53 |
| 30 | Côte d’Ivoire | NA | NA | NA | NA | NA | NA | 2.510354e+00 | 74.515468 | 8.115294e+01 | 3424.7642 | 53.3 | 45.25 | 139 |
| 31 | Cabo Verde | 0.6540307 | 73.004 | 12.63420 | 6.124690 | 5983.1376 | Medium | 1.239581e+00 | 133.885856 | 3.277695e+00 | 6074.7545 | NA | 56.94 | 89 |
| 40 | Comoros | 0.5032536 | 63.912 | 11.24470 | 4.832000 | 1398.7975 | Low | 2.311191e+00 | 427.512628 | 1.122714e+00 | 1411.1523 | NA | 44.24 | 146 |
| 41 | Congo | 0.6062826 | 65.088 | 11.37073 | 6.310000 | 5694.2210 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 42 | Congo (Democratic Republic of the) | 0.4574692 | 60.031 | 9.75000 | 6.759480 | 795.8264 | Low | NA | NA | NA | NA | NA | NA | NA |
| 44 | Côte d’Ivoire | 0.4923045 | 54.102 | 9.03737 | 5.192000 | 3481.2253 | Low | NA | NA | NA | NA | NA | NA | NA |
| 48 | Czech Republic | NA | NA | NA | NA | NA | NA | 1.475671e-01 | 136.790999 | 3.311370e+02 | 31352.8220 | 81.9 | 67.68 | 33 |
| 49 | Czechia | 0.8875614 | 78.877 | 16.85478 | 12.740350 | 30588.3008 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 50 | Dem. Rep. Congo | NA | NA | NA | NA | NA | NA | 3.278534e+00 | 34.730665 | 5.857138e+01 | 743.8943 | 42.7 | 30.41 | 178 |
| 53 | Dominica | 0.7150566 | 78.032 | 12.70000 | 7.800000 | 8343.9306 | High | 5.194109e-01 | 98.057333 | 7.465602e-01 | 10151.3425 | NA | 59.38 | 73 |
| 58 | Equatorial Guinea | 0.5905610 | 57.939 | 9.30000 | 5.539090 | 19512.5325 | Medium | 3.847227e+00 | 43.546881 | 2.951429e+01 | 24162.5275 | NA | 60.40 | 71 |
| 59 | Eritrea | 0.4399785 | 65.536 | 5.37934 | 3.952450 | 1750.4590 | Low | 3.302730e+12 | 69.059406 | 7.795960e+00 | 1117.7004 | NA | 39.34 | 165 |
| 61 | Eswatini (Kingdom of) | 0.5883164 | 58.268 | 11.19615 | 6.521370 | 7619.9435 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 63 | Fiji | 0.7407880 | 70.417 | 15.30000 | 10.790000 | 8324.3100 | High | 7.382877e-01 | 49.193213 | 7.592063e+00 | 8447.2642 | NA | 53.09 | 107 |
| 72 | Grenada | 0.7719485 | 73.780 | 16.86987 | 8.675660 | 12863.7458 | High | 4.613812e-01 | 315.638235 | 1.413048e+00 | 13167.0521 | NA | 50.93 | 118 |
| 75 | Guinea-Bissau | 0.4553036 | 57.805 | 10.50000 | 3.003920 | 1551.8098 | Low | 2.519329e+00 | 64.569630 | 2.708444e+00 | 1491.6820 | NA | 44.67 | 143 |
| 79 | Hong Kong, China (SAR) | 0.9325829 | 84.097 | 16.32567 | 12.038130 | 58419.7099 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 84 | Iran | NA | NA | NA | NA | NA | NA | 1.148789e+00 | 49.287451 | 1.484950e+03 | 18497.7277 | 64.7 | 58.16 | 80 |
| 85 | Iran (Islamic Republic of) | 0.7980573 | 76.153 | 14.88064 | 9.840175 | 19130.2400 | High | NA | NA | NA | NA | NA | NA | NA |
| 95 | Kiribati | 0.6117840 | 66.506 | 12.89535 | 7.865530 | 3041.8666 | Medium | 1.753116e+00 | 141.228395 | 2.236600e-01 | 1955.1553 | NA | 55.26 | 95 |
| 96 | Korea (Democratic People’s Rep. of) | NA | 71.887 | 12.00025 | NA | NA | Other | NA | NA | NA | NA | NA | NA | NA |
| 97 | Korea (Republic of) | 0.9025611 | 82.361 | 16.49749 | 12.116330 | 35944.7095 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 100 | Lao People’s Democratic Republic | 0.6012757 | 67.021 | 11.20924 | 5.193850 | 6070.1156 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 101 | Laos | NA | NA | NA | NA | NA | NA | 1.406427e+00 | 29.282292 | 3.875593e+01 | 5734.5223 | 61.4 | 42.94 | 153 |
| 106 | Libya | 0.7055994 | 72.110 | 13.40250 | 7.310000 | 11100.0974 | High | 9.306745e-01 | 3.576647 | 9.096585e+01 | 14454.5031 | NA | 49.79 | 123 |
| 107 | Liechtenstein | 0.9160829 | 80.410 | 14.72093 | 12.548460 | 97335.7496 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 110 | Macedonia | NA | NA | NA | NA | NA | NA | 9.123870e-02 | 82.522046 | 2.716968e+01 | 13054.7770 | 69.4 | 61.06 | 68 |
| 114 | Maldives | 0.7168644 | 77.649 | 12.58675 | 6.311180 | 13567.3439 | High | 2.015177e+00 | 1391.640000 | 6.087624e+00 | 14581.4150 | NA | 52.14 | 111 |
| 117 | Marshall Islands | 0.7079474 | 73.620 | 13.00000 | 10.865770 | 5124.8962 | High | NA | NA | NA | NA | NA | NA | NA |
| 121 | Micronesia | NA | NA | NA | NA | NA | NA | 4.814453e-01 | 149.910000 | 3.414104e-01 | 3253.4798 | NA | 49.80 | 122 |
| 122 | Micronesia (Federated States of) | 0.6272547 | 69.316 | 11.70000 | 7.954740 | 3842.9071 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 123 | Moldova | NA | NA | NA | NA | NA | NA | -5.932920e-02 | 123.558714 | 1.756228e+01 | 4944.3357 | 74.2 | 51.97 | 112 |
| 124 | Moldova (Republic of) | 0.6997534 | 71.718 | 11.63386 | 11.595360 | 5553.8504 | High | NA | NA | NA | NA | NA | NA | NA |
| 125 | Monaco | NA | NA | NA | NA | NA | Other | NA | NA | NA | NA | NA | NA | NA |
| 132 | Nauru | NA | NA | 10.31429 | NA | 18572.9566 | Other | NA | NA | NA | NA | NA | NA | NA |
| 142 | Palau | 0.7984784 | 73.445 | 15.60380 | 12.327280 | 12830.5903 | High | NA | NA | NA | NA | NA | NA | NA |
| 143 | Palestine, State of | 0.6858355 | 73.646 | 12.82014 | 9.104640 | 5055.0862 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 145 | Papua New Guinea | 0.5443040 | 65.705 | 10.00000 | 4.580000 | 3402.5742 | Low | 2.064027e+00 | 17.853180 | 3.135592e+01 | 3878.2877 | NA | 39.35 | 164 |
| 152 | Republic of Congo | NA | NA | NA | NA | NA | NA | 2.572357e+00 | 15.009725 | 2.717401e+01 | 5301.3959 | 50.9 | 42.39 | 157 |
| 154 | Russia | NA | NA | NA | NA | NA | NA | 1.702445e-01 | 8.813796 | 3.581300e+03 | 24811.1442 | 68.9 | 63.79 | 52 |
| 155 | Russian Federation | 0.8162755 | 71.222 | 15.53573 | 12.019990 | 24232.5558 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 157 | São Tomé and PrÃÂncipe | NA | NA | NA | NA | NA | NA | 2.203582e+00 | 208.239583 | 6.001086e-01 | 3001.8940 | NA | 54.01 | 104 |
| 158 | Saint Kitts and Nevis | 0.7778446 | 74.372 | 14.38947 | 8.400000 | 23977.6260 | High | NA | NA | NA | NA | NA | NA | NA |
| 159 | Saint Lucia | 0.7470492 | 75.696 | 13.62860 | 8.936310 | 11694.7789 | High | 4.554919e-01 | 291.827869 | 2.138049e+00 | 12010.5015 | NA | 56.18 | 91 |
| 160 | Saint Vincent and the Grenadines | 0.7227079 | 73.294 | 13.25641 | 8.600000 | 10498.6929 | High | 1.716127e-01 | 281.135897 | 1.164780e+00 | 10623.3855 | NA | 66.48 | 36 |
| 161 | Samoa | 0.7128122 | 75.240 | 12.52000 | 10.315900 | 5909.3737 | High | 7.025260e-01 | 68.948763 | 1.154024e+00 | 5914.2813 | NA | 54.50 | 102 |
| 162 | San Marino | NA | NA | 15.11120 | NA | NA | Other | NA | NA | NA | NA | NA | NA | NA |
| 163 | Sao Tome and Principe | 0.5894763 | 66.762 | 12.46685 | 6.333470 | 2941.3128 | Medium | NA | NA | NA | NA | NA | NA | NA |
| 167 | Seychelles | 0.7965287 | 73.746 | 14.78051 | 9.500000 | 26076.8965 | High | 1.337635e+00 | 205.819565 | 2.491819e+00 | 26319.1586 | NA | 66.02 | 39 |
| 172 | Solomon Islands | 0.5459600 | 71.006 | 10.22041 | 5.540730 | 1872.4538 | Low | 2.011525e+00 | 21.415470 | 1.242422e+00 | 2072.7106 | NA | 43.22 | 151 |
| 173 | Somalia | NA | 56.714 | NA | NA | NA | Other | NA | NA | NA | NA | NA | NA | NA |
| 175 | South Korea | NA | NA | NA | NA | NA | NA | 4.513180e-01 | 525.704832 | 1.792870e+03 | 34985.7599 | 75.5 | 62.30 | 60 |
| 176 | South Sudan | 0.3877252 | 57.288 | 4.87162 | 4.849130 | 963.1741 | Low | NA | NA | NA | NA | NA | NA | NA |
| 181 | Swaziland | NA | NA | NA | NA | NA | NA | 1.809667e+00 | 78.087093 | 1.037361e+01 | 7723.6411 | 55.0 | 40.32 | 162 |
| 184 | Syrian Arab Republic | 0.5357037 | 70.963 | 8.75280 | 5.062500 | 2337.1701 | Low | NA | NA | NA | NA | NA | NA | NA |
| 185 | Taiwan | NA | NA | NA | NA | NA | NA | 2.596410e+11 | 650.744537 | 1.097717e+03 | 46602.3082 | NA | 72.84 | 23 |
| 187 | Tanzania | NA | NA | NA | NA | NA | NA | 3.092454e+00 | 62.736736 | 1.393800e+02 | 2508.0885 | 52.1 | 50.83 | 119 |
| 188 | Tanzania (United Republic of) | 0.5377147 | 66.310 | 8.92369 | 5.780000 | 2655.3938 | Low | NA | NA | NA | NA | NA | NA | NA |
| 190 | The former Yugoslav Republic of Macedonia | 0.7566872 | 75.851 | 13.32826 | 9.631960 | 12504.9456 | High | NA | NA | NA | NA | NA | NA | NA |
| 193 | Tonga | 0.7256447 | 73.179 | 14.30000 | 11.180000 | 5547.1730 | High | 7.101198e-01 | 148.780556 | 5.706675e-01 | 5327.2672 | NA | 62.49 | 57 |
| 198 | Tuvalu | NA | NA | NA | NA | 5887.7243 | Other | NA | NA | NA | NA | NA | NA | NA |
| 203 | United States | 0.9239136 | 79.541 | 16.46821 | 13.379990 | 54941.1093 | Very High | NA | NA | NA | NA | NA | NA | NA |
| 204 | United States of America | NA | NA | NA | NA | NA | NA | 6.928013e-01 | 35.324443 | 1.723620e+04 | 53341.7902 | 72.4 | 71.19 | 27 |
| 207 | Vanuatu | 0.6025739 | 72.334 | 10.89000 | 6.800000 | 2995.2797 | Medium | 2.167915e+00 | 22.182281 | 7.723966e-01 | 2856.4753 | NA | 44.55 | 144 |
| 208 | Venezuela | NA | NA | NA | NA | NA | NA | 1.317057e+00 | 35.789557 | 4.438401e+02 | 14059.7303 | 65.8 | 63.89 | 51 |
| 209 | Venezuela (Bolivarian Republic of) | 0.7607730 | 74.726 | 14.30000 | 10.323430 | 10671.5415 | High | NA | NA | NA | NA | NA | NA | NA |
| 211 | Yemen | 0.4519004 | 65.157 | 8.97700 | 3.000000 | 1239.2914 | Low | NA | NA | NA | NA | NA | NA | NA |
# Harmonise country names so both tables use the same spelling before the
# join. Each lookup maps "name as found" = "name to use".
idh_renames <- c(
  "United States" = "United States of America",
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Venezuela (Bolivarian Republic of)" = "Venezuela",
  "The former Yugoslav Republic of Macedonia" = "Macedonia",
  "Tanzania (United Republic of)" = "Tanzania",
  "Eswatini (Kingdom of)" = "Swaziland",
  "Korea (Republic of)" = "South Korea",
  "Russian Federation" = "Russia",
  "Lao People's Democratic Republic" = "Laos",
  "Congo (Democratic Republic of the)" = "Dem. Rep. Congo",
  "Iran (Islamic Republic of)" = "Iran",
  "Micronesia (Federated States of)" = "Micronesia",
  "Moldova (Republic of)" = "Moldova"
)
# NOTE(review): as displayed, the Côte d'Ivoire entry maps a name to itself;
# confirm the source file's encoding kept the intended original spelling.
epi_renames <- c(
  "Côte d'Ivoire" = "Côte d'Ivoire",
  "Czech Republic" = "Czechia",
  "Republic of Congo" = "Congo"
)

idh_hit <- IDH2$country %in% names(idh_renames)
IDH2$country[idh_hit] <- unname(idh_renames[IDH2$country[idh_hit]])

epi_hit <- EPI$country %in% names(epi_renames)
EPI$country[epi_hit] <- unname(epi_renames[EPI$country[epi_hit]])

# Inner join on the harmonised country name.
datajunta <- merge(IDH2, EPI, by = "country")
nrow(datajunta)
## [1] 178
# Keep only countries with no missing value in any column. The str() output
# below reports 155 remaining rows and 23 omitted ones.
datajunta=na.omit(datajunta)
str(datajunta)
## 'data.frame': 155 obs. of 14 variables:
## $ country : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ hdi : num 0.498 0.785 0.754 0.581 0.825 ...
## $ Life.expectancy : num 64 78.5 76.3 61.8 76.7 ...
## $ Expected.years.of.schooling: num 10.4 14.8 14.4 11.8 17.4 ...
## $ Mean.years.of.schooling : num 3.78 10.03 7.97 5.13 9.95 ...
## $ GNI.per.capita : num 1824 11886 13802 5790 18461 ...
## $ HumanDevelopment : chr "Low" "High" "High" "Medium" ...
## $ PopGrowthRate : num 2.689 -0.16 1.825 3.368 0.985 ...
## $ PopDensity : num 53.1 105 17 23.1 16 ...
## $ GDP : num 60.3 32.7 565.3 172.4 810.7 ...
## $ GDPpc : num 1740 11359 13921 5985 18489 ...
## $ SDGI : num 46.8 68.9 68.8 50.2 72.5 71.7 75.9 81.4 70.8 64.6 ...
## $ EPI2018Score : num 37.7 65.5 57.2 37.4 59.3 ...
## $ EPI2018Rank : num 168 40 88 170 74 63 21 8 59 96 ...
## - attr(*, "na.action")= 'omit' Named int [1:23] 5 11 24 28 37 48 53 54 57 66 ...
## ..- attr(*, "names")= chr [1:23] "5" "11" "24" "28" ...
# The country name, the tier label and the two EPI outcome columns are left
# out of the factor-analysis input.
dontselect <- c("country", "HumanDevelopment", "EPI2018Score", "EPI2018Rank")
select <- names(datajunta)[!(names(datajunta) %in% dontselect)]
theData <- datajunta[, select]
# This is the correlation matrix of the retained variables:
library(polycor)
corMatrix <- polycor::hetcor(theData)$correlations
# Explore the correlations.
# Without assessing significance:
library(ggcorrplot)
## Loading required package: ggplot2
ggcorrplot(corMatrix)
# Assessing significance: cells whose correlation is not significant are left
# blank. The p-values must be computed from the observations (theData), not
# from the correlation matrix itself; passing corMatrix would test
# correlations between the matrix's own rows.
ggcorrplot(corMatrix,
  p.mat = cor_pmat(theData),
  insig = "blank"
)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
# Kaiser-Meyer-Olkin factor adequacy, computed from the correlation matrix;
# prints the overall MSA and one MSA per variable.
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.83
## MSA for each item =
## hdi Life.expectancy
## 0.78 0.84
## Expected.years.of.schooling Mean.years.of.schooling
## 0.90 0.83
## GNI.per.capita PopGrowthRate
## 0.74 0.89
## PopDensity GDP
## 0.75 0.79
## GDPpc SDGI
## 0.78 0.94
# The overall MSA is above 0.5, so the data can be factored.
# Bartlett test on the correlation matrix: TRUE would mean its p-value
# exceeds 0.05.
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
# TRUE would mean the correlation matrix is singular.
is.singular.matrix(corMatrix)
## [1] FALSE
Ambas salieron falsas, podemos continuar con la factorización
# Parallel analysis to choose how many factors to extract.
# NOTE(review): this call uses fm = 'ML' while the fa() call further down
# uses fm = "minres" -- confirm the mismatch is intended.
fa.parallel(theData,fm = 'ML', fa = 'fa')
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
Se sugieren 2 factores
# Reduce dimensionality: fit a two-factor model with varimax rotation.
# NOTE(review): this run logged an ultra-Heywood warning, and the communality
# printed for GNI.per.capita is above 1; examine the solution before
# interpreting it.
library(GPArotation)
resfa <- fa(theData,nfactors = 2,cor = 'mixed',rotate = "varimax",fm="minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
# Print the factor loadings with the default display cutoff.
print(resfa$loadings)
##
## Loadings:
## MR1 MR2
## hdi 0.899 0.435
## Life.expectancy 0.826 0.370
## Expected.years.of.schooling 0.829 0.344
## Mean.years.of.schooling 0.858 0.291
## GNI.per.capita 0.415 0.911
## PopGrowthRate -0.707 0.104
## PopDensity 0.326
## GDP 0.160
## GDPpc 0.414 0.880
## SDGI 0.941 0.230
##
## MR1 MR2
## SS loadings 4.667 2.311
## Proportion Var 0.467 0.231
## Cumulative Var 0.467 0.698
Resultado mejorado
# Print the loadings again, hiding those smaller than 0.5 in absolute value
# so each variable's dominant factor stands out.
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## hdi 0.899
## Life.expectancy 0.826
## Expected.years.of.schooling 0.829
## Mean.years.of.schooling 0.858
## GNI.per.capita 0.911
## PopGrowthRate -0.707
## PopDensity
## GDP
## GDPpc 0.880
## SDGI 0.941
##
## MR1 MR2
## SS loadings 4.667 2.311
## Proportion Var 0.467 0.231
## Cumulative Var 0.467 0.698
fa.diagram(resfa)
¿La raíz del error cuadrático medio corregida está cerca de cero?
resfa$crms
## [1] 0.02372592
¿La raíz del error cuadrático medio de aproximación es menor a 0.05?
resfa$RMSEA
## RMSEA lower upper confidence
## 0.1699666 0.1438317 0.1985637 0.9000000
¿El índice de Tucker-Lewis es mayor a 0.9?
resfa$TLI
## [1] 0.8928612
¿Qué variables aportaron más a los factores?
sort(resfa$communality)
## GDP PopDensity
## 0.0330759 0.1061881
## PopGrowthRate Expected.years.of.schooling
## 0.5103593 0.8053951
## Life.expectancy Mean.years.of.schooling
## 0.8192786 0.8204888
## SDGI GDPpc
## 0.9392638 0.9451553
## hdi GNI.per.capita
## 0.9969126 1.0023686
¿Qué variables contribuyen a más de un factor?
sort(resfa$complexity)
## PopDensity PopGrowthRate
## 1.000162 1.042877
## SDGI Mean.years.of.schooling
## 1.118842 1.227939
## Expected.years.of.schooling Life.expectancy
## 1.334180 1.386668
## GNI.per.capita GDPpc
## 1.397294 1.421721
## hdi GDP
## 1.444666 1.548114
Darles nombres
# Preview the factor scores, then pair them with the country names (the
# first column of datajunta).
head(as.data.frame(resfa$scores))
datajuntaFA <- cbind(datajunta["country"], as.data.frame(resfa$scores))
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatter of the two factor scores, with the country name as hover text.
# Axis titles for a 2D scatter belong directly under layout(); nesting them
# in `scene` only affects 3D plots, so the titles were never shown.
plot_ly(data = datajuntaFA, x = ~MR1, y = ~MR2, text = ~country) %>%
  add_markers() %>%
  layout(
    xaxis = list(title = "Factor1"),
    yaxis = list(title = "Factor2")
  )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
library(fpc)
library(cluster)
library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:fpc':
##
## dbscan
# Euclidean distances between countries in factor-score space, followed by
# the k-nearest-neighbour distance plot with a reference line at 0.63.
g.dist.cmd <- daisy(datajuntaFA[, c("MR1", "MR2")], metric = "euclidean")
kNNdistplot(g.dist.cmd, k = 2)
abline(h = 0.63, col = "red")
Para tener una idea de cada quien:
# Cluster the countries with DBSCAN on the precomputed distances, using the
# same 0.63 threshold that was drawn on the kNN distance plot, and store the
# cluster label as a factor.
resDB=fpc::dbscan(g.dist.cmd, eps=0.63, MinPts=2,method = 'dist')
datajuntaFA$clustDB=as.factor(resDB$cluster)
# Largest score on each factor within every cluster.
aggregate(cbind(MR1, MR2) # dependent variables
~ clustDB, # grouping level
data = datajuntaFA, # data
max) # operation
# Same factor-score scatter, coloured by DBSCAN cluster.
# Axis titles go directly under layout(); under `scene` they only apply to 3D
# plots and were never shown. The previous titles ('Demo', 'Tranquilidad') do
# not describe these factors, so they now match the first scatter.
plot_ly(
  data = datajuntaFA, x = ~MR1, y = ~MR2, text = ~country, color = ~clustDB
) %>%
  add_markers() %>%
  layout(
    xaxis = list(title = "Factor1"),
    yaxis = list(title = "Factor2")
  )
Finalmente, veamos relaciones:
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse
## The following object is masked from 'package:base':
##
## isFALSE
# Rescale both factor scores to the 0-10 range and store them in datajunta
# as fa1 (from MR1) and fa2 (from MR2).
datajunta[c("fa1", "fa2")] <- lapply(
  datajuntaFA[c("MR1", "MR2")],
  normalize,
  method = "range",
  margin = 2, # by column
  range = c(0, 10)
)
You can see them all here:
# Scatterplot matrix of HDI, the EPI score and the two rescaled factor scores.
plot(datajunta[,c("hdi","EPI2018Score","fa1","fa2")])