Principal Component Analysis (PCA)

Loading the dataset- human

# read the 'human' data set: a comma-separated text file whose first line
# holds the column names
human <- read.table(
  "http://s3.amazonaws.com/assets.datacamp.com/production/course_2218/datasets/human1.txt",
  sep = ",",
  header = TRUE  # spelled out: 'T' is an ordinary variable that can be reassigned
)

# list the column names of human
colnames(human)
##  [1] "HDI.Rank"       "Country"        "HDI"            "Life.Exp"      
##  [5] "Edu.Exp"        "Edu.Mean"       "GNI"            "GNI.Minus.Rank"
##  [9] "GII.Rank"       "GII"            "Mat.Mor"        "Ado.Birth"     
## [13] "Parli.F"        "Edu2.F"         "Edu2.M"         "Labo.F"        
## [17] "Labo.M"         "Edu2.FM"        "Labo.FM"
# compactly display the type and first values of every column in human
utils::str(human)
## 'data.frame':    195 obs. of  19 variables:
##  $ HDI.Rank      : int  1 2 3 4 5 6 6 8 9 9 ...
##  $ Country       : Factor w/ 195 levels "Afghanistan",..: 129 10 169 48 124 67 84 186 34 125 ...
##  $ HDI           : num  0.944 0.935 0.93 0.923 0.922 0.916 0.916 0.915 0.913 0.913 ...
##  $ Life.Exp      : num  81.6 82.4 83 80.2 81.6 80.9 80.9 79.1 82 81.8 ...
##  $ Edu.Exp       : num  17.5 20.2 15.8 18.7 17.9 16.5 18.6 16.5 15.9 19.2 ...
##  $ Edu.Mean      : num  12.6 13 12.8 12.7 11.9 13.1 12.2 12.9 13 12.5 ...
##  $ GNI           : Factor w/ 194 levels "1,096","1,123",..: 166 135 156 139 140 137 127 154 134 117 ...
##  $ GNI.Minus.Rank: int  5 17 6 11 9 11 16 3 11 23 ...
##  $ GII.Rank      : int  1 2 3 4 5 6 6 8 9 9 ...
##  $ GII           : num  0.067 0.11 0.028 0.048 0.062 0.041 0.113 0.28 0.129 0.157 ...
##  $ Mat.Mor       : int  4 6 6 5 6 7 9 28 11 8 ...
##  $ Ado.Birth     : num  7.8 12.1 1.9 5.1 6.2 3.8 8.2 31 14.5 25.3 ...
##  $ Parli.F       : num  39.6 30.5 28.5 38 36.9 36.9 19.9 19.4 28.2 31.4 ...
##  $ Edu2.F        : num  97.4 94.3 95 95.5 87.7 96.3 80.5 95.1 100 95 ...
##  $ Edu2.M        : num  96.7 94.6 96.6 96.6 90.5 97 78.6 94.8 100 95.3 ...
##  $ Labo.F        : num  61.2 58.8 61.8 58.7 58.5 53.6 53.1 56.3 61.6 62 ...
##  $ Labo.M        : num  68.7 71.8 74.9 66.4 70.6 66.4 68.1 68.9 71 73.8 ...
##  $ Edu2.FM       : num  1.007 0.997 0.983 0.989 0.969 ...
##  $ Labo.FM       : num  0.891 0.819 0.825 0.884 0.829 ...
# access the stringr package
library("stringr")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("GGally")
## Warning: package 'GGally' was built under R version 3.6.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.3
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library("corrplot")
## Warning: package 'corrplot' was built under R version 3.6.3
## corrplot 0.84 loaded
# inspect how the GNI column of 'human' is stored
str(human[["GNI"]])
##  Factor w/ 194 levels "1,096","1,123",..: 166 135 156 139 140 137 127 154 134 117 ...
# remove the thousands-separator commas from GNI and store it as numeric
# (str_replace_all drops every comma, not only the first one in each value)
human$GNI <- human$GNI %>%
  str_replace_all(pattern = ",", replacement = "") %>%
  as.numeric()

# names of the variables retained for the analysis
keep <- c(
  "Country", "Edu2.FM", "Labo.FM", "Life.Exp", "Edu.Exp",
  "GNI", "Mat.Mor", "Ado.Birth", "Parli.F"
)

# restrict 'human' to the variables listed in 'keep'
human <- human %>%
  select(one_of(keep))

# flag each row of 'human': TRUE when the row has no missing values
human %>% complete.cases()
##   [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
##  [13] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [61]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [73]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [85]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
##  [97]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [109] FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121]  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133] FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
## [145]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
## [157] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [169] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [181]  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [193]  TRUE  TRUE  TRUE
# show every column except the first (Country), with the completeness
# indicator appended as the last column 'comp'
data.frame(
  human[, -1],
  comp = complete.cases(human)
)
##       Edu2.FM   Labo.FM Life.Exp Edu.Exp    GNI Mat.Mor Ado.Birth Parli.F  comp
## 1   1.0072389 0.8908297     81.6    17.5  64992       4       7.8    39.6  TRUE
## 2   0.9968288 0.8189415     82.4    20.2  42261       6      12.1    30.5  TRUE
## 3   0.9834369 0.8251001     83.0    15.8  56431       6       1.9    28.5  TRUE
## 4   0.9886128 0.8840361     80.2    18.7  44025       5       5.1    38.0  TRUE
## 5   0.9690608 0.8286119     81.6    17.9  45435       6       6.2    36.9  TRUE
## 6   0.9927835 0.8072289     80.9    16.5  43919       7       3.8    36.9  TRUE
## 7   1.0241730 0.7797357     80.9    18.6  39568       9       8.2    19.9  TRUE
## 8   1.0031646 0.8171263     79.1    16.5  52947      28      31.0    19.4  TRUE
## 9   1.0000000 0.8676056     82.0    15.9  42155      11      14.5    28.2  TRUE
## 10  0.9968520 0.8401084     81.8    19.2  32689       8      25.3    31.4  TRUE
## 11  0.9148148 0.7616580     83.0    15.4  76628       6       6.0    25.3  TRUE
## 12  0.9116162 0.7566372     84.0    15.6  53959      NA       3.3      NA FALSE
## 13         NA        NA     80.0    15.0  79851      NA        NA    20.0 FALSE
## 14  0.9908362 0.8880707     82.2    15.8  45636       4       6.5    43.6  TRUE
## 15  0.9989990 0.8107715     80.7    16.2  39267       8      25.8    23.5  TRUE
## 16  0.9934498 0.9108527     82.6    19.0  35182       4      11.5    41.3  TRUE
## 17  0.8641975 0.6948682     81.9    16.9  33890      27       2.2    16.3  TRUE
## 18  0.9667812 0.8379161     82.4    16.0  30676       2       7.8    22.5  TRUE
## 19  1.0000000 0.7848297     81.7    13.9  58711      11       8.3    28.3  TRUE
## 20  1.0139860 0.6931818     83.5    15.3  36927       6       5.4    11.6  TRUE
## 21  0.9348613 0.8010118     80.8    16.3  41187       6       6.7    42.4  TRUE
## 22  0.9375000 0.8230519     82.2    16.0  38056      12       5.7    25.7  TRUE
## 23  1.0000000 0.8064993     81.4    15.7  43869       4       4.1    30.3  TRUE
## 24  1.0000000 0.8703125     80.8    17.1  38695       4       9.2    42.5  TRUE
## 25  0.9775510 0.8275316     80.4    16.8  27852       7       0.6    27.7  TRUE
## 26  0.9138167 0.7978723     82.6    17.3  32045       4      10.6    38.0  TRUE
## 27  0.8844720 0.6655462     83.1    16.0  33030       4       4.0    30.1  TRUE
## 28  1.0020060 0.7481698     78.6    16.4  26660       5       4.9    18.9  TRUE
## 29  0.8880597 0.7072000     80.9    17.6  24524       5      11.9    21.0  TRUE
## 30  1.0000000 0.8156749     76.8    16.5  25214      11      16.8    19.8  TRUE
## 31  0.9424779 0.6985392     78.8    14.5  72570      27      23.0      NA FALSE
## 32  0.9302326 0.7876231     80.2    14.0  28633      10       5.5    12.5  TRUE
## 33  1.1305085 0.5319372     78.2    13.8 123124       6       9.5     0.0  TRUE
## 34  1.0040568        NA     81.3    13.5  43978      NA        NA    50.0 FALSE
## 35  0.9959799 0.7448980     76.3    15.1  25845       7      15.9    18.7  TRUE
## 36  0.9286550 0.7534669     77.4    15.5  23177       3      12.2    22.1  TRUE
## 37  0.9448568 0.8291233     73.3    16.4  24500      11      10.6    23.4  TRUE
## 38  0.8772379 0.5716440     80.6    14.4  27930       9      18.2    13.0  TRUE
## 39  0.8605974 0.2579821     74.3    16.3  52821      16      10.2    19.9  TRUE
## 40  0.9774306 0.6333333     76.3    17.9  22050      69      54.4    36.8  TRUE
## 41  1.1944444 0.5054348     77.0    13.3  60868       8      27.6    17.5  TRUE
## 42  0.9594241 0.6577540     81.7    15.2  21290      22      55.3    15.8  TRUE
## 43  0.9896266 0.8293051     80.9    16.3  25757       8      12.6    31.3  TRUE
## 44  0.9918946 0.7466667     75.2    15.4  22916      14      12.1    10.1  TRUE
## 45  1.1031128 0.4510932     76.6    14.4  38599      22      13.8    15.0  TRUE
## 46  0.9989899 0.8121302     74.2    15.2  22281      13      13.5    18.0  TRUE
## 47  0.9081197 0.7654110     77.3    14.8  19409      13      12.7    25.8  TRUE
## 48  0.9875666 0.5246691     74.4    14.7  83961      14      14.5     1.5  TRUE
## 49  0.8891235 0.7504363     76.2    15.2  14558       7      15.2    17.3  TRUE
## 50  0.9436009 0.7939778     71.3    15.7  16676       1      20.6    30.1  TRUE
## 51  0.9686486 0.7963738     70.1    14.7  22352      24      25.7    14.5  TRUE
## 52  0.8266200 0.3510896     76.8    13.6  34858      11      10.6     9.6  TRUE
## 53  0.9358696 0.7503852     74.7    14.2  18108      33      31.0    12.0  TRUE
## 54  1.0815109 0.7239583     77.2    15.5  19283      14      58.3    11.5  TRUE
## 55  1.0410959 0.8738966     75.4    12.6  21336      37      28.5    16.7  TRUE
## 56  0.9645749 0.8690629     69.4    15.0  20867      26      29.9    20.1  TRUE
## 57  1.0205245 0.8603133     75.6    15.4  12488      52      48.4    19.6  TRUE
## 58         NA        NA     76.1    14.0  20070      NA      49.3    25.7 FALSE
## 59  0.9717868 0.8118644     74.2    14.4  15596       5      35.9    20.4  TRUE
## 60         NA        NA     72.7    13.7  13496      NA        NA    10.3 FALSE
## 61  1.0821643 0.5990220     77.6    13.3  18192      85      78.5    19.3  TRUE
## 62  0.9130435 0.5880795     74.7    12.7  22762      29       5.7    14.2  TRUE
## 63  0.8517241 0.5876011     74.4    15.6  17470      73      30.9    11.6  TRUE
## 64  1.0045045        NA     73.1    13.4  23300      NA      56.3    43.8 FALSE
## 65  0.9802956 0.7019868     70.4    12.3  26090      84      34.8    24.7  TRUE
## 66  0.7934783 0.7307061     74.9    14.4  12190      16      16.9    34.0  TRUE
## 67  0.9428934 0.6200000     79.4    13.8   7301      80      43.1    48.9  TRUE
## 68  0.9566787 0.3286319     79.3    13.8  16509      16      12.0     3.1  TRUE
## 69  1.0039604 0.5898734     79.4    13.9  13413      38      60.8    33.3  TRUE
## 70  0.9201183 0.2255435     75.4    15.1  15440      23      31.6     3.1  TRUE
## 71  1.1141732 0.6452020     74.2    14.2  16159     110      83.2    17.0  TRUE
## 72  0.6500000 0.4152542     75.3    14.5  18677      20      30.9    14.4  TRUE
## 73  0.9515707 0.4600262     74.9    13.7   9779      29      16.9     5.8  TRUE
## 74  0.9191419 0.5644556     76.8    13.1  16056      49      63.4    37.1  TRUE
## 75  1.0419847 0.7351485     74.5    15.2  15175      69      70.8     9.6  TRUE
## 76  0.9676375 0.7523302     74.9    13.8   7164      41      46.8    11.3  TRUE
## 77         NA        NA     73.8    12.9  20805      NA        NA     6.7 FALSE
## 78  0.9620123 0.9037356     70.8    11.9  16428      26      40.0    15.6  TRUE
## 79         NA        NA     73.4    15.8  10939      23      35.4    25.0 FALSE
## 80  0.8853503 0.2342342     74.0    13.5  11365      50      26.5    11.6  TRUE
## 81  0.7230216 0.6385185     75.4    13.4  11780       7      18.3    33.3  TRUE
## 82  0.9562044 0.7952167     71.0    15.1   8178      23      25.7    11.8  TRUE
## 83  0.8612903 0.2105263     74.8    14.0  13054      89      10.0    25.7  TRUE
## 84  0.8517398 0.8080569     74.6    13.1  11015      89      50.7    22.3  TRUE
## 85  0.9306030 0.6854962     77.8    11.8   9943      21      15.3    20.7  TRUE
## 86  0.9894737 0.7465565     74.7    12.3   8124      29      27.1    10.7  TRUE
## 87  0.6432665 0.5951134     76.5    13.6   9638       8      15.1    19.3  TRUE
## 88  1.0177665 0.6614268     75.9    14.2  10605      87      77.0    41.6  TRUE
## 89         NA 0.8228346     75.1    12.6   9765      34      56.3    20.7 FALSE
## 90  0.8164117 0.8160920     75.8    13.1  12547      32       8.6    23.6  TRUE
## 91  0.9953488 0.5208333     70.0    15.7   7493      59      42.8    14.0  TRUE
## 92  1.0142687 0.8167388     69.4    14.6  10729      68      18.7    14.9  TRUE
## 93  0.8750000 0.7967782     74.4    13.5  13323      26      41.0     6.1  TRUE
## 94  1.2801724        NA     77.8    12.7   9994      NA        NA    21.9 FALSE
## 95  1.3245823 0.3926702     71.6    14.0  14911      15       2.5    16.0  TRUE
## 96  0.7114967 0.3540197     74.8    14.6  10404      46       4.6    31.3  TRUE
## 97  1.0233813 0.7001255     74.0    13.5  12040      83      68.5    20.9  TRUE
## 98         NA 0.7141026     72.9    13.4   9937      45      54.5    13.0 FALSE
## 99  1.0541311 0.7912553     75.7    12.4   7415      80      70.1    16.7  TRUE
## 100 0.9909400 0.7171582     72.8    14.7   5069     120      18.1     0.0  TRUE
## 101 1.0079156 0.5978129     70.0    13.6   7614      45      71.4    13.3  TRUE
## 102 1.0470810 0.6526718     73.5    13.1  11883     100      99.6    19.1  TRUE
## 103 0.9469214 0.5886628     71.1    12.7  15617     130      35.2    11.8  TRUE
## 104 0.8348624 0.7251613     76.8    13.0  12328      31       4.2     5.9  TRUE
## 105 1.0716667 0.4023973     73.4    12.9   5327      58      28.3     6.1  TRUE
## 106 0.9448010 0.8811275     64.5    12.5  16646     170      44.2     9.5  TRUE
## 107 0.9689441 0.8506787     71.6    11.9   5223      21      29.3    20.8  TRUE
## 108 0.7244224 0.3168449     71.1    13.5  10512      45      43.0     2.2  TRUE
## 109        NA 0.6098830     65.6    10.8  13066      61      18.0    25.8 FALSE
## 110 1.4930748 0.8593272     64.4    12.5  16367     240     103.0    16.2  TRUE
## 111 0.8109756 0.6104513     68.9    13.0   9788     190      48.3    17.1  TRUE
## 112 0.8558140 0.6568396     72.9    11.9   7643     110      67.0    16.8  TRUE
## 113 0.9074074 0.2319277     72.9    13.0   4699      NA      45.8      NA FALSE
## 114        NA 0.6362434     68.4    11.5   5567      36      38.8    16.4 FALSE
## 115 1.0345369 0.6411543     68.2    11.3   7915     120      46.8    27.1  TRUE
## 116 0.8440367 0.6050633     73.0    12.3   7349      69      76.0    27.4  TRUE
## 117 0.9578393 0.7355372     57.4    13.6  12122     140      50.9    40.7  TRUE
## 118 0.8342697 0.8880779     75.8    11.9   5092      49      29.0    24.3  TRUE
## 119 0.8054146 0.7935723     68.3    13.2   5760     200      71.9    51.8  TRUE
## 120 0.9762397 0.7044025     70.6    12.5   3044      75      29.3    23.3  TRUE
## 121 0.5537849 0.2134670     69.4    10.1  14003      67      68.7    26.5  TRUE
## 122        NA 0.6152927     73.3    13.5   6094      53      70.6    20.8 FALSE
## 123        NA        NA     69.1    11.7   3432      96      18.6     0.0 FALSE
## 124 1.2615063 0.5291925     66.4    10.3   6522     250      88.5    31.3  TRUE
## 125 1.0287206 0.5902864     74.9    11.5   4457     100     100.8    39.1  TRUE
## 126 0.6854305 0.3496042     74.0    11.6   6850     120      35.8    11.0  TRUE
## 127 0.9680233 0.8587127     64.8    11.3   9418     130      54.9    37.7  TRUE
## 128 0.9439655 0.5589569     71.8    10.7   6929     140      97.2    13.3  TRUE
## 129 1.0427632 0.7639429     69.4    11.2   2517      44      42.8    15.2  TRUE
## 130 0.4770318 0.3379224     68.0    11.7   5497     190      32.8    12.2  TRUE
## 131 1.0852713 0.5162847     73.1    11.1   3938     120      84.0    25.8  TRUE
## 132 0.9855072 0.8639896     69.5    12.6   7176     120      40.9     8.3  TRUE
## 133        NA 0.4842520     68.2    11.7   5363     270      52.2    38.5 FALSE
## 134 0.7283951 0.1856946     69.6    12.3   2728      49      41.6    12.4  TRUE
## 135        NA 0.7687500     71.9    10.6   2803      86      44.8     0.0 FALSE
## 136 0.8446809 0.9383562     62.3    11.1   6012     410     126.7    11.5  TRUE
## 137        NA        NA     66.0    12.3   2434     130      16.6     8.7 FALSE
## 138        NA 0.8752711     57.6     9.0  21056     290     112.6    19.7 FALSE
## 139 0.5863636 0.8539720     60.1    13.5   3734     280     125.4    12.7  TRUE
## 140 0.6986090 0.9425770     61.4    11.5   3852     380      58.4    10.9  TRUE
## 141 0.6189189 0.9646018     66.2    10.6   4680      NA      65.0    25.0 FALSE
## 142 0.8256659 0.6825208     71.6    10.0   3191     170      80.6    20.0  TRUE
## 143 0.4323144 0.9109827     68.4    10.9   2949     170      44.3    19.0  TRUE
## 144        NA 0.5822622     66.5    11.3   2918     210      65.1    18.2 FALSE
## 145 0.8057325 0.8591160     61.6    11.0   2762     400      93.6    20.8  TRUE
## 146 0.4633508 0.9173364     69.6    12.4   2311     190      73.7    29.5  TRUE
## 147 0.4186551 0.2967431     66.2     7.8   4866     170      27.3    19.7  TRUE
## 148 1.4967320 0.9137303     65.9     8.6   4608     200      12.1     4.7  TRUE
## 149        NA 0.8231469     52.3    11.4   6822     460     170.2    36.8 FALSE
## 150 0.8423077 0.6131285     49.0    11.3   5542     310      72.0    14.7  TRUE
## 151 0.5894737 0.9767184     65.0     9.2   2411     410     122.7    36.0  TRUE
## 152        NA 0.7566719     52.8     9.0   5341     560     119.6     6.6 FALSE
## 153 0.6103152 0.8307292     55.5    10.4   2803     590     115.8    27.1  TRUE
## 154        NA 0.9569061     65.1    10.3   1328     440     122.8    20.5 FALSE
## 155 0.7854839 0.9275362     57.5    10.9   1615     470      60.3    35.1  TRUE
## 156 0.3971292 0.3628319     63.1     8.5   3560     320      73.3    22.2  TRUE
## 157        NA 0.6759494     67.9     9.2   1540     130      64.9     2.0 FALSE
## 158 0.5241379 0.9527027     62.6     9.9   2463     220      62.1     2.7  TRUE
## 159        NA 0.4394507     63.3    11.5   1456     350      51.1     3.0 FALSE
## 160 0.3220974 0.3518006     63.8     9.2   3519     270      47.0     0.7  TRUE
## 161 1.1526316 0.8027211     49.8    11.1   3306     490      89.4    26.8  TRUE
## 162 0.3995037 0.9913899     59.7    12.2   1228     450      91.5    17.6  TRUE
## 163 0.6363636 0.8577465     62.8     8.7   1669     380      42.0     3.5  TRUE
## 164 0.9090909 1.0128957     64.2    10.3   1458     320      33.6    57.5  TRUE
## 165 0.6835821 0.9570707     58.5     9.8   1613     360     126.6    35.0  TRUE
## 166 0.4185185 0.8633461     59.6    11.1   1767     340      90.2     8.4  TRUE
## 167 0.6648352 0.4118421     63.5     7.0   3809     360      84.0    23.8  TRUE
## 168        NA 0.5361891     62.0     6.4   3276     230      18.6    12.7 FALSE
## 169        NA        NA     55.7     7.6   2332     730      75.3    24.3 FALSE
## 170 0.4675325 0.7500000     66.5     7.9   2188     320      94.4    42.7  TRUE
## 171 0.1979866 0.1987421     60.4     9.3   1885     400      86.8    27.6  TRUE
## 172 0.4651163 0.6437346     51.5     8.9   3171     720     130.3     9.2  TRUE
## 173 0.5138889 1.0380368     62.8    10.8    747     510     144.8    16.7  TRUE
## 174 0.4285714 0.8756999     64.1     8.5   1428     420      78.4    25.5  TRUE
## 175 0.5523810 0.8709288     60.2     8.8   1507     430     115.8     9.4  TRUE
## 176 0.3950617 0.9658470     58.7     9.8    680     730     135.3     8.2  TRUE
## 177 0.3918575 0.8981481     60.9     9.5    805     640     117.4    10.7  TRUE
## 178        NA 0.8687898     55.2     9.0   1362     560      99.3    13.7 FALSE
## 179 0.5099338 0.6240786     58.0     8.4   1583     550     175.6     9.5  TRUE
## 180 0.2258065 1.0326087     55.1     9.3   1123     480     137.8    39.6  TRUE
## 181 0.4608295 0.9521739     50.9     8.6   1780    1100     100.7    12.4  TRUE
## 182        NA 0.8378033     58.8     8.7   1096     650     131.0    21.9 FALSE
## 183 0.2812500 0.8566667     58.7     7.8   1591     400     115.4    13.3  TRUE
## 184 0.6385542 1.0158537     56.7    10.1    758     740      30.3    34.9  TRUE
## 185 0.1717172 0.8080808     51.6     7.4   2085     980     152.0    14.9  TRUE
## 186        NA 0.8908686     63.7     4.1   1130     380      65.3    22.0 FALSE
## 187 0.3782772 0.8531140     50.7     7.2    581     880      98.3    12.5  TRUE
## 188 0.3076923 0.4459309     61.4     5.4    908     630     204.8    13.3  TRUE
## 189 0.7289916 0.3081009     70.6    12.0  15722     155      45.4    14.0  TRUE
## 190 0.8250377 0.7884131     74.0    12.7  11449      72      21.2    18.7  TRUE
## 191 0.8784119 0.6514286     72.3    13.6  12791      28      30.8    19.0  TRUE
## 192 0.9836957 0.6729323     75.0    14.0  14242      85      68.3    27.0  TRUE
## 193 0.5329670 0.3711083     68.4    11.2   5605     183      38.7    17.5  TRUE
## 194 0.7015873 0.8537859     58.5     9.6   3363     506     109.7    22.5  TRUE
## 195 0.8333333 0.6558018     71.5    12.2  14301     210      47.4    21.8  TRUE
# keep only the rows of 'human' that have no missing values
# (the dot is the piped data frame, so this is filter(human, complete.cases(human)))
human <- human %>%
  filter(complete.cases(.))

# 'human' now contains only complete rows (no NA values)

# show the final 10 rows of 'human'
tail(human, n = 10)
##                             Country   Edu2.FM   Labo.FM Life.Exp Edu.Exp   GNI
## 153                            Chad 0.1717172 0.8080808     51.6     7.4  2085
## 154        Central African Republic 0.3782772 0.8531140     50.7     7.2   581
## 155                           Niger 0.3076923 0.4459309     61.4     5.4   908
## 156                     Arab States 0.7289916 0.3081009     70.6    12.0 15722
## 157       East Asia and the Pacific 0.8250377 0.7884131     74.0    12.7 11449
## 158         Europe and Central Asia 0.8784119 0.6514286     72.3    13.6 12791
## 159 Latin America and the Caribbean 0.9836957 0.6729323     75.0    14.0 14242
## 160                      South Asia 0.5329670 0.3711083     68.4    11.2  5605
## 161              Sub-Saharan Africa 0.7015873 0.8537859     58.5     9.6  3363
## 162                           World 0.8333333 0.6558018     71.5    12.2 14301
##     Mat.Mor Ado.Birth Parli.F
## 153     980     152.0    14.9
## 154     880      98.3    12.5
## 155     630     204.8    13.3
## 156     155      45.4    14.0
## 157      72      21.2    18.7
## 158      28      30.8    19.0
## 159      85      68.3    27.0
## 160     183      38.7    17.5
## 161     506     109.7    22.5
## 162     210      47.4    21.8
# index of the last row we want to keep: the final 7 rows are dropped
# (in the tail() output they are regional aggregates, not countries)
last <- nrow(human) - 7

# keep rows 1..last; seq_len() gives an empty index when 'last' is 0,
# whereas 1:last would count backwards (1, 0) and still keep row 1
human <- human[seq_len(last), ]

# label each row with its country name
rownames(human) <- human[["Country"]]

# the Country column is now redundant, so drop it
human <- human %>%
  select(-Country)

# show the first rows of the result
human %>% head()
##               Edu2.FM   Labo.FM Life.Exp Edu.Exp   GNI Mat.Mor Ado.Birth
## Norway      1.0072389 0.8908297     81.6    17.5 64992       4       7.8
## Australia   0.9968288 0.8189415     82.4    20.2 42261       6      12.1
## Switzerland 0.9834369 0.8251001     83.0    15.8 56431       6       1.9
## Denmark     0.9886128 0.8840361     80.2    18.7 44025       5       5.1
## Netherlands 0.9690608 0.8286119     81.6    17.9 45435       6       6.2
## Germany     0.9927835 0.8072289     80.9    16.5 43919       7       3.8
##             Parli.F
## Norway         39.6
## Australia      30.5
## Switzerland    28.5
## Denmark        38.0
## Netherlands    36.9
## Germany        36.9
# draw a pairwise plot matrix of the variables in 'human'
ggpairs(data = human)

# print the matrix of pairwise correlations between the variables

human %>% cor()
##                Edu2.FM      Labo.FM   Life.Exp     Edu.Exp         GNI
## Edu2.FM    1.000000000  0.009564039  0.5760299  0.59325156  0.43030485
## Labo.FM    0.009564039  1.000000000 -0.1400125  0.04732183 -0.02173971
## Life.Exp   0.576029853 -0.140012504  1.0000000  0.78943917  0.62666411
## Edu.Exp    0.593251562  0.047321827  0.7894392  1.00000000  0.62433940
## GNI        0.430304846 -0.021739705  0.6266641  0.62433940  1.00000000
## Mat.Mor   -0.660931770  0.240461075 -0.8571684 -0.73570257 -0.49516234
## Ado.Birth -0.529418415  0.120158862 -0.7291774 -0.70356489 -0.55656208
## Parli.F    0.078635285  0.250232608  0.1700863  0.20608156  0.08920818
##              Mat.Mor  Ado.Birth     Parli.F
## Edu2.FM   -0.6609318 -0.5294184  0.07863528
## Labo.FM    0.2404611  0.1201589  0.25023261
## Life.Exp  -0.8571684 -0.7291774  0.17008631
## Edu.Exp   -0.7357026 -0.7035649  0.20608156
## GNI       -0.4951623 -0.5565621  0.08920818
## Mat.Mor    1.0000000  0.7586615 -0.08944000
## Ado.Birth  0.7586615  1.0000000 -0.07087810
## Parli.F   -0.0894400 -0.0708781  1.00000000
# visualize the correlation matrix of 'human' with corrplot
corrplot(cor(human))

From the correlation plot, we can see the correlation of each variable with itself and with the other variables. For example, Edu.Exp is highly correlated with Life.Exp (0.79), whereas Life.Exp shows very little correlation with Labo.FM (-0.14).

PCA using the Singular Value Decomposition (SVD) method

# centre every variable and rescale it (the summary below shows mean 0)
human_std <- human %>% scale()

# summarize each standardized variable
human_std %>% summary()
##     Edu2.FM           Labo.FM           Life.Exp          Edu.Exp       
##  Min.   :-2.8189   Min.   :-2.6247   Min.   :-2.7188   Min.   :-2.7378  
##  1st Qu.:-0.5233   1st Qu.:-0.5484   1st Qu.:-0.6425   1st Qu.:-0.6782  
##  Median : 0.3503   Median : 0.2316   Median : 0.3056   Median : 0.1140  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.5958   3rd Qu.: 0.7350   3rd Qu.: 0.6717   3rd Qu.: 0.7126  
##  Max.   : 2.6646   Max.   : 1.6632   Max.   : 1.4218   Max.   : 2.4730  
##       GNI             Mat.Mor          Ado.Birth          Parli.F       
##  Min.   :-0.9193   Min.   :-0.6992   Min.   :-1.1325   Min.   :-1.8203  
##  1st Qu.:-0.7243   1st Qu.:-0.6496   1st Qu.:-0.8394   1st Qu.:-0.7409  
##  Median :-0.3013   Median :-0.4726   Median :-0.3298   Median :-0.1403  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3712   3rd Qu.: 0.1932   3rd Qu.: 0.6030   3rd Qu.: 0.6127  
##  Max.   : 5.6890   Max.   : 4.4899   Max.   : 3.8344   Max.   : 3.1850
# run the principal component analysis on the standardized data
# (prcomp computes it via singular value decomposition)
pca_human <- prcomp(x = human_std)

# biplot of the first two principal components: observations in grey,
# arrows for the original variables in pink
biplot(
  pca_human,
  choices = 1:2,
  col = c("grey40", "deeppink2"),
  cex = c(0.8, 1)
)

Using the biplot, we can interpret the correlations. The pink arrows visualize the connections between the original features and the PCs. For example, the 'Labo.FM' arrow points towards positive values of PC1, and the 'Edu.Exp' arrow points towards positive values of PC2.

# summarize the PCA (importance of each component) and print the summary
s <- pca_human %>% summary()
print(s)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0708 1.1397 0.87505 0.77886 0.66196 0.53631 0.45900
## Proportion of Variance 0.5361 0.1624 0.09571 0.07583 0.05477 0.03595 0.02634
## Cumulative Proportion  0.5361 0.6984 0.79413 0.86996 0.92473 0.96069 0.98702
##                            PC8
## Standard deviation     0.32224
## Proportion of Variance 0.01298
## Cumulative Proportion  1.00000
# percentage of variance captured by each PC, rounded to one decimal;
# "Proportion of Variance" is the second row of the importance table
pca_pr <- round(
  100 * s$importance["Proportion of Variance", ],
  digits = 1
)

# show the percentages of variance
pca_pr
##  PC1  PC2  PC3  PC4  PC5  PC6  PC7  PC8 
## 53.6 16.2  9.6  7.6  5.5  3.6  2.6  1.3
# axis labels of the form "PC1 (53.6%)", one per principal component
pc_lab <- sprintf("%s (%s%%)", names(pca_pr), pca_pr)

# biplot with the variance percentages on the axis labels
biplot(
  pca_human,
  xlab = pc_lab[1],
  ylab = pc_lab[2],
  col = c("grey40", "deeppink2"),
  cex = c(0.8, 1)
)

# the same biplot with slightly larger observation labels
biplot(
  pca_human,
  xlab = pc_lab[1],
  ylab = pc_lab[2],
  col = c("grey40", "deeppink2"),
  cex = c(0.9, 1)
)

PC1 explains 53.6% of the total variance, making it a fairly good summary measure, whereas PC2 explains 16.2% of the variance. Also, we can see that high values of PC1 are associated with positive values of almost all the variables. PC1 has a standard deviation of approximately 2.07.