Libraries

library(tidyverse)
library(kableExtra)
library(ggplot2)
library(mdatools)
library(ggbiplot)
library(plotly)
library(shiny)


Original dataset


df = read.csv('Alcohol_consumption_cleaned.csv', sep = ';', row.names = 'Country')
df = df[1:6]

df %>% kable() %>% kable_styling(full_width = FALSE)
Continent Liquor Wine Beer LifeEx HeartD
France Europe 2.5 63.5 40.1 78 61.1
Italy Europe 0.9 58.0 25.1 78 94.1
Switz Europe 1.7 46.0 65.0 78 106.4
Austria Europe 1.2 15.7 102.1 78 173.0
U.K. Other 1.5 12.2 100.0 77 199.7
U.S.A. America 2.0 8.9 87.8 76 176.0
Russia Asia 3.8 2.7 17.1 69 373.6
CzechRep Europe 1.0 1.7 140.0 73 283.7
Japan Asia 2.1 1.0 55.0 79 34.7
Mexico America 0.8 0.2 50.4 73 36.4


Performing PCA

dfScaled = scale(df[2:6], scale = T) %>% data.frame()

Pc = pca(dfScaled, alpha = 0.05, lim.type = 'jm', method= 'svd', ncomp = 2)
pc = prcomp(dfScaled)

summary(Pc)
## 
## Summary for PCA model (class pca)
## Type of limits: jm
## Alpha: 0.05
## Gamma: 0.01
## 
##        Eigenvals Expvar Cumexpvar Nq Nh
## Comp 1     2.301  46.03     46.03 10 10
## Comp 2     1.606  32.11     78.14 10 10
pca1 = summary(pc)
pca1
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5
## Standard deviation     1.5170 1.2672 0.7644 0.64978 0.29399
## Proportion of Variance 0.4603 0.3211 0.1168 0.08444 0.01729
## Cumulative Proportion  0.4603 0.7814 0.8983 0.98271 1.00000


Scree plot


var = paste(round(pca1$importance[2,]*100, digits = 2), '%')

ggscreeplot(pc)+
  geom_col(fill = 'cornflowerblue') + 
  geom_line()+
  theme_bw()+
  geom_text(label = var, nudge_y = 0.03, nudge_x = 0.2 )


Residuals


T² distance


scoredf = Pc$calres$scores

eigenvalues = Pc$eigenvals*diag(Pc$ncomp)
tnorm = scoredf %*% solve(eigenvalues)

tsquared = c()

for (i in 1:nrow(tnorm)) {
  for (j in 1:ncol(tnorm)) {
    tsquared = c(tsquared, tnorm[i,1:j] %*% scoredf[i,1:j])
  }
}

tsquared = tsquared %>% matrix(nrow = 10, byrow = T)
rownames(tsquared) = rownames(tnorm)
colnames(tsquared) = colnames(scoredf)
tsquared
##               Comp 1    Comp 2
## France   0.846010400 2.4786891
## Italy    1.345417138 1.7523651
## Switz    0.527539249 0.6135703
## Austria  0.047799072 0.8294527
## U.K.     0.011387880 0.5512178
## U.S.A.   0.086134519 0.1884672
## Russia   5.048273778 7.6798452
## CzechRep 0.855615710 3.5396848
## Japan    0.226754801 0.2366353
## Mexico   0.005067455 0.1300725


hotelling = data.frame('T2_Hotelling' = Pc$calres$T2[,2],'Sample' = c(1:nrow(df)), 'Country' = rownames(df), 'Continent' = df$Continent)

hotplot = ggplot(hotelling, aes(x= Sample, y= T2_Hotelling, text = Country, color = Continent))+
  geom_point()+
  geom_line()+
  theme_bw()+
  geom_hline(yintercept = Pc$T2lim[1,2], alpha = 0.3, color = 'orange')+
  geom_hline(yintercept = Pc$T2lim[2,2], alpha = 0.3, color = 'darkred')

div(
ggplotly(hotplot, width = 700, height = 500), align = 'center')
russia = scoredf[7,1:2] %*% t(Pc$loadings) 


barplot(russia, col = 'blue', main = 'Russia')


qdf = data.frame('Q_residuals' = Pc$calres$Q[,2],'Sample' = c(1:nrow(df)), 'Country' = hotelling$Country, 'Continent' = df$Continent)

qplot = ggplot(qdf, aes(x= Sample, y= Q_residuals, text = Country, color = Continent))+
  geom_point()+
  geom_line()+
  theme_bw()+
  geom_hline(yintercept = Pc$Qlim[1,2], alpha = 0.3, color = 'orange')+
  geom_hline(yintercept = Pc$Qlim[2,2], alpha = 0.3, color = 'darkred')

div(
ggplotly(qplot, width = 700, height = 500), align = 'center')
barplot(Pc$calres$residuals[10,],col = 'blue', main = 'Mexico')

Pc = selectCompNum(Pc, ncomp = 2)

selectedcomp = Pc$calres$ncomp.selected

plotResiduals(Pc, show.limits = T, 
              ncomp = selectedcomp, 
              labels=rownames(df), 
              show.labels= T, 
              norm = FALSE ,
              lab.col='black',
              xlab = 'T^2 Hotelling',
              ylab = 'Q residuals', 
              cgroup = factor(df$Continent)
              )


Score plot


scoreplot = ggbiplot(pc, groups = df$Continent, ellipse = F, var.axes = F)+
  theme_bw()+
  geom_text(label= rownames(df), size= 3, nudge_y = 0.1, nudge_x = 0.2)+
  labs(color='Continent')+
  xlim(-3,2)+
  geom_hline(yintercept = 0, alpha = 0.3, linetype = 'longdash')+
  geom_vline(xintercept = 0, alpha = 0.3, linetype = 'longdash')

div(
ggplotly(scoreplot, width = 700, height = 500), align = 'center')


Loading plot


loadingplot = ggbiplot(pc)+
  geom_point(color='white')+
  theme_bw()+
  xlim(-2.5,2.5)+
  geom_hline(yintercept = 0, alpha = 0.3, linetype = 'longdash')+
  geom_vline(xintercept = 0, alpha = 0.3, linetype = 'longdash')

loadingplot


Biplot


biplo = ggbiplot(pc, groups = df$Continent, ellipse = T)+
  theme_bw()+
  geom_text(label= rownames(df), size= 3, nudge_y = 0.1, nudge_x = 0.2)+
  labs(color='Continent')+
  geom_hline(yintercept = 0, alpha = 0.3, linetype = 'longdash')+
  geom_vline(xintercept = 0, alpha = 0.3, linetype = 'longdash')

div(
ggplotly(biplo, width = 700, height = 500), align = 'center')






LS0tDQp0aXRsZTogIkFsY29ob2wgY29uc3VtcHRpb24iDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIHRoZW06IGNlcnVsZWFuDQogICAgdG9jOiBUcnVlDQogICAgY29kZV9kb3dubG9hZDogVHJ1ZQ0KICAgIA0KLS0tDQoNCjxoci8+DQo8YnIvPg0KDQoNCiMjIyBMaWJyYXJpZXMNCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGthYmxlRXh0cmEpDQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KG1kYXRvb2xzKQ0KbGlicmFyeShnZ2JpcGxvdCkNCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShzaGlueSkNCg0KDQoNCmBgYA0KDQo8YnIvPg0KDQojIyMgT3JpZ2luYWwgZGF0YXNldA0KDQo8YnIvPg0KDQpgYGB7cn0NCmRmID0gcmVhZC5jc3YoJ0FsY29ob2xfY29uc3VtcHRpb25fY2xlYW5lZC5jc3YnLCBzZXAgPSAnOycsIHJvdy5uYW1lcyA9ICdDb3VudHJ5JykNCmRmID0gZGZbMTo2XQ0KDQpkZiAlPiUga2FibGUoKSAlPiUga2FibGVfc3R5bGluZyhmdWxsX3dpZHRoID0gRkFMU0UpDQoNCmBgYA0KPGJyLz4NCg0KUGVyZm9ybWluZyBQQ0ENCg0KDQoNCmBgYHtyfQ0KDQpkZlNjYWxlZCA9IHNjYWxlKGRmWzI6Nl0sIHNjYWxlID0gVCkgJT4lIGRhdGEuZnJhbWUoKQ0KDQpQYyA9IHBjYShkZlNjYWxlZCwgYWxwaGEgPSAwLjA1LCBsaW0udHlwZSA9ICdqbScsIG1ldGhvZD0gJ3N2ZCcsIG5jb21wID0gMikNCnBjID0gcHJjb21wKGRmU2NhbGVkKQ0KDQpzdW1tYXJ5KFBjKQ0KcGNhMSA9IHN1bW1hcnkocGMpDQpwY2ExDQoNCmBgYA0KPGJyLz4NCg0KIyMjIFNjcmVlIHBsb3QNCg0KPGJyLz4NCg0KDQpgYGB7ciwgZmlnLmFsaWduPSdjZW50ZXInfQ0KDQp2YXIgPSBwYXN0ZShyb3VuZChwY2ExJGltcG9ydGFuY2VbMixdKjEwMCwgZGlnaXRzID0gMiksICclJykNCg0KZ2dzY3JlZXBsb3QocGMpKw0KICBnZW9tX2NvbChmaWxsID0gJ2Nvcm5mbG93ZXJibHVlJykgKyANCiAgZ2VvbV9saW5lKCkrDQogIHRoZW1lX2J3KCkrDQogIGdlb21fdGV4dChsYWJlbCA9IHZhciwgbnVkZ2VfeSA9IDAuMDMsIG51ZGdlX3ggPSAwLjIgKQ0KICANCg0KDQpgYGANCjxici8+DQoNCiMjIyBSZXNpZHVhbHMNCg0KPGJyLz4NCg0KIyMjIyBUwrIgZGlzdGFuY2UNCg0KPGJyLz4NCg0KYGBge3J9DQoNCnNjb3JlZGYgPSBQYyRjYWxyZXMkc2NvcmVzDQoNCmVpZ2VudmFsdWVzID0gUGMkZWlnZW52YWxzKmRpYWcoUGMkbmNvbXApDQp0bm9ybSA9IHNjb3JlZGYgJSolIHNvbHZlKGVpZ2VudmFsdWVzKQ0KDQp0c3F1YXJlZCA9IGMoKQ0KDQpmb3IgKGkgaW4gMTpucm93KHRub3JtKSkgew0KICBmb3IgKGogaW4gMTpuY29sKHRub3JtKSkgew0KICAgIHRzcXVhcmVkID0gYyh0c3F1YXJlZCwgdG5vcm1baSwxOmpdICUqJSBzY29yZWRmW2ksMTpqXSkNCiAgfQ0KfQ0KDQp0c3F1YXJlZCA9IHRzcXVhcmVkICU+JSBtYXRyaXgobnJvdyA9IDEwLCBieXJvdyA9IFQpDQpyb3duYW1lcyh0c3F1YXJlZCkgPSByb3duYW1lcyh0bm9ybSkNCmNvbG5hbWVzKHRzcXVhcmVkKSA9IGNvbG5hbWVzKHNjb3JlZGYpDQp0c3F1YXJlZA0KDQpgYGANCg0KDQoNCg0KPGJyLz4NCg0KDQoNCmBgYHtyIGZpZy5hbGlnbj0nY2VudGVyJywgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KaG90ZWxsaW5nID0gZGF0YS5mcmFtZSgnVDJfSG90ZWxsaW5nJyA9IFBjJGNhbHJlcyRUMlssMl0sJ1NhbXBsZScgPSBjKDE6bnJvdyhkZikpLCAnQ291bnRyeScgPSByb3duYW1lcyhkZiksICdDb250aW5lbnQnID0gZGYkQ29udGluZW50KQ0KDQpob3RwbG90ID0gZ2dwbG90KGhvdGVsbGluZywgYWVzKHg9IFNhbXBsZSwgeT0gVDJfSG90ZWxsaW5nLCB0ZXh0ID0gQ291bnRyeSwgY29sb3IgPSBDb250aW5lbnQpKSsNCiAgZ2VvbV9wb2ludCgpKw0KICBnZW9tX2xpbmUoKSsNCiAgdGhlbWVfYncoKSsNCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gUGMkVDJsaW1bMSwyXSwgYWxwaGEgPSAwLjMsIGNvbG9yID0gJ29yYW5nZScpKw0KICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSBQYyRUMmxpbVsyLDJdLCBhbHBoYSA9IDAuMywgY29sb3IgPSAnZGFya3JlZCcpDQoNCmRpdigNCmdncGxvdGx5KGhvdHBsb3QsIHdpZHRoID0gNzAwLCBoZWlnaHQgPSA1MDApLCBhbGlnbiA9ICdjZW50ZXInKQ0KDQpgYGANCg0KYGBge3IsIGZpZy5hbGlnbj0nY2VudGVyJ30NCg0KcnVzc2lhID0gc2NvcmVkZls3LDE6Ml0gJSolIHQoUGMkbG9hZGluZ3MpIA0KDQoNCmJhcnBsb3QocnVzc2lhLCBjb2wgPSAnYmx1ZScsIG1haW4gPSAnUnVzc2lhJykNCg0KDQpgYGANCg0KDQo8YnIvPg0KDQpgYGB7ciBmaWcuYWxpZ249J2NlbnRlcicsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQoNCnFkZiA9IGRhdGEuZnJhbWUoJ1FfcmVzaWR1YWxzJyA9IFBjJGNhbHJlcyRRWywyXSwnU2FtcGxlJyA9IGMoMTpucm93KGRmKSksICdDb3VudHJ5JyA9IGhvdGVsbGluZyRDb3VudHJ5LCAnQ29udGluZW50JyA9IGRmJENvbnRpbmVudCkNCg0KcXBsb3QgPSBnZ3Bsb3QocWRmLCBhZXMoeD0gU2FtcGxlLCB5PSBRX3Jlc2lkdWFscywgdGV4dCA9IENvdW50cnksIGNvbG9yID0gQ29udGluZW50KSkrDQogIGdlb21fcG9pbnQoKSsNCiAgZ2VvbV9saW5lKCkrDQogIHRoZW1lX2J3KCkrDQogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IFBjJFFsaW1bMSwyXSwgYWxwaGEgPSAwLjMsIGNvbG9yID0gJ29yYW5nZScpKw0KICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSBQYyRRbGltWzIsMl0sIGFscGhhID0gMC4zLCBjb2xvciA9ICdkYXJrcmVkJykNCg0KZGl2KA0KZ2dwbG90bHkocXBsb3QsIHdpZHRoID0gNzAwLCBoZWlnaHQgPSA1MDApLCBhbGlnbiA9ICdjZW50ZXInKQ0KDQoNCmBgYA0KDQoNCg0KDQpgYGB7ciwgZmlnLmFsaWduPSdjZW50ZXInfQ0KYmFycGxvdChQYyRjYWxyZXMkcmVzaWR1YWxzWzEwLF0sY29sID0gJ2JsdWUnLCBtYWluID0gJ01leGljbycpDQpgYGANCg0KDQoNCg0KDQpgYGB7ciwgZmlnLmFsaWduPSdjZW50ZXInfQ0KDQpQYyA9IHNlbGVjdENvbXBOdW0oUGMsIG5jb21wID0gMikNCg0Kc2VsZWN0ZWRjb21wID0gUGMkY2FscmVzJG5jb21wLnNlbGVjdGVkDQoNCnBsb3RSZXNpZHVhbHMoUGMsIHNob3cubGltaXRzID0gVCwgDQogICAgICAgICAgICAgIG5jb21wID0gc2VsZWN0ZWRjb21wLCANCiAgICAgICAgICAgICAgbGFiZWxzPXJvd25hbWVzKGRmKSwgDQogICAgICAgICAgICAgIHNob3cubGFiZWxzPSBULCANCiAgICAgICAgICAgICAgbm9ybSA9IEZBTFNFICwNCiAgICAgICAgICAgICAgbGFiLmNvbD0nYmxhY2snLA0KICAgICAgICAgICAgICB4bGFiID0gJ1ReMiBIb3RlbGxpbmcnLA0KICAgICAgICAgICAgICB5bGFiID0gJ1EgcmVzaWR1YWxzJywgDQogICAgICAgICAgICAgIGNncm91cCA9IGZhY3RvcihkZiRDb250aW5lbnQpDQogICAgICAgICAgICAgICkNCg0KYGBgDQoNCg0KDQo8YnIvPg0KDQojIyMgU2NvcmUgcGxvdA0KDQo8YnIvPg0KDQoNCg0KYGBge3IgZmlnLmFsaWduPSdjZW50ZXInLCB3YXJuaW5nPUZBTFNFfQ0KDQpzY29yZXBsb3QgPSBnZ2JpcGxvdChwYywgZ3JvdXBzID0gZGYkQ29udGluZW50LCBlbGxpcHNlID0gRiwgdmFyLmF4ZXMgPSBGKSsNCiAgdGhlbWVfYncoKSsNCiAgZ2VvbV90ZXh0KGxhYmVsPSByb3duYW1lcyhkZiksIHNpemU9IDMsIG51ZGdlX3kgPSAwLjEsIG51ZGdlX3ggPSAwLjIpKw0KICBsYWJzKGNvbG9yPSdDb250aW5lbnQnKSsNCiAgeGxpbSgtMywyKSsNCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCwgYWxwaGEgPSAwLjMsIGxpbmV0eXBlID0gJ2xvbmdkYXNoJykrDQogIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IDAsIGFscGhhID0gMC4zLCBsaW5ldHlwZSA9ICdsb25nZGFzaCcpDQoNCmRpdigNCmdncGxvdGx5KHNjb3JlcGxvdCwgd2lkdGggPSA3MDAsIGhlaWdodCA9IDUwMCksIGFsaWduID0gJ2NlbnRlcicpDQoNCmBgYA0KDQoNCg0KPGJyLz4NCg0KIyMjIExvYWRpbmcgcGxvdA0KDQo8YnIvPg0KDQpgYGB7ciwgZmlnLmFsaWduPSdjZW50ZXInfQ0KDQpsb2FkaW5ncGxvdCA9IGdnYmlwbG90KHBjKSsNCiAgZ2VvbV9wb2ludChjb2xvcj0nd2hpdGUnKSsNCiAgdGhlbWVfYncoKSsNCiAgeGxpbSgtMi41LDIuNSkrDQogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDAsIGFscGhhID0gMC4zLCBsaW5ldHlwZSA9ICdsb25nZGFzaCcpKw0KICBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSAwLCBhbHBoYSA9IDAuMywgbGluZXR5cGUgPSAnbG9uZ2Rhc2gnKQ0KDQpsb2FkaW5ncGxvdA0KDQpgYGANCg0KDQo8YnIvPg0KDQojIyMgQmlwbG90DQoNCjxici8+DQoNCg0KYGBge3IgYmlwbG90fQ0KDQpiaXBsbyA9IGdnYmlwbG90KHBjLCBncm91cHMgPSBkZiRDb250aW5lbnQsIGVsbGlwc2UgPSBUKSsNCiAgdGhlbWVfYncoKSsNCiAgZ2VvbV90ZXh0KGxhYmVsPSByb3duYW1lcyhkZiksIHNpemU9IDMsIG51ZGdlX3kgPSAwLjEsIG51ZGdlX3ggPSAwLjIpKw0KICBsYWJzKGNvbG9yPSdDb250aW5lbnQnKSsNCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCwgYWxwaGEgPSAwLjMsIGxpbmV0eXBlID0gJ2xvbmdkYXNoJykrDQogIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IDAsIGFscGhhID0gMC4zLCBsaW5ldHlwZSA9ICdsb25nZGFzaCcpDQoNCmRpdigNCmdncGxvdGx5KGJpcGxvLCB3aWR0aCA9IDcwMCwgaGVpZ2h0ID0gNTAwKSwgYWxpZ24gPSAnY2VudGVyJykNCg0KYGBgDQoNCg0KDQoNCjxici8+DQoNCjxici8+DQoNCjxici8+DQoNCjxici8+DQoNCjxici8+DQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=