PCA eksperimenter

Missing

6 of the 80 rows have missing values:

# Read the SPSS file holding the PCA variables.
# NOTE(review): absolute, user-specific path; the script only runs on a
# machine where this Dropbox folder exists.
data <- haven::read_sav(
  file = "~/Dropbox (BI Norwegian Business School)/forskning/lesesenteret/oddny/pca/Variabler til PCA Njål.sav"
)

# Plot the missing-value pattern across all variables.
naniar::vis_miss(x = data)

# Listwise deletion: keep only rows with no missing value in any column.
data <- data[stats::complete.cases(data), , drop = FALSE]

We remove the 6 observations with missing values.

Correlations

# Pairwise correlations between all variables in the complete-case data.
corr_matrix <- stats::cor(x = data)

# Heat-map view of the correlation matrix.
ggcorrplot::ggcorrplot(corr = corr_matrix)

PCA

# Fit the PCA on the observations themselves. With scale. = TRUE each variable
# is standardised to unit variance, so this is a PCA of the correlation matrix.
# Do not pass corr_matrix to princomp() as if it were data: that would treat
# the correlation matrix's rows as observations. If a matrix-based fit is ever
# needed, use princomp(covmat = corr_matrix) instead.
data.pca <- prcomp(data, scale. = TRUE)

# Standard deviation and (cumulative) proportion of variance per component.
summary(data.pca)
Importance of components:
                          PC1    PC2    PC3     PC4     PC5     PC6     PC7
Standard deviation     2.1927 1.9901 1.6921 1.38427 1.31719 1.16337 1.03849
Proportion of Variance 0.2003 0.1650 0.1193 0.07984 0.07229 0.05639 0.04494
Cumulative Proportion  0.2003 0.3653 0.4846 0.56448 0.63677 0.69316 0.73810
                           PC8     PC9    PC10    PC11    PC12    PC13   PC14
Standard deviation     0.94920 0.88978 0.80099 0.74359 0.70087 0.68794 0.6426
Proportion of Variance 0.03754 0.03299 0.02673 0.02304 0.02047 0.01972 0.0172
Cumulative Proportion  0.77564 0.80863 0.83536 0.85840 0.87887 0.89859 0.9158
                          PC15    PC16   PC17    PC18    PC19    PC20    PC21
Standard deviation     0.59222 0.56757 0.5253 0.48720 0.45390 0.41249 0.40347
Proportion of Variance 0.01461 0.01342 0.0115 0.00989 0.00858 0.00709 0.00678
Cumulative Proportion  0.93040 0.94383 0.9553 0.96521 0.97380 0.98089 0.98767
                          PC22    PC23    PC24
Standard deviation     0.34214 0.33250 0.26135
Proportion of Variance 0.00488 0.00461 0.00285
Cumulative Proportion  0.99255 0.99715 1.00000

Variable coordinates (loadings) on the first 5 components, shown for the first 6 variables

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Extract the variable-level PCA results from the fitted object.
res.var <- factoextra::get_pca_var(data.pca)

# Coordinates of the variables on components 1-5; head() keeps the first
# six variables, and the values are rounded to two decimals for display.
round(head(res.var$coord[, 1:5]), 2)
                                  Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
Dominant_language                  0.17 -0.70  0.29 -0.28 -0.21
Q_NO_Rank_Speak                   -0.21  0.35 -0.63 -0.27 -0.20
Q_L_t_s                           -0.27 -0.28 -0.64 -0.47 -0.14
Q_L_t_w                           -0.22 -0.48 -0.35 -0.59 -0.06
Q_Edu_highest_recode              -0.25 -0.24 -0.35  0.19 -0.48
Mothers_level_of_education_recode -0.17 -0.27 -0.44  0.19 -0.46

Scree plot for the first 14 components

# Scree plot limited to 14 components, with the value labelled on each bar.
factoextra::fviz_eig(
  data.pca,
  ncp = 14,
  addlabels = TRUE
)

Variable plot (correlation circle) for the first two components

# Plot the variables (not the observations) in component space, in black.
factoextra::fviz_pca_var(
  data.pca,
  col.var = "black"
)

Component scores for the first 6 observations

# Extract the observation-level PCA results from the fitted object.
res.ind <- factoextra::get_pca_ind(data.pca)

# Coordinates of the first six observations on every component, rounded to
# two decimals for display.
round(head(res.ind$coord), 2)
  Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7 Dim.8 Dim.9 Dim.10 Dim.11 Dim.12
1 -1.91  0.00  0.68 -0.96  2.44  1.10 -0.55 -1.01 -0.36  -0.14  -0.76  -0.39
2 -0.55 -1.01  1.35  0.16  2.67  0.86 -1.22 -0.39 -0.15  -0.43  -0.28   0.33
3 -1.43  1.18  2.24 -0.41  1.16  0.47 -0.45 -0.15 -0.61  -1.00  -1.57  -0.67
4 -0.59  0.15 -0.06  0.78  1.39  1.01 -0.64  0.96 -0.90   0.99   0.84   0.56
5  4.51  3.54  1.04 -0.41 -1.08  1.53  0.85  0.23  1.30   0.30  -1.63  -0.95
6  3.02  0.81 -7.31 -4.29  0.68  0.30 -1.36 -0.09 -0.89  -0.18  -0.72   0.18
  Dim.13 Dim.14 Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21 Dim.22 Dim.23
1  -0.05   0.56   0.34  -0.32  -0.21   0.39  -0.58   0.05  -0.53  -0.42  -0.38
2   0.34   0.44  -0.45  -0.28  -0.56   0.29   0.33  -0.48   0.08   0.25   0.28
3   0.48   0.96   0.02   0.17   0.60   0.63   0.10   0.51  -1.09   0.21   0.79
4  -0.13  -1.53   0.31  -0.15  -0.06  -0.86   0.20  -0.18   0.55  -0.07   0.21
5  -1.90  -1.20  -0.45  -0.32  -0.41  -0.42   0.57   0.33  -0.01   0.00  -0.32
6   1.24   0.06  -0.53  -1.06  -0.44  -0.60   0.53  -0.09   0.12   0.34   0.18
  Dim.24
1   0.26
2  -0.13
3  -0.42
4   0.15
5   0.01
6   0.08