# Load the survey variables, inspect missingness, drop incomplete rows, and
# fit a PCA on the standardized variables.
# NOTE(review): the path points into a personal Dropbox folder; consider a
# project-relative path so the script runs on other machines.
data <- haven::read_sav(
  "~/Dropbox (BI Norwegian Business School)/forskning/lesesenteret/oddny/pca/Variabler til PCA Njål.sav"
)

# Plot the missingness pattern before dropping anything.
# 6 of 80 rows have missing:
naniar::vis_miss(data)

# We remove 6 obs
# (The original repeated the read / plot / filter sequence twice; a single
# pass yields the same `data`.)
data <- data[complete.cases(data), ]

# Correlation matrix of the complete cases, shown as a heatmap.
corr_matrix <- cor(data)
ggcorrplot::ggcorrplot(corr_matrix)

# PCA on the raw variables, each scaled to unit variance. The argument is
# spelled `scale.` in full rather than relying on partial matching.
# A preceding `princomp(corr_matrix, scores = T)` call was removed: it passed
# the correlation matrix as if its rows were observations, and its result
# was overwritten by this assignment before being used.
data.pca <- prcomp(data, scale. = TRUE)
# Standard deviation and (cumulative) proportion of variance for each
# principal component. The printed output is kept below as `#>` comments so
# the call itself stays on its own parseable line.
summary(data.pca)
#> Importance of components:
#> PC1 PC2 PC3 PC4 PC5 PC6 PC7
#> Standard deviation 2.1927 1.9901 1.6921 1.38427 1.31719 1.16337 1.03849
#> Proportion of Variance 0.2003 0.1650 0.1193 0.07984 0.07229 0.05639 0.04494
#> Cumulative Proportion 0.2003 0.3653 0.4846 0.56448 0.63677 0.69316 0.73810
#> PC8 PC9 PC10 PC11 PC12 PC13 PC14
#> Standard deviation 0.94920 0.88978 0.80099 0.74359 0.70087 0.68794 0.6426
#> Proportion of Variance 0.03754 0.03299 0.02673 0.02304 0.02047 0.01972 0.0172
#> Cumulative Proportion 0.77564 0.80863 0.83536 0.85840 0.87887 0.89859 0.9158
#> PC15 PC16 PC17 PC18 PC19 PC20 PC21
#> Standard deviation 0.59222 0.56757 0.5253 0.48720 0.45390 0.41249 0.40347
#> Proportion of Variance 0.01461 0.01342 0.0115 0.00989 0.00858 0.00709 0.00678
#> Cumulative Proportion 0.93040 0.94383 0.9553 0.96521 0.97380 0.98089 0.98767
#> PC22 PC23 PC24
#> Standard deviation 0.34214 0.33250 0.26135
#> Proportion of Variance 0.00488 0.00461 0.00285
#> Cumulative Proportion 0.99255 0.99715 1.00000
# Attach the tidyverse; the `%>%` pipelines further down rely on it being
# attached. The startup banner is kept below as `#>` comments.
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.3 ✔ readr 2.1.4
#> ✔ forcats 1.0.0 ✔ stringr 1.5.0
#> ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
#> ✔ purrr 1.0.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# NOTE(review): the disabled line below reads `$loadings`, which is a field
# of princomp() results; prcomp() objects keep the loadings matrix in
# `$rotation`. Left disabled as in the original.
#data.pca$loadings[, 1:5] %>% round(3)

# Variable-level PCA results; `coord` holds the variables' coordinates on
# each dimension. Show the first five dimensions for the first six
# variables, rounded to two decimals (output kept as `#>` comments).
res.var <- factoextra::get_pca_var(data.pca)
res.var$coord[, 1:5] %>%
  head() %>%
  round(2)
#> Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
#> Dominant_language 0.17 -0.70 0.29 -0.28 -0.21
#> Q_NO_Rank_Speak -0.21 0.35 -0.63 -0.27 -0.20
#> Q_L_t_s -0.27 -0.28 -0.64 -0.47 -0.14
#> Q_L_t_w -0.22 -0.48 -0.35 -0.59 -0.06
#> Q_Edu_highest_recode -0.25 -0.24 -0.35 0.19 -0.48
#> Mothers_level_of_education_recode -0.17 -0.27 -0.44 0.19 -0.46
# Scree plot of the first 14 components, with value labels on the bars.
factoextra::fviz_eig(data.pca, addlabels = TRUE, ncp = 14)

# Plot of the variables on the principal components, drawn in black.
factoextra::fviz_pca_var(data.pca, col.var = "black")

# Individual-level PCA results; print the coordinates of the first six
# observations on all 24 dimensions, rounded to two decimals (output kept
# as `#>` comments).
res.ind <- factoextra::get_pca_ind(data.pca)
head(res.ind$coord) %>%
  round(2)
#> Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7 Dim.8 Dim.9 Dim.10 Dim.11 Dim.12
#> 1 -1.91 0.00 0.68 -0.96 2.44 1.10 -0.55 -1.01 -0.36 -0.14 -0.76 -0.39
#> 2 -0.55 -1.01 1.35 0.16 2.67 0.86 -1.22 -0.39 -0.15 -0.43 -0.28 0.33
#> 3 -1.43 1.18 2.24 -0.41 1.16 0.47 -0.45 -0.15 -0.61 -1.00 -1.57 -0.67
#> 4 -0.59 0.15 -0.06 0.78 1.39 1.01 -0.64 0.96 -0.90 0.99 0.84 0.56
#> 5 4.51 3.54 1.04 -0.41 -1.08 1.53 0.85 0.23 1.30 0.30 -1.63 -0.95
#> 6 3.02 0.81 -7.31 -4.29 0.68 0.30 -1.36 -0.09 -0.89 -0.18 -0.72 0.18
#> Dim.13 Dim.14 Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21 Dim.22 Dim.23
#> 1 -0.05 0.56 0.34 -0.32 -0.21 0.39 -0.58 0.05 -0.53 -0.42 -0.38
#> 2 0.34 0.44 -0.45 -0.28 -0.56 0.29 0.33 -0.48 0.08 0.25 0.28
#> 3 0.48 0.96 0.02 0.17 0.60 0.63 0.10 0.51 -1.09 0.21 0.79
#> 4 -0.13 -1.53 0.31 -0.15 -0.06 -0.86 0.20 -0.18 0.55 -0.07 0.21
#> 5 -1.90 -1.20 -0.45 -0.32 -0.41 -0.42 0.57 0.33 -0.01 0.00 -0.32
#> 6 1.24 0.06 -0.53 -1.06 -0.44 -0.60 0.53 -0.09 0.12 0.34 0.18
#> Dim.24
#> 1 0.26
#> 2 -0.13
#> 3 -0.42
#> 4 0.15
#> 5 0.01
#> 6 0.08