Library
# 打开excel
if (!require("openxlsx", quietly = TRUE)) {
install.packages("openxlsx")
if (!require("openxlsx", quietly = TRUE)) {
stop("'openxlsx' not installed, please install it first.")
}
}
# 运行PCA
if (!require("FactoMineR", quietly = TRUE)) {
install.packages("FactoMineR")
if (!require("FactoMineR", quietly = TRUE)) {
stop("'FactoMineR' not installed, please install it first.")
}
}
# 可视化、提取PCA
if (!require("factoextra", quietly = TRUE)) {
install.packages("factoextra")
if (!require("factoextra", quietly = TRUE)) {
stop("'factoextra' not installed, please install it first.")
}
}
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# 数据操作、转化
if (!require("tidyverse", quietly = TRUE)) {
install.packages("tidyverse")
if (!require("tidyverse", quietly = TRUE)) {
stop("'tidyverse' not installed, please install it first.")
}
}
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# library(openxlsx)
# library(FactoMineR)
# library(factoextra)
# library(tidyverse)
Load
dat <-
openxlsx::read.xlsx(
"工作簿1-copy.xlsx",
sheet = 1
)
rownames(dat) <-
gsub(
" ", "_",
paste(
dat$Name, dat$Context, sep = "."
)
)
dat2use <- dat[, -c(1:2)]
head(dat2use)
## intact.1 intact.2 ctrl.1 ctrl.7 ctrl.77 PEMF.3
## Crest_Height.Left 31.7226 30.0082 21.54430 21.46480 19.82140 20.23870
## Crest_Height.Right 33.7307 31.6864 21.69020 20.88370 21.95790 21.35090
## Foot_Height.Left 14.1485 12.0419 13.13080 4.95309 6.60524 8.19913
## Foot_Height.Right 12.7613 11.6415 7.11575 6.58093 8.20716 13.53590
## Cadence.Left 176.4710 162.1620 96.00000 92.30770 136.36400 244.89800
## Walking_Speed.Left 88.2905 77.7067 15.89870 19.36610 24.29810 65.45520
## PEMF.6 PEMF.8 PEMF.10
## Crest_Height.Left 17.75060 25.20900 23.16780
## Crest_Height.Right 18.30080 21.10390 21.78580
## Foot_Height.Left 6.65190 4.59227 4.34999
## Foot_Height.Right 5.16637 6.47711 8.81274
## Cadence.Left 210.52600 171.42900 164.38400
## Walking_Speed.Left 52.16450 37.47010 39.26380
GROUP <-
factor(
sapply(strsplit(colnames(dat2use), "\\."), "[", 1),
levels = c("intact", "ctrl", "PEMF")
)
Run PCA
dat2PCA <- t(dat2use)
res.pca <-
PCA(
dat2PCA,
graph = FALSE,
scale.unit = TRUE
)
## Warning in PCA(dat2PCA, graph = FALSE, scale.unit = TRUE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
res.pca
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 9 individuals, described by 30 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# ## extra eigenvalues/variances
get_eig(res.pca)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 11.3457570 37.8191899 37.81919
## Dim.2 5.0470821 16.8236070 54.64280
## Dim.3 4.2623899 14.2079663 68.85076
## Dim.4 3.8684430 12.8948101 81.74557
## Dim.5 2.4925522 8.3085072 90.05408
## Dim.6 1.6008503 5.3361675 95.39025
## Dim.7 1.0878532 3.6261774 99.01643
## Dim.8 0.2950724 0.9835747 100.00000
# ## Extract the results for variables
var <- get_pca_var(res.pca)
var
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
Screen plot
## fviz_eig OR fviz_screeplot
p1 <-
fviz_screeplot(
res.pca,
addlabels = TRUE
)
p1

Contributions to PC
## Contributions of variables to PC1
fviz_contrib(
res.pca,
choice = "var",
axes = 1,
top = 20
)

## Contributions of variables to PC2
fviz_contrib(
res.pca,
choice = "var",
axes = 2,
top = 20
)

PC
## Extract the results for individual
res.ind <- get_pca_ind(res.pca)
res.ind
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
## cos2: the quality of the individuals on the factor map
head(res.ind$coord)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## intact.1 4.803103 2.525825 1.1585661 0.5954451 -0.7420380
## intact.2 3.748596 2.573039 1.2012728 0.5206826 -0.2893268
## ctrl.1 -2.657183 1.286937 0.6422179 -1.1237169 -0.2160756
## ctrl.7 -5.245660 2.393930 -0.6991359 1.2053704 2.0833985
## ctrl.77 -3.817184 -2.532057 3.6109861 0.6059946 -2.1186725
## PEMF.3 3.783802 -4.089615 0.5549893 0.7871863 2.3869123
PCA for individual (cos2)
## PCA for individual
fviz_pca_ind(
res.pca, col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping (slow if many points)
)

PCA for individual (group)
# Use habillage to specify groups for coloring
p2 <-
fviz_pca_ind(
res.pca,
label = "none", # hide individual labels
habillage = GROUP, # color by groups
palette = RColorBrewer::brewer.pal(8, "Set1")
)
p2

Kmeans cluster
## k-means clust
df <- scale(dat2PCA)
df[is.na(df)] <- 0
set.seed(123)
km.res <- kmeans(df, 3, nstart = 25)
# km.res
p3 <-
fviz_cluster(
km.res, data = df,
palette = RColorBrewer::brewer.pal(8, "Set1"),
ggtheme = theme_minimal(),
main = "Kmeans Clustering Plot"
)
p3

Pam clust
## pam clust
pam.res <- cluster::pam(df, 3)
fviz_cluster(
pam.res,
main = "Pam Clustering Plot"
)

Dendrogram
## dendrogram
hcut.res <- hcut(df, k = 3)
p7 <-
fviz_dend(
hcut.res,
rect = TRUE,
cex = 0.5,
k_colors = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in get_col(col, k): Length of color vector was longer than the number
## of clusters - first k elements are used
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p7

PCA (ggplot)
a <- 1
b <- 2
coord <-
data.frame(res.pca$ind$coord)[, c(a, b)]
colnames(coord) <- c("x_axis", "y_axis")
coord.mean <-
cbind(coord, GROUP) %>%
group_by(GROUP) %>%
summarise(
x_mean = mean(x_axis),
y_mean = mean(y_axis)
)
p4 <-
ggplot(
coord,
aes(
x = x_axis,
y = y_axis
)
) +
geom_point(
aes(
color = GROUP,
shape = GROUP
),
size = 3,
alpha = 0.5
) +
geom_point(
data = coord.mean,
aes(
x = x_mean,
y = y_mean,
col = GROUP,
shape = GROUP
),
size = 6
) +
theme_bw() +
theme(
# legend.position = "none",
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")
) +
labs(
x = paste0("PC", a, ": ", round(res.pca$eig[a, 2], 2), "% variance"),
y = paste0("PC", b, ": ", round(res.pca$eig[b, 2], 2), "% variance"),
title = "PCA",
color = NULL,
shape = NULL
)
p4

ggsave(
"PCA.pdf",
width = 4,
height = 3.25
)
cor(x = res.pca$ind$coord[, "Dim.1"], y = as.numeric(unlist(dat2use["Walking_Speed.Left", ])))
## [1] 0.9545638
res.pca$var$coord
## Dim.1 Dim.2 Dim.3 Dim.4
## Crest_Height.Left 0.58223889 0.65736642 0.321703341 -0.08576332
## Crest_Height.Right 0.62946841 0.58546627 0.449894871 0.15922555
## Foot_Height.Left 0.51644097 0.45140036 0.390439814 0.06470402
## Foot_Height.Right 0.74368851 -0.04902897 0.479951602 0.35143769
## Cadence.Left 0.74333003 -0.58760637 -0.274966112 0.01497499
## Walking_Speed.Left 0.95456375 0.12344700 0.018780294 0.16930267
## Stride_Time.Left -0.76055200 0.50809379 0.199781554 0.02384526
## Step_Time.Left -0.80424505 0.41094759 0.121157695 0.40943767
## Opposite_Foot_Off.Left -0.31194909 0.13579460 -0.504632388 0.18610245
## Opposite_Foot_Contact.Left 0.51871917 -0.22418788 0.041531745 -0.63371097
## Foot_Off.Left -0.65446424 0.56003722 -0.008876098 -0.36565155
## Single_Support.Left -0.09528754 -0.13309683 0.894680021 0.20495883
## Double_Support.Left -0.62121027 0.13855308 -0.193340346 0.37085171
## Stride_Length.Left 0.80318908 0.48590409 0.153082632 0.20102593
## Step_Length.Left 0.60436699 0.25916672 0.212589630 0.05595162
## Step_Width.Left 0.01553123 0.78397645 -0.463970682 0.35116984
## Limp_Index.Right -0.52019537 0.27679369 -0.151176877 0.30052445
## Cadence.Right 0.77708770 -0.36909235 -0.399163026 0.23604651
## Walking_Speed.Right 0.96114402 0.13950054 -0.032654614 0.15684096
## Stride_Time.Right -0.68356780 -0.08661481 0.603863640 -0.28644886
## Step_Time.Right -0.14969035 0.16355955 0.030081049 -0.95361418
## Opposite_Foot_Off.Right -0.02281402 -0.69303686 0.515172515 0.42886067
## Opposite_Foot_Contact.Right -0.10209634 -0.26562305 0.342888111 0.62108871
## Foot_Off.Right -0.77522907 0.34579486 0.009159324 -0.20663015
## Single_Support.Right -0.50644928 0.03267730 0.495711760 0.52682455
## Double_Support.Right -0.77786463 -0.12617936 0.524889957 -0.24246557
## Stride_Length.Right 0.78106442 0.50483673 0.270925954 -0.09512167
## Step_Length.Right 0.62772217 0.10226479 -0.168739439 -0.35308252
## Step_Width.Right -0.06201991 0.77963035 -0.444741203 0.30632017
## Limp_Index.NA -0.36344019 -0.33792790 -0.558731416 0.50626428
## Dim.5
## Crest_Height.Left 0.058431696
## Crest_Height.Right -0.096032175
## Foot_Height.Left -0.219720822
## Foot_Height.Right 0.292514055
## Cadence.Left -0.002973759
## Walking_Speed.Left -0.066896268
## Stride_Time.Left 0.157228630
## Step_Time.Left 0.017802468
## Opposite_Foot_Off.Left 0.444903089
## Opposite_Foot_Contact.Left 0.309110481
## Foot_Off.Left -0.205969354
## Single_Support.Left 0.318901125
## Double_Support.Left 0.598636662
## Stride_Length.Left -0.090183445
## Step_Length.Left 0.702694774
## Step_Width.Left -0.153539256
## Limp_Index.Right 0.573978767
## Cadence.Right -0.088993234
## Walking_Speed.Right -0.145785093
## Stride_Time.Right -0.244367073
## Step_Time.Right 0.191294360
## Opposite_Foot_Off.Right 0.005856891
## Opposite_Foot_Contact.Right -0.494286515
## Foot_Off.Right -0.393495840
## Single_Support.Right 0.153398371
## Double_Support.Right -0.085367568
## Stride_Length.Right -0.170364444
## Step_Length.Right 0.156434227
## Step_Width.Right -0.140213834
## Limp_Index.NA -0.192326603