Library

# 打开excel
if (!require("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
  
  if (!require("openxlsx", quietly = TRUE)) {
    stop("'openxlsx' not installed, please install it first.")
  }
}

# 运行PCA
if (!require("FactoMineR", quietly = TRUE)) {
  install.packages("FactoMineR")
  
  if (!require("FactoMineR", quietly = TRUE)) {
    stop("'FactoMineR' not installed, please install it first.")
  }
}

# 可视化、提取PCA
if (!require("factoextra", quietly = TRUE)) {
  install.packages("factoextra")
  
  if (!require("factoextra", quietly = TRUE)) {
    stop("'factoextra' not installed, please install it first.")
  }
}
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# 数据操作、转化
if (!require("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
  
  if (!require("tidyverse", quietly = TRUE)) {
    stop("'tidyverse' not installed, please install it first.")
  }
}
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# library(openxlsx) 
# library(FactoMineR) 
# library(factoextra) 
# library(tidyverse) 

Load

dat <-
  openxlsx::read.xlsx(
    "工作簿1-copy.xlsx",
    sheet = 1
  )
rownames(dat) <-
  gsub(
    " ", "_",
    paste(
      dat$Name, dat$Context, sep = "."
    )
  )

dat2use <- dat[, -c(1:2)]

head(dat2use)
##                    intact.1 intact.2   ctrl.1   ctrl.7   ctrl.77    PEMF.3
## Crest_Height.Left   31.7226  30.0082 21.54430 21.46480  19.82140  20.23870
## Crest_Height.Right  33.7307  31.6864 21.69020 20.88370  21.95790  21.35090
## Foot_Height.Left    14.1485  12.0419 13.13080  4.95309   6.60524   8.19913
## Foot_Height.Right   12.7613  11.6415  7.11575  6.58093   8.20716  13.53590
## Cadence.Left       176.4710 162.1620 96.00000 92.30770 136.36400 244.89800
## Walking_Speed.Left  88.2905  77.7067 15.89870 19.36610  24.29810  65.45520
##                       PEMF.6    PEMF.8   PEMF.10
## Crest_Height.Left   17.75060  25.20900  23.16780
## Crest_Height.Right  18.30080  21.10390  21.78580
## Foot_Height.Left     6.65190   4.59227   4.34999
## Foot_Height.Right    5.16637   6.47711   8.81274
## Cadence.Left       210.52600 171.42900 164.38400
## Walking_Speed.Left  52.16450  37.47010  39.26380
GROUP <-
  factor(
    sapply(strsplit(colnames(dat2use), "\\."), "[", 1),
    levels = c("intact", "ctrl", "PEMF")
  )

Run PCA

dat2PCA <- t(dat2use)

res.pca <- 
  PCA(
    dat2PCA,
    graph = FALSE,
    scale.unit = TRUE
  )
## Warning in PCA(dat2PCA, graph = FALSE, scale.unit = TRUE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
res.pca
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 9 individuals, described by 30 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# ## extra eigenvalues/variances
get_eig(res.pca)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 11.3457570       37.8191899                    37.81919
## Dim.2  5.0470821       16.8236070                    54.64280
## Dim.3  4.2623899       14.2079663                    68.85076
## Dim.4  3.8684430       12.8948101                    81.74557
## Dim.5  2.4925522        8.3085072                    90.05408
## Dim.6  1.6008503        5.3361675                    95.39025
## Dim.7  1.0878532        3.6261774                    99.01643
## Dim.8  0.2950724        0.9835747                   100.00000
# ## Extract the results for variables
var <- get_pca_var(res.pca)

var
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"

Screen plot

## fviz_eig OR fviz_screeplot
p1 <-
  fviz_screeplot(
    res.pca, 
    addlabels = TRUE
  )

p1

Contributions to PC

## Contributions of variables to PC1
fviz_contrib(
  res.pca, 
  choice = "var", 
  axes = 1, 
  top = 20
)

## Contributions of variables to PC2
fviz_contrib(
  res.pca, 
  choice = "var", 
  axes = 2, 
  top = 20
)

PC

## Extract the results for individual
res.ind <- get_pca_ind(res.pca)
res.ind
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
## cos2: the quality of the individuals on the factor map
head(res.ind$coord)
##              Dim.1     Dim.2      Dim.3      Dim.4      Dim.5
## intact.1  4.803103  2.525825  1.1585661  0.5954451 -0.7420380
## intact.2  3.748596  2.573039  1.2012728  0.5206826 -0.2893268
## ctrl.1   -2.657183  1.286937  0.6422179 -1.1237169 -0.2160756
## ctrl.7   -5.245660  2.393930 -0.6991359  1.2053704  2.0833985
## ctrl.77  -3.817184 -2.532057  3.6109861  0.6059946 -2.1186725
## PEMF.3    3.783802 -4.089615  0.5549893  0.7871863  2.3869123

PCA for individual (cos2)

## PCA for individual
fviz_pca_ind(
  res.pca, col.ind = "cos2", 
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping (slow if many points)
)

PCA for individual (group)

# Use habillage to specify groups for coloring
p2 <-
  fviz_pca_ind(
    res.pca,
    label = "none", # hide individual labels
    habillage = GROUP, # color by groups
    palette = RColorBrewer::brewer.pal(8, "Set1")
  )

p2

Kmeans cluster

## k-means clust
df <- scale(dat2PCA)
df[is.na(df)] <- 0
set.seed(123)
km.res <- kmeans(df, 3, nstart = 25)
# km.res

p3 <-
  fviz_cluster(
    km.res, data = df,
    palette = RColorBrewer::brewer.pal(8, "Set1"),
    ggtheme = theme_minimal(),
    main = "Kmeans Clustering Plot"
  )

p3

Pam clust

## pam clust
pam.res <- cluster::pam(df, 3)

fviz_cluster(
  pam.res, 
  main = "Pam Clustering Plot"
)

Dendrogram

## dendrogram
hcut.res <- hcut(df, k = 3)

p7 <-
  fviz_dend(
    hcut.res, 
    rect = TRUE, 
    cex = 0.5,
    k_colors = RColorBrewer::brewer.pal(8, "Set1")
  )
## Warning in get_col(col, k): Length of color vector was longer than the number
## of clusters - first k elements are used
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p7

PCA (ggplot)

a <- 1
b <- 2

coord <- 
  data.frame(res.pca$ind$coord)[, c(a, b)]

colnames(coord) <- c("x_axis", "y_axis")

coord.mean <-
  cbind(coord, GROUP) %>%
  group_by(GROUP) %>%
  summarise(
    x_mean = mean(x_axis),
    y_mean = mean(y_axis)
  )

p4 <-
  ggplot(
    coord, 
    aes(
      x = x_axis, 
      y = y_axis
    )
  ) +
  geom_point(
    aes(
      color = GROUP,
      shape = GROUP
    ),
    size = 3,
    alpha = 0.5
  ) +
  geom_point(
    data = coord.mean,
    aes(
      x = x_mean,
      y = y_mean,
      col = GROUP,
      shape = GROUP
    ),
    size = 6
  ) +
  theme_bw() + 
  theme(
    # legend.position = "none",
    panel.border = element_blank(), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  labs(
    x = paste0("PC", a, ": ", round(res.pca$eig[a, 2], 2), "% variance"),
    y = paste0("PC", b, ": ", round(res.pca$eig[b, 2], 2), "% variance"),
    title = "PCA",
    color = NULL,
    shape = NULL
  )

p4

ggsave(
  "PCA.pdf",
  width = 4,
  height = 3.25
)
cor(x = res.pca$ind$coord[, "Dim.1"], y = as.numeric(unlist(dat2use["Walking_Speed.Left", ])))
## [1] 0.9545638
res.pca$var$coord
##                                   Dim.1       Dim.2        Dim.3       Dim.4
## Crest_Height.Left            0.58223889  0.65736642  0.321703341 -0.08576332
## Crest_Height.Right           0.62946841  0.58546627  0.449894871  0.15922555
## Foot_Height.Left             0.51644097  0.45140036  0.390439814  0.06470402
## Foot_Height.Right            0.74368851 -0.04902897  0.479951602  0.35143769
## Cadence.Left                 0.74333003 -0.58760637 -0.274966112  0.01497499
## Walking_Speed.Left           0.95456375  0.12344700  0.018780294  0.16930267
## Stride_Time.Left            -0.76055200  0.50809379  0.199781554  0.02384526
## Step_Time.Left              -0.80424505  0.41094759  0.121157695  0.40943767
## Opposite_Foot_Off.Left      -0.31194909  0.13579460 -0.504632388  0.18610245
## Opposite_Foot_Contact.Left   0.51871917 -0.22418788  0.041531745 -0.63371097
## Foot_Off.Left               -0.65446424  0.56003722 -0.008876098 -0.36565155
## Single_Support.Left         -0.09528754 -0.13309683  0.894680021  0.20495883
## Double_Support.Left         -0.62121027  0.13855308 -0.193340346  0.37085171
## Stride_Length.Left           0.80318908  0.48590409  0.153082632  0.20102593
## Step_Length.Left             0.60436699  0.25916672  0.212589630  0.05595162
## Step_Width.Left              0.01553123  0.78397645 -0.463970682  0.35116984
## Limp_Index.Right            -0.52019537  0.27679369 -0.151176877  0.30052445
## Cadence.Right                0.77708770 -0.36909235 -0.399163026  0.23604651
## Walking_Speed.Right          0.96114402  0.13950054 -0.032654614  0.15684096
## Stride_Time.Right           -0.68356780 -0.08661481  0.603863640 -0.28644886
## Step_Time.Right             -0.14969035  0.16355955  0.030081049 -0.95361418
## Opposite_Foot_Off.Right     -0.02281402 -0.69303686  0.515172515  0.42886067
## Opposite_Foot_Contact.Right -0.10209634 -0.26562305  0.342888111  0.62108871
## Foot_Off.Right              -0.77522907  0.34579486  0.009159324 -0.20663015
## Single_Support.Right        -0.50644928  0.03267730  0.495711760  0.52682455
## Double_Support.Right        -0.77786463 -0.12617936  0.524889957 -0.24246557
## Stride_Length.Right          0.78106442  0.50483673  0.270925954 -0.09512167
## Step_Length.Right            0.62772217  0.10226479 -0.168739439 -0.35308252
## Step_Width.Right            -0.06201991  0.77963035 -0.444741203  0.30632017
## Limp_Index.NA               -0.36344019 -0.33792790 -0.558731416  0.50626428
##                                    Dim.5
## Crest_Height.Left            0.058431696
## Crest_Height.Right          -0.096032175
## Foot_Height.Left            -0.219720822
## Foot_Height.Right            0.292514055
## Cadence.Left                -0.002973759
## Walking_Speed.Left          -0.066896268
## Stride_Time.Left             0.157228630
## Step_Time.Left               0.017802468
## Opposite_Foot_Off.Left       0.444903089
## Opposite_Foot_Contact.Left   0.309110481
## Foot_Off.Left               -0.205969354
## Single_Support.Left          0.318901125
## Double_Support.Left          0.598636662
## Stride_Length.Left          -0.090183445
## Step_Length.Left             0.702694774
## Step_Width.Left             -0.153539256
## Limp_Index.Right             0.573978767
## Cadence.Right               -0.088993234
## Walking_Speed.Right         -0.145785093
## Stride_Time.Right           -0.244367073
## Step_Time.Right              0.191294360
## Opposite_Foot_Off.Right      0.005856891
## Opposite_Foot_Contact.Right -0.494286515
## Foot_Off.Right              -0.393495840
## Single_Support.Right         0.153398371
## Double_Support.Right        -0.085367568
## Stride_Length.Right         -0.170364444
## Step_Length.Right            0.156434227
## Step_Width.Right            -0.140213834
## Limp_Index.NA               -0.192326603