library(pcaMethods)
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.5.3
## Loading required package: BiocGenerics
## Loading required package: generics
## 
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
## 
##     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
##     setequal, union
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
##     unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'pcaMethods'
## The following object is masked from 'package:stats':
## 
##     loadings
library(ggplot2)
## use an expressionset and ggplot
data(sample.ExpressionSet)
#view(as.data.frame(sample.ExpressionSet)) 
dim(as.data.frame(sample.ExpressionSet)) #[1]  26 503
## [1]  26 503
as.data.frame(sample.ExpressionSet)[, 1:10] 
##   AFFX.MurIL2_at AFFX.MurIL10_at AFFX.MurIL4_at AFFX.MurFAS_at AFFX.BioB.5_at
## A       192.7420        97.13700       45.81920       22.54450        96.7875
## B        85.7533       126.19600        8.83135        3.60093        30.4380
## C       176.7570        77.92160       33.06320       14.68830        46.1271
## D       135.5750        93.37130       28.70720       12.33970        70.9319
## E        64.4939        24.39860        5.94492       36.86630        56.1744
## F        76.3569        85.50880       28.29250       11.25680        42.6756
## G       160.5050        98.90860       30.96940       23.00340        86.5156
## H        65.9631        81.69320       14.79230       16.21340        30.7927
## I        56.9039        97.80150       14.23990       12.03750        19.7183
## J       135.6080        90.48380       34.48740        4.54978        46.3520
## K        63.4432        70.57330       20.35210        8.51782        39.1326
## L        78.2126        94.54180       14.15540       27.28520        41.7698
## M        83.0943        75.34550       20.62510       10.16160        80.2197
## N        89.3372        68.58270       15.92310       20.24880        36.4903
## O        91.0615        87.40500       20.15790       15.78490        36.4021
## P        95.9377        84.45810       27.81390       14.32760        35.3054
## Q       179.8450        87.68060       32.79110       15.94880        58.6239
## R       152.4670       108.03200       33.52920       14.67530       114.0620
## S       180.8340       134.26300       19.81720       -7.91911        93.4402
## T        85.4146        91.40310       20.41900       12.88750        22.5168
## U       157.9890        -8.68811       26.87200       11.91860        48.6462
## V       146.8000        85.02120       31.14880       12.83240        90.2215
## W        93.8829        79.29980       22.34200       11.13900        42.0053
## X       103.8550        71.65520       19.01350        7.55564        57.5738
## Y        64.4340        64.23690       12.16860       19.98490        44.8216
## Z       175.6150        78.70680       17.37800        8.96849        61.7044
##   AFFX.BioB.M_at AFFX.BioB.3_at AFFX.BioC.5_at AFFX.BioC.3_at AFFX.BioDn.5_at
## A        89.0730        265.964       110.1360        43.0794       10.918700
## B        25.8461        181.080        57.2889        16.8006       16.178900
## C        57.2033        164.926        67.3980        37.6002       10.149500
## D        69.9766        161.469        77.2207        46.5272        9.736390
## E        49.5822        236.976        41.3488        22.2475       16.902800
## F        26.1262        156.803        37.9780        61.6401        5.333280
## G        75.0083        211.257       110.5510        33.6623       25.118200
## H        42.3352        235.994        47.7690        31.4423       38.757600
## I        41.1207        175.640        24.7875        23.1008       31.404100
## J        91.5307        229.671        66.7302        39.7419        0.398779
## K        39.9136        222.287        62.9876        35.1225        2.981670
## L        49.8397        181.522        46.2777        28.1342       17.750600
## M        63.4794        177.979        61.8372        20.6908       13.764000
## N        24.7007        105.778        54.7061        27.6193       12.904700
## O        47.4641        223.689        62.0684        40.6454       13.390200
## P        47.3578        183.585        40.6705        35.5333       -6.861960
## Q        58.1331        192.221        53.2711        57.5078       21.509100
## R       104.1220        305.567       107.2370        41.1337        3.105360
## S       115.8310        300.689       119.6660        79.9829        5.953470
## T        58.1224        146.081        24.0654        23.4953        5.660120
## U        73.4221        142.913        98.8425        51.5609       52.933800
## V        64.6066        187.132        92.0846        48.1247       15.726700
## W        40.3068        170.583        53.3866        31.8358       15.211600
## X        41.8209        133.279        52.0164        29.9264       -5.352820
## Y        46.1087        187.407        65.9154        37.8611       13.188400
## Z        49.4122        144.784        75.0043        60.4772       10.038500
df1 <- as.data.frame(sample.ExpressionSet)


df1$AFFX.MurIL10_at[1:10] <- NA
df1$AFFX.MurIL10_at
##  [1]        NA        NA        NA        NA        NA        NA        NA
##  [8]        NA        NA        NA  70.57330  94.54180  75.34550  68.58270
## [15]  87.40500  84.45810  87.68060 108.03200 134.26300  91.40310  -8.68811
## [22]  85.02120  79.29980  71.65520  64.23690  78.70680
listPcaMethods()
##  [1] "svd"          "nipals"       "rnipals"      "bpca"         "ppca"        
##  [6] "svdImpute"    "robustPca"    "nlpca"        "llsImpute"    "llsImputeAll"
pc <- pca(df1, method = "bpca")

summary(pc)
## bpca calculated PCA
## Importance of component(s):
##                  PC1       PC2
## R2            0.3439 4.066e-06
## Cumulative R2 0.3439 3.439e-01
dim(sample.ExpressionSet)
## Features  Samples 
##      500       26
dim(scores(pc))
## [1] 26  2
scores(pc)
##           PC1           PC2
## A -0.78646551  0.0048764855
## B  0.39651901  0.0605168411
## C -1.88761888 -0.0076183262
## D -0.42858966 -0.0739140462
## E  0.68209899  0.0091281260
## F -0.60657188  0.0312675446
## G -2.68820076  0.0370703917
## H  1.56117614  0.0365043158
## I  1.42197297  0.0255188834
## J -1.67699935 -0.0538034618
## K  2.13215214  0.0368110823
## L  1.24061939  0.0114482820
## M -0.87032313 -0.0352918241
## N  2.61174797  0.0084116936
## O -1.85287328 -0.0200374761
## P  1.08101664  0.0630593818
## Q  1.74370197 -0.0498231958
## R -3.61803501  0.0269920628
## S  0.52708577 -0.0763383841
## T  2.20872215 -0.0021950332
## U -2.32406057 -0.0028567229
## V -2.76883614  0.0004794694
## W  0.03068859  0.0351104907
## X  1.25976843 -0.0573134710
## Y -0.63725495  0.0171319859
## Z  3.24855602 -0.0251349482
##############################################
score_df <- as.data.frame(scores(pc))
score_df$sample <- rownames(score_df)

meta_df <- pData(sample.ExpressionSet)
meta_df$sample <- rownames(meta_df)

df <- merge(score_df, meta_df, by = "sample")
df
##    sample         PC1           PC2    sex    type score
## 1       A -0.78646551  0.0048764855 Female Control  0.75
## 2       B  0.39651901  0.0605168411   Male    Case  0.40
## 3       C -1.88761888 -0.0076183262   Male Control  0.73
## 4       D -0.42858966 -0.0739140462   Male    Case  0.42
## 5       E  0.68209899  0.0091281260 Female    Case  0.93
## 6       F -0.60657188  0.0312675446   Male Control  0.22
## 7       G -2.68820076  0.0370703917   Male    Case  0.96
## 8       H  1.56117614  0.0365043158   Male    Case  0.79
## 9       I  1.42197297  0.0255188834 Female    Case  0.37
## 10      J -1.67699935 -0.0538034618   Male Control  0.63
## 11      K  2.13215214  0.0368110823   Male    Case  0.26
## 12      L  1.24061939  0.0114482820 Female Control  0.36
## 13      M -0.87032313 -0.0352918241   Male    Case  0.41
## 14      N  2.61174797  0.0084116936   Male    Case  0.80
## 15      O -1.85287328 -0.0200374761 Female    Case  0.10
## 16      P  1.08101664  0.0630593818 Female Control  0.41
## 17      Q  1.74370197 -0.0498231958 Female    Case  0.16
## 18      R -3.61803501  0.0269920628   Male Control  0.72
## 19      S  0.52708577 -0.0763383841   Male    Case  0.17
## 20      T  2.20872215 -0.0021950332 Female    Case  0.74
## 21      U -2.32406057 -0.0028567229   Male Control  0.35
## 22      V -2.76883614  0.0004794694 Female Control  0.77
## 23      W  0.03068859  0.0351104907   Male Control  0.27
## 24      X  1.25976843 -0.0573134710   Male Control  0.98
## 25      Y -0.63725495  0.0171319859 Female    Case  0.94
## 26      Z  3.24855602 -0.0251349482 Female    Case  0.32
###############################################
head(df)
##   sample        PC1          PC2    sex    type score
## 1      A -0.7864655  0.004876486 Female Control  0.75
## 2      B  0.3965190  0.060516841   Male    Case  0.40
## 3      C -1.8876189 -0.007618326   Male Control  0.73
## 4      D -0.4285897 -0.073914046   Male    Case  0.42
## 5      E  0.6820990  0.009128126 Female    Case  0.93
## 6      F -0.6065719  0.031267545   Male Control  0.22
library(ggplot2)
df$type <- as.factor(df$type)
str(df)
## 'data.frame':    26 obs. of  6 variables:
##  $ sample: chr  "A" "B" "C" "D" ...
##  $ PC1   : num  -0.786 0.397 -1.888 -0.429 0.682 ...
##  $ PC2   : num  0.00488 0.06052 -0.00762 -0.07391 0.00913 ...
##  $ sex   : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 2 2 2 1 2 ...
##  $ type  : Factor w/ 2 levels "Case","Control": 2 1 2 1 1 2 1 1 1 2 ...
##  $ score : num  0.75 0.4 0.73 0.42 0.93 0.22 0.96 0.79 0.37 0.63 ...
ggplot(df, aes(PC1, PC2, shape = sex, color = type)) +
  geom_point(size = 3) +
  xlab(paste0("PC1 (", round(pc@R2[1] * 100, 1), "%)")) +
  ylab(paste0("PC2 (", round(pc@R2[2] * 100, 1), "%)")) +
  ggrepel::geom_text_repel(
    aes(label = sample),
    size = 3,
    max.overlaps = 30,
    box.padding = 0.3,
    point.padding = 0.2,
    show.legend = FALSE
  ) +
  #coord_equal() +
  scale_color_brewer(palette = "Set2") +
  stat_ellipse(
    aes(group = type, color = type),
    type = "norm",
    linewidth = 0.8,
    level = 0.95,
    show.legend = FALSE
  ) +
  theme_classic(base_size = 13) +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text = element_text(size = 19, color = "black"),
    axis.line = element_line(linewidth = 0.7, color = "black"),
    axis.ticks = element_line(linewidth = 0.6, color = "black"),
    legend.position = "right",
    legend.text = element_text(size = 15),
    legend.key = element_blank(),
    legend.title = element_text(size = 19)
  )
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

OUT_PATH <- getwd()
ggsave(paste0(OUT_PATH, Sys.Date(), "-pca",  ".tiff"), width = 8, height = 8, dpi = 300, compression = "lzw")
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?