library(pcaMethods)
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.5.3
## Loading required package: BiocGenerics
## Loading required package: generics
##
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
##
## as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
## setequal, union
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
## unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'pcaMethods'
## The following object is masked from 'package:stats':
##
## loadings
library(ggplot2)
## use an expressionset and ggplot
data(sample.ExpressionSet)
#view(as.data.frame(sample.ExpressionSet))
dim(as.data.frame(sample.ExpressionSet)) #[1] 26 503
## [1] 26 503
as.data.frame(sample.ExpressionSet)[, 1:10]
## AFFX.MurIL2_at AFFX.MurIL10_at AFFX.MurIL4_at AFFX.MurFAS_at AFFX.BioB.5_at
## A 192.7420 97.13700 45.81920 22.54450 96.7875
## B 85.7533 126.19600 8.83135 3.60093 30.4380
## C 176.7570 77.92160 33.06320 14.68830 46.1271
## D 135.5750 93.37130 28.70720 12.33970 70.9319
## E 64.4939 24.39860 5.94492 36.86630 56.1744
## F 76.3569 85.50880 28.29250 11.25680 42.6756
## G 160.5050 98.90860 30.96940 23.00340 86.5156
## H 65.9631 81.69320 14.79230 16.21340 30.7927
## I 56.9039 97.80150 14.23990 12.03750 19.7183
## J 135.6080 90.48380 34.48740 4.54978 46.3520
## K 63.4432 70.57330 20.35210 8.51782 39.1326
## L 78.2126 94.54180 14.15540 27.28520 41.7698
## M 83.0943 75.34550 20.62510 10.16160 80.2197
## N 89.3372 68.58270 15.92310 20.24880 36.4903
## O 91.0615 87.40500 20.15790 15.78490 36.4021
## P 95.9377 84.45810 27.81390 14.32760 35.3054
## Q 179.8450 87.68060 32.79110 15.94880 58.6239
## R 152.4670 108.03200 33.52920 14.67530 114.0620
## S 180.8340 134.26300 19.81720 -7.91911 93.4402
## T 85.4146 91.40310 20.41900 12.88750 22.5168
## U 157.9890 -8.68811 26.87200 11.91860 48.6462
## V 146.8000 85.02120 31.14880 12.83240 90.2215
## W 93.8829 79.29980 22.34200 11.13900 42.0053
## X 103.8550 71.65520 19.01350 7.55564 57.5738
## Y 64.4340 64.23690 12.16860 19.98490 44.8216
## Z 175.6150 78.70680 17.37800 8.96849 61.7044
## AFFX.BioB.M_at AFFX.BioB.3_at AFFX.BioC.5_at AFFX.BioC.3_at AFFX.BioDn.5_at
## A 89.0730 265.964 110.1360 43.0794 10.918700
## B 25.8461 181.080 57.2889 16.8006 16.178900
## C 57.2033 164.926 67.3980 37.6002 10.149500
## D 69.9766 161.469 77.2207 46.5272 9.736390
## E 49.5822 236.976 41.3488 22.2475 16.902800
## F 26.1262 156.803 37.9780 61.6401 5.333280
## G 75.0083 211.257 110.5510 33.6623 25.118200
## H 42.3352 235.994 47.7690 31.4423 38.757600
## I 41.1207 175.640 24.7875 23.1008 31.404100
## J 91.5307 229.671 66.7302 39.7419 0.398779
## K 39.9136 222.287 62.9876 35.1225 2.981670
## L 49.8397 181.522 46.2777 28.1342 17.750600
## M 63.4794 177.979 61.8372 20.6908 13.764000
## N 24.7007 105.778 54.7061 27.6193 12.904700
## O 47.4641 223.689 62.0684 40.6454 13.390200
## P 47.3578 183.585 40.6705 35.5333 -6.861960
## Q 58.1331 192.221 53.2711 57.5078 21.509100
## R 104.1220 305.567 107.2370 41.1337 3.105360
## S 115.8310 300.689 119.6660 79.9829 5.953470
## T 58.1224 146.081 24.0654 23.4953 5.660120
## U 73.4221 142.913 98.8425 51.5609 52.933800
## V 64.6066 187.132 92.0846 48.1247 15.726700
## W 40.3068 170.583 53.3866 31.8358 15.211600
## X 41.8209 133.279 52.0164 29.9264 -5.352820
## Y 46.1087 187.407 65.9154 37.8611 13.188400
## Z 49.4122 144.784 75.0043 60.4772 10.038500
df1 <- as.data.frame(sample.ExpressionSet)
df1$AFFX.MurIL10_at[1:10] <- NA
df1$AFFX.MurIL10_at
## [1] NA NA NA NA NA NA NA
## [8] NA NA NA 70.57330 94.54180 75.34550 68.58270
## [15] 87.40500 84.45810 87.68060 108.03200 134.26300 91.40310 -8.68811
## [22] 85.02120 79.29980 71.65520 64.23690 78.70680
listPcaMethods()
## [1] "svd" "nipals" "rnipals" "bpca" "ppca"
## [6] "svdImpute" "robustPca" "nlpca" "llsImpute" "llsImputeAll"
pc <- pca(df1, method = "bpca")
summary(pc)
## bpca calculated PCA
## Importance of component(s):
## PC1 PC2
## R2 0.3439 4.066e-06
## Cumulative R2 0.3439 3.439e-01
dim(sample.ExpressionSet)
## Features Samples
## 500 26
dim(scores(pc))
## [1] 26 2
scores(pc)
## PC1 PC2
## A -0.78646551 0.0048764855
## B 0.39651901 0.0605168411
## C -1.88761888 -0.0076183262
## D -0.42858966 -0.0739140462
## E 0.68209899 0.0091281260
## F -0.60657188 0.0312675446
## G -2.68820076 0.0370703917
## H 1.56117614 0.0365043158
## I 1.42197297 0.0255188834
## J -1.67699935 -0.0538034618
## K 2.13215214 0.0368110823
## L 1.24061939 0.0114482820
## M -0.87032313 -0.0352918241
## N 2.61174797 0.0084116936
## O -1.85287328 -0.0200374761
## P 1.08101664 0.0630593818
## Q 1.74370197 -0.0498231958
## R -3.61803501 0.0269920628
## S 0.52708577 -0.0763383841
## T 2.20872215 -0.0021950332
## U -2.32406057 -0.0028567229
## V -2.76883614 0.0004794694
## W 0.03068859 0.0351104907
## X 1.25976843 -0.0573134710
## Y -0.63725495 0.0171319859
## Z 3.24855602 -0.0251349482
##############################################
score_df <- as.data.frame(scores(pc))
score_df$sample <- rownames(score_df)
meta_df <- pData(sample.ExpressionSet)
meta_df$sample <- rownames(meta_df)
df <- merge(score_df, meta_df, by = "sample")
df
## sample PC1 PC2 sex type score
## 1 A -0.78646551 0.0048764855 Female Control 0.75
## 2 B 0.39651901 0.0605168411 Male Case 0.40
## 3 C -1.88761888 -0.0076183262 Male Control 0.73
## 4 D -0.42858966 -0.0739140462 Male Case 0.42
## 5 E 0.68209899 0.0091281260 Female Case 0.93
## 6 F -0.60657188 0.0312675446 Male Control 0.22
## 7 G -2.68820076 0.0370703917 Male Case 0.96
## 8 H 1.56117614 0.0365043158 Male Case 0.79
## 9 I 1.42197297 0.0255188834 Female Case 0.37
## 10 J -1.67699935 -0.0538034618 Male Control 0.63
## 11 K 2.13215214 0.0368110823 Male Case 0.26
## 12 L 1.24061939 0.0114482820 Female Control 0.36
## 13 M -0.87032313 -0.0352918241 Male Case 0.41
## 14 N 2.61174797 0.0084116936 Male Case 0.80
## 15 O -1.85287328 -0.0200374761 Female Case 0.10
## 16 P 1.08101664 0.0630593818 Female Control 0.41
## 17 Q 1.74370197 -0.0498231958 Female Case 0.16
## 18 R -3.61803501 0.0269920628 Male Control 0.72
## 19 S 0.52708577 -0.0763383841 Male Case 0.17
## 20 T 2.20872215 -0.0021950332 Female Case 0.74
## 21 U -2.32406057 -0.0028567229 Male Control 0.35
## 22 V -2.76883614 0.0004794694 Female Control 0.77
## 23 W 0.03068859 0.0351104907 Male Control 0.27
## 24 X 1.25976843 -0.0573134710 Male Control 0.98
## 25 Y -0.63725495 0.0171319859 Female Case 0.94
## 26 Z 3.24855602 -0.0251349482 Female Case 0.32
###############################################
head(df)
## sample PC1 PC2 sex type score
## 1 A -0.7864655 0.004876486 Female Control 0.75
## 2 B 0.3965190 0.060516841 Male Case 0.40
## 3 C -1.8876189 -0.007618326 Male Control 0.73
## 4 D -0.4285897 -0.073914046 Male Case 0.42
## 5 E 0.6820990 0.009128126 Female Case 0.93
## 6 F -0.6065719 0.031267545 Male Control 0.22
library(ggplot2)
df$type <- as.factor(df$type)
str(df)
## 'data.frame': 26 obs. of 6 variables:
## $ sample: chr "A" "B" "C" "D" ...
## $ PC1 : num -0.786 0.397 -1.888 -0.429 0.682 ...
## $ PC2 : num 0.00488 0.06052 -0.00762 -0.07391 0.00913 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 2 2 2 1 2 ...
## $ type : Factor w/ 2 levels "Case","Control": 2 1 2 1 1 2 1 1 1 2 ...
## $ score : num 0.75 0.4 0.73 0.42 0.93 0.22 0.96 0.79 0.37 0.63 ...
ggplot(df, aes(PC1, PC2, shape = sex, color = type)) +
geom_point(size = 3) +
xlab(paste0("PC1 (", round(pc@R2[1] * 100, 1), "%)")) +
ylab(paste0("PC2 (", round(pc@R2[2] * 100, 1), "%)")) +
ggrepel::geom_text_repel(
aes(label = sample),
size = 3,
max.overlaps = 30,
box.padding = 0.3,
point.padding = 0.2,
show.legend = FALSE
) +
#coord_equal() +
scale_color_brewer(palette = "Set2") +
stat_ellipse(
aes(group = type, color = type),
type = "norm",
linewidth = 0.8,
level = 0.95,
show.legend = FALSE
) +
theme_classic(base_size = 13) +
theme(
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
axis.title = element_text(size = 13, face = "bold"),
axis.text = element_text(size = 19, color = "black"),
axis.line = element_line(linewidth = 0.7, color = "black"),
axis.ticks = element_line(linewidth = 0.6, color = "black"),
legend.position = "right",
legend.text = element_text(size = 15),
legend.key = element_blank(),
legend.title = element_text(size = 19)
)
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

OUT_PATH <- getwd()
ggsave(paste0(OUT_PATH, Sys.Date(), "-pca", ".tiff"), width = 8, height = 8, dpi = 300, compression = "lzw")
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?