Hier müsste man mal jemand anderen fragen
set.seed(99)
x <- matrix(rnorm(15), 5, 3)
prin <- princomp(x)
summary(prin)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.6671 0.55137 0.2682
## Proportion of Variance 0.8808 0.09636 0.0228
## Cumulative Proportion 0.8808 0.97720 1.0000
prin$laodings
## NULL
prin$scores
## Comp.1 Comp.2 Comp.3
## [1,] -0.2853 -0.4550 -5.001e-06
## [2,] -1.7206 0.8594 1.899e-01
## [3,] -1.8083 -0.4837 -2.607e-01
## [4,] 1.5139 -0.3710 3.911e-01
## [5,] 2.3004 0.4503 -3.203e-01
pr <- prcomp(x)
pr$rotation
## PC1 PC2 PC3
## [1,] 0.08693 -0.03664 -0.99554
## [2,] 0.21320 -0.97549 0.05451
## [3,] 0.97313 0.21699 0.07699
predict(pr)
## PC1 PC2 PC3
## [1,] 0.2853 -0.4550 5.001e-06
## [2,] 1.7206 0.8594 -1.899e-01
## [3,] 1.8083 -0.4837 2.607e-01
## [4,] -1.5139 -0.3710 -3.911e-01
## [5,] -2.3004 0.4503 3.203e-01
pr2 <- prcomp(t(x))
biplot(prin)
layout(matrix(1:2, 2, 1))
plot(prin$scores[, 1:2], xlim = c(-3, 3), ylim = c(-2, 2))
points(prin$loadings[, 1:2], pch = 20)
library(HSAUR)
## Loading required package: tools
data("heptathlon", package = "HSAUR")
heptathloaaAqqn$hurdles <- max(heptathlon$hurdles) - heptathlon$hurdles
## Error: object 'heptathloaaAqqn' not found
heptathlon$run200m <- max(heptathlon$run200m) - heptathlon$run200m
heptathlon$run800m <- max(heptathlon$run800m) - heptathlon$run800m
score <- which(colnames(heptathlon) == "score")
plot(heptathlon[, -score])
round(cor(heptathlon[, -score]), 2)
## hurdles highjump shot run200m longjump javelin run800m
## hurdles 1.00 -0.81 -0.65 -0.77 -0.91 -0.01 -0.78
## highjump -0.81 1.00 0.44 0.49 0.78 0.00 0.59
## shot -0.65 0.44 1.00 0.68 0.74 0.27 0.42
## run200m -0.77 0.49 0.68 1.00 0.82 0.33 0.62
## longjump -0.91 0.78 0.74 0.82 1.00 0.07 0.70
## javelin -0.01 0.00 0.27 0.33 0.07 1.00 -0.02
## run800m -0.78 0.59 0.42 0.62 0.70 -0.02 1.00
Negativ d.h. mehr Score –> weniger sprünge o.ä.
heptathlon_pca <- prcomp(heptathlon[, -score], scale = TRUE)
print(heptathlon_pca)
## Standard deviations:
## [1] 2.1119 1.0928 0.7218 0.6761 0.4952 0.2701 0.2214
##
## Rotation:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## hurdles 0.45287 -0.15792 -0.04515 0.02654 -0.09495 -0.78334 -0.38025
## highjump -0.37720 0.24807 0.36778 -0.67999 -0.01880 -0.09940 -0.43393
## shot -0.36307 -0.28941 -0.67619 -0.12432 -0.51165 0.05086 -0.21762
## run200m -0.40790 -0.26039 -0.08359 0.36107 0.64983 -0.02496 -0.45338
## longjump -0.45623 0.05587 -0.13932 -0.11129 0.18430 -0.59021 0.61206
## javelin -0.07541 -0.84169 0.47156 -0.12080 -0.13511 0.02724 0.17295
## run800m -0.37496 0.22449 0.39586 0.60341 -0.50432 -0.15556 -0.09831
plot(heptathlon_pca)
summary(heptathlon_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.112 1.093 0.7218 0.6761 0.495 0.2701 0.221
## Proportion of Variance 0.637 0.171 0.0744 0.0653 0.035 0.0104 0.007
## Cumulative Proportion 0.637 0.808 0.8822 0.9475 0.983 0.9930 1.000
a1 <- heptathlon_pca$rotation[, 1]
a1
## hurdles highjump shot run200m longjump javelin run800m
## 0.45287 -0.37720 -0.36307 -0.40790 -0.45623 -0.07541 -0.37496
#### Extracting the first principal component score from all pre-computed
#### principal components
predict(heptathlon_pca)[, 1]
## Joyner-Kersee (USA) John (GDR) Behmer (GDR)
## -4.121448 -2.882186 -2.649634
## Sablovskaite (URS) Choubenkova (URS) Schulz (GDR)
## -1.343351 -1.359026 -1.043847
## Fleming (AUS) Greiner (USA) Lajbnerova (CZE)
## -1.100386 -0.923174 -0.530251
## Bouraga (URS) Wijnsma (HOL) Dimitrova (BUL)
## -0.759819 -0.556268 -1.186454
## Scheider (SWI) Braun (FRG) Ruotsalainen (FIN)
## 0.015461 0.003774 0.090748
## Yuping (CHN) Hagger (GB) Brown (USA)
## -0.137225 0.171129 0.519253
## Mulliner (GB) Hautenauve (BEL) Kytola (FIN)
## 1.125482 1.085698 1.447055
## Geremias (BRA) Hui-Ing (TAI) Jeong-Mi (KOR)
## 2.014030 2.880299 2.970119
## Launa (PNG)
## 6.270022
heptathlon_pca$x[, 1]
## Joyner-Kersee (USA) John (GDR) Behmer (GDR)
## -4.121448 -2.882186 -2.649634
## Sablovskaite (URS) Choubenkova (URS) Schulz (GDR)
## -1.343351 -1.359026 -1.043847
## Fleming (AUS) Greiner (USA) Lajbnerova (CZE)
## -1.100386 -0.923174 -0.530251
## Bouraga (URS) Wijnsma (HOL) Dimitrova (BUL)
## -0.759819 -0.556268 -1.186454
## Scheider (SWI) Braun (FRG) Ruotsalainen (FIN)
## 0.015461 0.003774 0.090748
## Yuping (CHN) Hagger (GB) Brown (USA)
## -0.137225 0.171129 0.519253
## Mulliner (GB) Hautenauve (BEL) Kytola (FIN)
## 1.125482 1.085698 1.447055
## Geremias (BRA) Hui-Ing (TAI) Jeong-Mi (KOR)
## 2.014030 2.880299 2.970119
## Launa (PNG)
## 6.270022
center <- heptathlon_pca$center
scale <- heptathlon_pca$scale
hm <- as.matrix(heptathlon[, -score])
drop((scale(hm, center = center, scale = scale) %*% heptathlon_pca$rotation)[,
1])
## Joyner-Kersee (USA) John (GDR) Behmer (GDR)
## -4.121448 -2.882186 -2.649634
## Sablovskaite (URS) Choubenkova (URS) Schulz (GDR)
## -1.343351 -1.359026 -1.043847
## Fleming (AUS) Greiner (USA) Lajbnerova (CZE)
## -1.100386 -0.923174 -0.530251
## Bouraga (URS) Wijnsma (HOL) Dimitrova (BUL)
## -0.759819 -0.556268 -1.186454
## Scheider (SWI) Braun (FRG) Ruotsalainen (FIN)
## 0.015461 0.003774 0.090748
## Yuping (CHN) Hagger (GB) Brown (USA)
## -0.137225 0.171129 0.519253
## Mulliner (GB) Hautenauve (BEL) Kytola (FIN)
## 1.125482 1.085698 1.447055
## Geremias (BRA) Hui-Ing (TAI) Jeong-Mi (KOR)
## 2.014030 2.880299 2.970119
## Launa (PNG)
## 6.270022
cor(heptathlon$score, heptathlon_pca$x[, 1])
## [1] -0.9911
plot(heptathlon$score, heptathlon_pca$x[, 1])
biplot(heptathlon_pca, col = c("gray", "black"))
Cancer cell line data Load the tumor cell line data from module 4 and do PCA:
load("~/Dropbox/Uni/Master/HT_Course/Module_4_ANOVA/tumorCellLines_anova.RData")
tpcaTumors = prcomp(t(tumorCellLines), center = T, scale = T)
summary(tpcaTumors)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 32.646 29.844 19.9477 18.0815 15.9164 14.9927
## Proportion of Variance 0.187 0.156 0.0697 0.0573 0.0444 0.0394
## Cumulative Proportion 0.187 0.343 0.4123 0.4696 0.5139 0.5533
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 14.4040 13.8198 12.9850 12.8974 12.7450 12.0700
## Proportion of Variance 0.0363 0.0335 0.0295 0.0291 0.0284 0.0255
## Cumulative Proportion 0.5896 0.6231 0.6526 0.6817 0.7102 0.7357
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 12.0122 11.6009 11.2489 11.0931 10.7450 10.6577
## Proportion of Variance 0.0253 0.0236 0.0222 0.0215 0.0202 0.0199
## Cumulative Proportion 0.7610 0.7845 0.8067 0.8283 0.8485 0.8684
## PC19 PC20 PC21 PC22 PC23 PC24 PC25
## Standard deviation 10.5118 10.2072 9.8323 9.6369 9.2773 9.0553 8.6450
## Proportion of Variance 0.0193 0.0182 0.0169 0.0163 0.0151 0.0144 0.0131
## Cumulative Proportion 0.8877 0.9060 0.9229 0.9392 0.9542 0.9686 0.9817
## PC26 PC27 PC28
## Standard deviation 8.1777 6.13620 3.63e-14
## Proportion of Variance 0.0117 0.00659 0.00e+00
## Cumulative Proportion 0.9934 1.00000 1.00e+00
plot(tpcaTumors)
0.48 + 0.11
coln <- colnames(tumorCellLines)
coln_split <- strsplit(coln, split = "\\.")
cancerType <- sapply(coln_split, function(x) x[1])
fac <- factor(cancerType)
print(fac)
## [1] CO CO CO CO CO CO CO ME ME ME ME ME ME ME OV OV OV OV OV OV OV RE RE
## [24] RE RE RE RE RE
## Levels: CO ME OV RE
plot(tpcaTumors$x[, 1], tpcaTumors$x[, 2])
plot(predict(tpcaTumors), col = as.numeric(fac), pch = as.numeric(fac))
legend("bottomright", legend = levels(fac), col = unique(as.numeric(fac)), pch = unique(as.numeric(fac)))