library(MASS)
# Load the `crabs` data set into the workspace and print summary statistics
# for every column.
data("crabs")
summary(object = crabs)
## sp sex index FL RW
## B:100 F:100 Min. : 1.0 Min. : 7.2 Min. : 6.5
## O:100 M:100 1st Qu.:13.0 1st Qu.:12.9 1st Qu.:11.0
## Median :25.5 Median :15.6 Median :12.8
## Mean :25.5 Mean :15.6 Mean :12.7
## 3rd Qu.:38.0 3rd Qu.:18.1 3rd Qu.:14.3
## Max. :50.0 Max. :23.1 Max. :20.2
## CL CW BD
## Min. :14.7 Min. :17.1 Min. : 6.1
## 1st Qu.:27.3 1st Qu.:31.5 1st Qu.:11.4
## Median :32.1 Median :36.8 Median :13.9
## Mean :32.1 Mean :36.4 Mean :14.0
## 3rd Qu.:37.2 3rd Qu.:42.0 3rd Qu.:16.6
## Max. :47.6 Max. :54.6 Max. :21.6
# One group label per crab: the `sp` and `sex` columns (columns 1 and 2)
# joined with a dot, then turned into a factor used for grouping below.
nam <- paste(crabs$sp, crabs$sex, sep = ".")
fac <- factor(nam)

# Box plots of the measurements held in columns 4 to 6.
boxplot(crabs[, 4:6])

# Measurement columns 4 to 8 with the group label attached, followed by FL
# broken down by group with one fill colour per group.
mydataframe <- data.frame(mytype = fac, crabs[, 4:8])
boxplot(FL ~ mytype, data = mydataframe, col = c(2, 3, 4, 7))

# FL against RW, then every pairwise scatter plot of the measurements. The
# integer code of each group selects both the plotting symbol and the colour.
plot(crabs$FL, crabs$RW, pch = as.integer(fac), col = as.integer(fac))
pairs(crabs[, 4:8], col = as.integer(fac), pch = as.integer(fac))
# We observe, as in the plots above, that there is something wrong with RW.
library("stats")
# Principal component analysis of the raw (unscaled) measurements stored in
# columns 4 to 8 of `crabs`, followed by the importance table.
pcaCrabs <- princomp(crabs[, 4:8])
summary(pcaCrabs)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 11.8323 1.135937 0.997631 0.3669098 0.278433
## Proportion of Variance 0.9825 0.009055 0.006984 0.0009447 0.000544
## Cumulative Proportion 0.9825 0.991527 0.998511 0.9994560 1.000000

# Plot the variances of the components (the plot method draws a screeplot).
plot(pcaCrabs)
# Repeat the PCA on standardised measurements: scale() centres each column
# and divides it by its standard deviation. This overwrites the unscaled
# fit stored in `pcaCrabs`.
pcaCrabs <- princomp(scale(crabs[, 4:8]))
summary(pcaCrabs)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.1829 0.38849 0.215406 0.105260 0.0412689
## Proportion of Variance 0.9578 0.03034 0.009327 0.002227 0.0003423
## Cumulative Proportion 0.9578 0.98810 0.997431 0.999658 1.0000000

# Plot the variances of the components of the standardised fit.
plot(pcaCrabs)
# In a PCA the original variables no longer exist as such: each component is a linear combination of them.
# Biplot of the first two components; each observation is labelled with its
# group name.
biplot(pcaCrabs, xlabs = fac, choices = 1:2)

# Scores on components 1 and 2; the integer code of each group selects both
# the colour and the plotting symbol.
plot(pcaCrabs$scores[, c(1, 2)], pch = as.integer(fac), col = as.integer(fac))

# Alternative biplots for other component pairs, kept for reference:
# biplot(pcaCrabs,choices= c(2,3),xlabs= as.numeric(fac))
# biplot(pcaCrabs,choices= c(1,4),xlabs= as.numeric(fac))

# Group codes in the order in which the groups first appear in the data.
unique(as.numeric(fac))
## [1] 2 1 4 3
# Score plots for several pairs of components, each with a group legend.
#
# Points are drawn with colour/symbol code as.numeric(fac), i.e. the position
# of each observation's group within levels(fac). The legend lists
# levels(fac) in that same order, so its colours and symbols must be
# 1, 2, ..., nlevels(fac). Using the order of first appearance in the data
# (2, 1, 4, 3) would attach every label to another group's colour and symbol.
plot(pcaCrabs$scores[, c(1, 4)], col = as.numeric(fac), pch = as.numeric(fac))
legend("topright", legend = levels(fac), col = seq_along(levels(fac)),
  pch = seq_along(levels(fac)))
plot(pcaCrabs$scores[, c(1, 3)], col = as.numeric(fac), pch = as.numeric(fac))
legend("topright", legend = levels(fac), col = seq_along(levels(fac)),
  pch = seq_along(levels(fac)))
plot(pcaCrabs$scores[, c(2, 3)], col = as.numeric(fac), pch = as.numeric(fac))
legend("topright", legend = levels(fac), col = seq_along(levels(fac)),
  pch = seq_along(levels(fac)))

# Biplot of components 2 and 3 with observations labelled by group code,
# then the loadings of every component on the original measurements.
biplot(pcaCrabs, choices = c(2, 3), xlabs = as.numeric(fac))
loadings(pcaCrabs)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## FL -0.452 -0.138 0.531 0.697
## RW -0.428 0.898
## CL -0.453 -0.268 -0.310 -0.792
## CW -0.451 -0.181 -0.653 0.575
## BD -0.451 -0.264 0.443 -0.707 0.176
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## SS loadings 1.0 1.0 1.0 1.0 1.0
## Proportion Var 0.2 0.2 0.2 0.2 0.2
## Cumulative Var 0.2 0.4 0.6 0.8 1.0
# source('http://bioconductor.org/biocLite.R') biocLite()
library("Biobase")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, duplicated, eval, evalq, Filter, Find, get,
## intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unlist
##
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
# Restore the objects saved in the cell-cycle .RData file. `yeast` is
# presumably one of them -- TODO confirm; it is not created anywhere in the
# visible script. The path is user-specific and must exist on this machine.
load("~/Dropbox/Uni/Master/HT_Course/Module_6_DataReduction/cellcycle.RData")

# PCA (prcomp) on the matrix returned by exprs(yeast).
pcaCho <- prcomp(exprs(yeast))

# Screeplot showing every component. `npcs` is spelled out in full (the
# abbreviated `n` only worked through partial argument matching) and is taken
# from the fit instead of hard-coding the number of components.
plot(pcaCho, npcs = length(pcaCho$sdev))
summary(pcaCho)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.762 1.628 1.451 1.2111 1.1459 0.9727 0.8654
## Proportion of Variance 0.195 0.167 0.132 0.0922 0.0825 0.0595 0.0471
## Cumulative Proportion 0.195 0.362 0.494 0.5860 0.6685 0.7280 0.7751
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.8263 0.724 0.6975 0.6740 0.6105 0.5794 0.5227
## Proportion of Variance 0.0429 0.033 0.0306 0.0285 0.0234 0.0211 0.0172
## Cumulative Proportion 0.8180 0.851 0.8815 0.9101 0.9335 0.9546 0.9717
## PC15 PC16 PC17
## Standard deviation 0.4906 0.4571 4.48e-16
## Proportion of Variance 0.0151 0.0131 0.00e+00
## Cumulative Proportion 0.9869 1.0000 1.00e+00
# Biplot of the first two principal components of the yeast PCA.
biplot(pcaCho, choices = 1:2)

# Biplot of components 1 and 3, drawn with smaller text (cex = 0.5).
biplot(pcaCho, cex = 0.5, choices = c(1, 3))