expr_matrix_file = "test.matrix" #Trinity_trans.TMM.EXPR.matrix"
data = read.table(expr_matrix_file)
log2data = log2(data+1)

Scale the expression data in the columns according to Z-scores and center at zero.

scaled_log2data = scale(log2data, center=T, scale=T)

Now center the expression for each gene

pca_data = t(scale(t(scaled_log2data), center=T, scale=F))

Consider remvoing those genes that have low variance

trans_sd = apply(pca_data, 1, sd)
hist(trans_sd, br=30)

Running PCA

Running PCA using svd

First, transpose the matrix so that the genes (the observations) are the columns and the samples are the rows.

pca_data = t(pca_data)

Now, following how the ‘prcomp’ works under the hood: https://svn.r-project.org/R/trunk/src/library/stats/R/prcomp.R

s = svd(pca_data, nu=0)
sdev = s$d / sqrt(max(1, nrow(pca_data) - 1))
rotation = s$v   # eigenvalues or weights or loadings
pc_scores =  pca_data %*% rotation

head(rotation)
##              [,1]         [,2]        [,3]         [,4]         [,5]
## [1,]  0.015380900  0.115042231 -0.04071386 -0.016166758  0.026375262
## [2,]  0.063644195  0.117248462 -0.01831308  0.011639030  0.032484166
## [3,] -0.137278043  0.059009336 -0.18996903  0.064540927 -0.001172617
## [4,] -0.156316119 -0.076723777 -0.01728652  0.023962771 -0.026674647
## [5,]  0.001916858 -0.048681511 -0.05613182 -0.012680826  0.012019248
## [6,] -0.032518032 -0.004278177  0.01154936 -0.002433161  0.009549742
##               [,6]         [,7]         [,8]        [,9]
## [1,]  0.0279106064 -0.016828122 -0.048915677  0.08417830
## [2,]  0.0354375099 -0.006320862 -0.049945737  0.09110938
## [3,] -0.0691692466  0.198356624 -0.003011987 -0.41107232
## [4,] -0.0843086959 -0.073580533 -0.022794484  0.11443270
## [5,]  0.0885567243 -0.147198369  0.066128228 -0.58500436
## [6,] -0.0004730328  0.010681301 -0.018931128 -0.27937977
head(pc_scores)
##                [,1]       [,2]        [,3]       [,4]        [,5]
## wt_37_1   -6.717970  0.7209640  1.10175522  1.2286432  2.64543507
## wt_37_2   -6.924955  0.2120236 -0.87416865 -0.4582072 -0.72656328
## wt_37_3   -7.269614  0.3572181 -0.07139531 -0.6523621 -1.83772671
## wt_GSNO_1  2.957313 -3.0995315  3.23410554 -1.5778476  0.04694307
## wt_GSNO_2  3.127170 -3.9599796  0.30566905  2.9802351 -1.17675164
## wt_GSNO_3  2.543771 -4.5140454 -3.02848432 -1.3970307  1.16249076
##                  [,6]       [,7]       [,8]         [,9]
## wt_37_1   -0.27315091  0.6200814 -0.2403687 7.632783e-16
## wt_37_2    1.72318626 -1.6070618 -0.6000073 6.561592e-16
## wt_37_3   -1.25805546  0.9670404  0.9112224 2.341877e-17
## wt_GSNO_1 -0.86443777 -0.9196316 -0.2327268 9.799561e-16
## wt_GSNO_2  0.50682366  0.2246499  0.0313378 1.017957e-15
## wt_GSNO_3 -0.01217303  0.5283262  0.1151434 6.533402e-16
plot(pc_scores[,1], pc_scores[,2])

Of course, you could just run the ‘prcomp’ like so:

pca = prcomp(pca_data)
rotation = pca$rotation
pc_scores = pca$x

head(rotation)
##                          PC1          PC2         PC3          PC4
## TR24|c0_g1_i1    0.015380900  0.115042231 -0.04071386 -0.016166758
## TR2779|c0_g1_i1  0.063644195  0.117248462 -0.01831308  0.011639030
## TR127|c1_g1_i1  -0.137278043  0.059009336 -0.18996903  0.064540927
## TR2107|c1_g1_i1 -0.156316119 -0.076723777 -0.01728652  0.023962771
## TR2011|c5_g1_i1  0.001916858 -0.048681511 -0.05613182 -0.012680826
## TR4163|c0_g1_i1 -0.032518032 -0.004278177  0.01154936 -0.002433161
##                          PC5           PC6          PC7          PC8
## TR24|c0_g1_i1    0.026375262  0.0279106064 -0.016828122 -0.048915677
## TR2779|c0_g1_i1  0.032484166  0.0354375099 -0.006320862 -0.049945737
## TR127|c1_g1_i1  -0.001172617 -0.0691692466  0.198356624 -0.003011987
## TR2107|c1_g1_i1 -0.026674647 -0.0843086959 -0.073580533 -0.022794484
## TR2011|c5_g1_i1  0.012019248  0.0885567243 -0.147198369  0.066128228
## TR4163|c0_g1_i1  0.009549742 -0.0004730328  0.010681301 -0.018931128
##                         PC9
## TR24|c0_g1_i1    0.01884967
## TR2779|c0_g1_i1  0.10910829
## TR127|c1_g1_i1  -0.44206123
## TR2107|c1_g1_i1  0.31352682
## TR2011|c5_g1_i1 -0.71080312
## TR4163|c0_g1_i1 -0.31861443
head(pc_scores)
##                 PC1        PC2         PC3        PC4         PC5
## wt_37_1   -6.717970  0.7209640  1.10175522  1.2286432  2.64543507
## wt_37_2   -6.924955  0.2120236 -0.87416865 -0.4582072 -0.72656328
## wt_37_3   -7.269614  0.3572181 -0.07139531 -0.6523621 -1.83772671
## wt_GSNO_1  2.957313 -3.0995315  3.23410554 -1.5778476  0.04694307
## wt_GSNO_2  3.127170 -3.9599796  0.30566905  2.9802351 -1.17675164
## wt_GSNO_3  2.543771 -4.5140454 -3.02848432 -1.3970307  1.16249076
##                   PC6        PC7        PC8           PC9
## wt_37_1   -0.27315091  0.6200814 -0.2403687  1.158145e-15
## wt_37_2    1.72318626 -1.6070618 -0.6000073  8.080559e-16
## wt_37_3   -1.25805546  0.9670404  0.9112224  1.113259e-15
## wt_GSNO_1 -0.86443777 -0.9196316 -0.2327268 -5.809223e-16
## wt_GSNO_2  0.50682366  0.2246499  0.0313378  1.206893e-15
## wt_GSNO_3 -0.01217303  0.5283262  0.1151434 -5.705614e-16
plot(pc_scores[,1], pc_scores[,2])

Running PCA using Eigen

Another way to do this the longer way is to use eigen to extract the eigenvalues and eigenvectors from a correlation matrix

trans_cov_matrix = cov(pca_data)
trans_cov_matrix[1:5,1:5] # just for show
##                 TR24|c0_g1_i1 TR2779|c0_g1_i1 TR127|c1_g1_i1
## TR24|c0_g1_i1      0.15188295      0.17170011     0.02597495
## TR2779|c0_g1_i1    0.17170011      0.25956606    -0.16405131
## TR127|c1_g1_i1     0.02597495     -0.16405131     0.73150779
## TR2107|c1_g1_i1   -0.15814030     -0.37080771     0.55359069
## TR2011|c5_g1_i1   -0.04382272     -0.04770746    -0.04861672
##                 TR2107|c1_g1_i1 TR2011|c5_g1_i1
## TR24|c0_g1_i1       -0.15814030     -0.04382272
## TR2779|c0_g1_i1     -0.37080771     -0.04770746
## TR127|c1_g1_i1       0.55359069     -0.04861672
## TR2107|c1_g1_i1      0.75789618      0.03199903
## TR2011|c5_g1_i1      0.03199903      0.07460541
myEig = eigen(trans_cov_matrix)
names(myEig)
## [1] "values"  "vectors"
#head(myEig$vectors)
scores = pca_data %*% myEig$vectors
scores[,1:15]
##                [,1]       [,2]        [,3]       [,4]        [,5]
## wt_37_1   -6.717970 -0.7209640 -1.10175522 -1.2286432 -2.64543507
## wt_37_2   -6.924955 -0.2120236  0.87416865  0.4582072  0.72656328
## wt_37_3   -7.269614 -0.3572181  0.07139531  0.6523621  1.83772671
## wt_GSNO_1  2.957313  3.0995315 -3.23410554  1.5778476 -0.04694307
## wt_GSNO_2  3.127170  3.9599796 -0.30566905 -2.9802351  1.17675164
## wt_GSNO_3  2.543771  4.5140454  3.02848432  1.3970307 -1.16249076
## wt_ph8_1   4.322625 -3.0894574 -1.12777727  0.9942290  0.25213595
## wt_ph8_2   4.377269 -3.5323629  0.53911967 -0.5235525 -0.70512047
## wt_ph8_3   3.584390 -3.6615306  1.25613912 -0.3472459  0.56681179
##                  [,6]         [,7]        [,8]          [,9]         [,10]
## wt_37_1   -0.27315091  0.620081425 -0.24036870  9.107298e-18 -5.142750e-15
## wt_37_2    1.72318626 -1.607061817 -0.60000732  1.426376e-15 -5.995665e-15
## wt_37_3   -1.25805546  0.967040377  0.91122239 -1.443290e-15 -1.029206e-15
## wt_GSNO_1 -0.86443777 -0.919631635 -0.23272681  8.413409e-17  9.834053e-15
## wt_GSNO_2  0.50682366  0.224649859  0.03133780  1.245965e-15  2.790184e-15
## wt_GSNO_3 -0.01217303  0.528326169  0.11514339  1.455650e-15 -8.487595e-15
## wt_ph8_1   1.95376326  1.547826894 -0.04378211 -1.973248e-16  2.621248e-15
## wt_ph8_2  -0.15742830 -1.351335589  1.42379318 -2.094679e-16  4.785397e-15
## wt_ph8_3  -1.61852770 -0.009895683 -1.36461182 -3.400058e-16  3.783866e-17
##                   [,11]         [,12]         [,13]         [,14]
## wt_37_1    8.456777e-16 -2.537033e-15 -7.502679e-16  7.615436e-16
## wt_37_2    6.062859e-16 -5.195497e-16 -8.153200e-17  5.577136e-16
## wt_37_3    9.679757e-16 -2.173609e-15 -1.961972e-15  8.743006e-16
## wt_GSNO_1  2.023989e-15  4.684946e-15  7.927686e-16 -6.292709e-16
## wt_GSNO_2  1.346579e-16  1.796848e-15  1.734723e-16 -4.098284e-16
## wt_GSNO_3  7.602426e-16 -3.240030e-15  6.409803e-16  3.035766e-17
## wt_ph8_1  -8.907805e-16  6.756748e-16  1.186551e-15 -3.764350e-16
## wt_ph8_2  -2.470246e-15  1.885644e-15  4.926615e-16 -1.368697e-15
## wt_ph8_3  -1.434616e-15 -1.150989e-15  7.112366e-17 -9.523632e-16
##                   [,15]
## wt_37_1    1.731254e-15
## wt_37_2    3.070461e-16
## wt_37_3   -6.071532e-16
## wt_GSNO_1 -4.774826e-15
## wt_GSNO_2 -3.472049e-15
## wt_GSNO_3  1.674876e-15
## wt_ph8_1   2.067790e-15
## wt_ph8_2   1.318390e-16
## wt_ph8_3   3.665471e-15
plot(scores[,1], scores[,2])