PCA using TEs

I used the following bash code to generate the matrix:

# Build the combined matrix by pasting the per-strain column files side by side.
#
# The list of column files is kept in COL_FILES rather than COLUMNS: bash itself
# maintains a variable named COLUMNS (terminal width) and may overwrite it, so
# reusing that name for our own data is fragile.
STRAIN_LIST=/homes/users/lmateo/gonzalez_lab/Tlex_results/Individual_Italy_Swedish/strains.list
COL_FILES=()
for STRAIN in $(< "$STRAIN_LIST"); do
  # Per-strain column generation (two fields per line: TE name and a numeric
  # status: present=1, absent=0, polymorphic=0.5, anything else NA).
  # Left commented out as in the original; the ${STRAIN}.col files must already exist.
  #tail -n +2 /homes/users/lmateo/gonzalez_lab/Tlex_results/Individual_Italy_Swedish/${STRAIN}/tlex_tklcl1/Tresults | awk 'BEGIN{status="N"; print "TE","'$STRAIN'"}{if ($5 =="present"){status=1} else if ($5=="absent"){status=0} else if ($5=="polymorphic"){status=0.5} else {status="NA"}; print $2, status}' | (read -r; printf "%s\n" "$REPLY"; sort) > ${STRAIN}.col
  # Append as a real array element so the file name survives quoting intact.
  COL_FILES+=("$STRAIN.col")
done
paste "${COL_FILES[@]}" > IndividualStr.full.mat
# Blank every odd-numbered field from the 3rd onwards (the TE-name field repeated
# by each pasted file after the first); awk rebuilds each record with its default
# output separator.
awk '{for (i=3;i<=NF;i+=2) $i="" }1' IndividualStr.full.mat > IndividualStr.clean.mat

I load the matrix:

# Directory holding the input tables for this analysis.
rootdir <- "/Users/lidia/Documents/PCA_TEs"

# Tab-separated table with a header line; its first column supplies the row names.
TEtable <- read.table(
  file.path(rootdir, "IndividualStr.clean.noNA.mat"),
  header = TRUE,
  sep = "\t",
  row.names = 1,
  as.is = TRUE
)

# Headerless list of identifiers, read into a single column named "TE".
TE1636 <- read.table(
  file.path(rootdir, "TE_1636.txt"),
  header = FALSE,
  col.names = c("TE")
)

# PCA with the rows of TEtable as observations and its columns as variables.
TEmat <- as.matrix(TEtable)
TEmatPCA <- prcomp(TEmat)
summary(TEmatPCA)
## Importance of components:
##                          PC1     PC2     PC3     PC4     PC5     PC6
## Standard deviation     1.572 0.08166 0.06625 0.06456 0.06093 0.05869
## Proportion of Variance 0.969 0.00262 0.00172 0.00164 0.00146 0.00135
## Cumulative Proportion  0.969 0.97170 0.97342 0.97505 0.97651 0.97786
##                            PC7     PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.05626 0.05462 0.05412 0.05231 0.05162 0.05073
## Proportion of Variance 0.00124 0.00117 0.00115 0.00107 0.00105 0.00101
## Cumulative Proportion  0.97910 0.98027 0.98142 0.98250 0.98354 0.98455
##                           PC13    PC14    PC15    PC16    PC17    PC18
## Standard deviation     0.04835 0.04770 0.04688 0.04565 0.04456 0.04433
## Proportion of Variance 0.00092 0.00089 0.00086 0.00082 0.00078 0.00077
## Cumulative Proportion  0.98547 0.98636 0.98722 0.98804 0.98882 0.98959
##                           PC19    PC20    PC21    PC22    PC23    PC24
## Standard deviation     0.04378 0.04313 0.04069 0.04001 0.03949 0.03843
## Proportion of Variance 0.00075 0.00073 0.00065 0.00063 0.00061 0.00058
## Cumulative Proportion  0.99034 0.99107 0.99172 0.99235 0.99296 0.99354
##                           PC25    PC26   PC27    PC28    PC29    PC30
## Standard deviation     0.03747 0.03648 0.0356 0.03394 0.03335 0.03285
## Proportion of Variance 0.00055 0.00052 0.0005 0.00045 0.00044 0.00042
## Cumulative Proportion  0.99409 0.99461 0.9951 0.99556 0.99600 0.99642
##                          PC31    PC32    PC33    PC34    PC35    PC36
## Standard deviation     0.0321 0.03155 0.03037 0.02930 0.02841 0.02705
## Proportion of Variance 0.0004 0.00039 0.00036 0.00034 0.00032 0.00029
## Cumulative Proportion  0.9968 0.99722 0.99758 0.99792 0.99823 0.99852
##                           PC37    PC38    PC39    PC40   PC41    PC42
## Standard deviation     0.02598 0.02517 0.02411 0.02354 0.0225 0.02099
## Proportion of Variance 0.00026 0.00025 0.00023 0.00022 0.0002 0.00017
## Cumulative Proportion  0.99879 0.99903 0.99926 0.99948 0.9997 0.99985
##                           PC43
## Standard deviation     0.01948
## Proportion of Variance 0.00015
## Cumulative Proportion  1.00000
# Scree plot: variances of the leading principal components of TEmatPCA.
plot(TEmatPCA)

plot of chunk unnamed-chunk-1

# Scatter of the PCA scores; plot() on the score matrix uses its first two columns.
plot(x = TEmatPCA$x)
# identify() labels points picked with the mouse, so it needs an interactive
# graphics device; in this rendered run it returned integer(0).
identify(x = TEmatPCA$x, labels = rownames(TEtable))

plot of chunk unnamed-chunk-1

## integer(0)
# Biplot of TEmatPCA: observations and variables on the first two components.
biplot(TEmatPCA)

plot of chunk unnamed-chunk-1

# Repeat the PCA on the transposed matrix, so the columns of TEmat become the observations.
tTEmat <- t(TEmat)
tTEmatPCA <- prcomp(tTEmat)
# Plot colours in row order: 26 blue, then 16 red, then 1 blue (43 values).
color <- rep(c("blue", "red", "blue"), times = c(26, 16, 1))

summary(tTEmatPCA)
## Importance of components:
##                           PC1    PC2    PC3   PC4   PC5    PC6    PC7
## Standard deviation     0.7940 0.6428 0.6263 0.591 0.572 0.5465 0.5302
## Proportion of Variance 0.0849 0.0556 0.0528 0.047 0.044 0.0402 0.0379
## Cumulative Proportion  0.0849 0.1405 0.1933 0.240 0.284 0.3246 0.3624
##                           PC8    PC9   PC10   PC11   PC12   PC13   PC14
## Standard deviation     0.5257 0.5078 0.5007 0.4921 0.4691 0.4628 0.4548
## Proportion of Variance 0.0372 0.0347 0.0338 0.0326 0.0296 0.0288 0.0278
## Cumulative Proportion  0.3997 0.4344 0.4681 0.5007 0.5304 0.5592 0.5870
##                          PC15   PC16   PC17   PC18   PC19  PC20   PC21
## Standard deviation     0.4429 0.4324 0.4302 0.4248 0.4184 0.395 0.3883
## Proportion of Variance 0.0264 0.0252 0.0249 0.0243 0.0236 0.021 0.0203
## Cumulative Proportion  0.6135 0.6386 0.6635 0.6878 0.7114 0.732 0.7527
##                          PC22   PC23   PC24   PC25   PC26   PC27   PC28
## Standard deviation     0.3831 0.3729 0.3635 0.3539 0.3455 0.3292 0.3246
## Proportion of Variance 0.0198 0.0187 0.0178 0.0169 0.0161 0.0146 0.0142
## Cumulative Proportion  0.7725 0.7912 0.8090 0.8259 0.8419 0.8565 0.8707
##                          PC29  PC30   PC31   PC32   PC33   PC34   PC35
## Standard deviation     0.3187 0.311 0.3063 0.2948 0.2843 0.2758 0.2629
## Proportion of Variance 0.0137 0.013 0.0126 0.0117 0.0109 0.0102 0.0093
## Cumulative Proportion  0.8844 0.897 0.9101 0.9217 0.9326 0.9429 0.9522
##                           PC36    PC37    PC38    PC39    PC40    PC41
## Standard deviation     0.25206 0.24420 0.23385 0.22835 0.21862 0.20414
## Proportion of Variance 0.00855 0.00803 0.00736 0.00702 0.00644 0.00561
## Cumulative Proportion  0.96073 0.96876 0.97612 0.98314 0.98958 0.99519
##                           PC42     PC43
## Standard deviation     0.18898 4.14e-16
## Proportion of Variance 0.00481 0.00e+00
## Cumulative Proportion  1.00000 1.00e+00
# Scree plot: variances of the leading principal components of tTEmatPCA.
plot(tTEmatPCA)

plot of chunk unnamed-chunk-1

# Scatter of the first two score columns, one point per row of tTEmat, coloured with `color`.
plot(x = tTEmatPCA$x, col = color)
# identify() needs an interactive graphics device; in this rendered run it returned integer(0).
identify(x = tTEmatPCA$x, labels = rownames(tTEmat))

plot of chunk unnamed-chunk-1

## integer(0)
#biplot(tTEmatPCA)
# PC1 score of every row of tTEmat, one bar per row, coloured with `color`.
barplot(height = tTEmatPCA$x[, "PC1"], col = color, ylab = "PC1")

plot of chunk unnamed-chunk-1

# Tab-separated "5cat" table with a header line; its first column supplies the row names.
fctable <- read.table(
  file.path(rootdir, "IndividualStr.clean.noNA.5cat.mat"),
  header = TRUE,
  sep = "\t",
  row.names = 1,
  as.is = TRUE
)

# PCA with the rows of fctable as observations and its columns as variables.
fcatmat <- as.matrix(fctable)
fcatPCA <- prcomp(fcatmat)
summary(fcatPCA)
## Importance of components:
##                          PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.243 0.0936 0.07714 0.07255 0.07112 0.06971 0.0681
## Proportion of Variance 0.975 0.0017 0.00115 0.00102 0.00098 0.00094 0.0009
## Cumulative Proportion  0.975 0.9771 0.97831 0.97933 0.98031 0.98125 0.9821
##                            PC8     PC9   PC10    PC11    PC12    PC13
## Standard deviation     0.06641 0.06584 0.0642 0.06235 0.06186 0.06079
## Proportion of Variance 0.00085 0.00084 0.0008 0.00075 0.00074 0.00072
## Cumulative Proportion  0.98300 0.98384 0.9846 0.98540 0.98614 0.98685
##                          PC14    PC15    PC16    PC17    PC18    PC19
## Standard deviation     0.0600 0.05844 0.05819 0.05737 0.05534 0.05478
## Proportion of Variance 0.0007 0.00066 0.00066 0.00064 0.00059 0.00058
## Cumulative Proportion  0.9876 0.98822 0.98887 0.98951 0.99010 0.99068
##                           PC20    PC21    PC22    PC23    PC24    PC25
## Standard deviation     0.05456 0.05330 0.05243 0.05189 0.05053 0.05032
## Proportion of Variance 0.00058 0.00055 0.00053 0.00052 0.00049 0.00049
## Cumulative Proportion  0.99126 0.99181 0.99234 0.99287 0.99336 0.99385
##                           PC26    PC27    PC28    PC29    PC30    PC31
## Standard deviation     0.04839 0.04733 0.04725 0.04654 0.04577 0.04500
## Proportion of Variance 0.00045 0.00043 0.00043 0.00042 0.00041 0.00039
## Cumulative Proportion  0.99431 0.99474 0.99517 0.99559 0.99600 0.99639
##                           PC32    PC33    PC34    PC35    PC36    PC37
## Standard deviation     0.04485 0.04323 0.04248 0.04139 0.04092 0.04020
## Proportion of Variance 0.00039 0.00036 0.00035 0.00033 0.00032 0.00031
## Cumulative Proportion  0.99678 0.99714 0.99749 0.99782 0.99815 0.99846
##                           PC38    PC39    PC40    PC41    PC42    PC43
## Standard deviation     0.03890 0.03785 0.03625 0.03583 0.03511 0.03401
## Proportion of Variance 0.00029 0.00028 0.00025 0.00025 0.00024 0.00022
## Cumulative Proportion  0.99876 0.99903 0.99929 0.99954 0.99978 1.00000
# Scree plot: variances of the leading principal components of fcatPCA.
plot(fcatPCA)

plot of chunk 5cat

# Scatter of the PCA scores; plot() on the score matrix uses its first two columns.
plot(x = fcatPCA$x)
# identify() needs an interactive graphics device; in this rendered run it returned integer(0).
identify(x = fcatPCA$x, labels = rownames(fctable))

plot of chunk 5cat

## integer(0)
# Repeat the PCA on the transposed matrix, so the columns of fcatmat become the observations.
tfcatmat <- t(fcatmat)
tfcatmatPCA <- prcomp(tfcatmat)
# Plot colours in row order: 26 blue, then 16 red, then 1 blue (43 values).
color <- rep(c("blue", "red", "blue"), times = c(26, 16, 1))

summary(tfcatmatPCA)
## Importance of components:
##                           PC1   PC2    PC3    PC4    PC5    PC6    PC7
## Standard deviation     0.9577 0.788 0.7412 0.7262 0.7118 0.6951 0.6784
## Proportion of Variance 0.0694 0.047 0.0416 0.0399 0.0383 0.0365 0.0348
## Cumulative Proportion  0.0694 0.116 0.1580 0.1978 0.2362 0.2727 0.3075
##                           PC8    PC9   PC10   PC11   PC12   PC13  PC14
## Standard deviation     0.6726 0.6570 0.6367 0.6321 0.6208 0.6137 0.597
## Proportion of Variance 0.0342 0.0327 0.0307 0.0302 0.0292 0.0285 0.027
## Cumulative Proportion  0.3417 0.3744 0.4051 0.4353 0.4644 0.4929 0.520
##                          PC15  PC16   PC17   PC18   PC19   PC20   PC21
## Standard deviation     0.5945 0.586 0.5655 0.5595 0.5575 0.5445 0.5353
## Proportion of Variance 0.0267 0.026 0.0242 0.0237 0.0235 0.0224 0.0217
## Cumulative Proportion  0.5467 0.573 0.5968 0.6205 0.6440 0.6664 0.6881
##                          PC22   PC23  PC24   PC25   PC26   PC27   PC28
## Standard deviation     0.5299 0.5161 0.514 0.4944 0.4835 0.4825 0.4751
## Proportion of Variance 0.0212 0.0202 0.020 0.0185 0.0177 0.0176 0.0171
## Cumulative Proportion  0.7093 0.7295 0.749 0.7679 0.7856 0.8032 0.8203
##                          PC29  PC30   PC31   PC32   PC33   PC34   PC35
## Standard deviation     0.4675 0.460 0.4580 0.4418 0.4343 0.4227 0.4178
## Proportion of Variance 0.0165 0.016 0.0159 0.0148 0.0143 0.0135 0.0132
## Cumulative Proportion  0.8368 0.853 0.8687 0.8835 0.8978 0.9113 0.9245
##                          PC36  PC37   PC38   PC39   PC40    PC41    PC42
## Standard deviation     0.4114 0.398 0.3869 0.3708 0.3664 0.35877 0.34743
## Proportion of Variance 0.0128 0.012 0.0113 0.0104 0.0101 0.00974 0.00913
## Cumulative Proportion  0.9373 0.949 0.9606 0.9710 0.9811 0.99087 1.00000
##                            PC43
## Standard deviation     5.51e-16
## Proportion of Variance 0.00e+00
## Cumulative Proportion  1.00e+00
# Scree plot: variances of the leading principal components of tfcatmatPCA.
plot(tfcatmatPCA)

plot of chunk 5cat

# Scatter of the first two score columns, one point per row of tfcatmat, coloured with `color`.
plot(x = tfcatmatPCA$x, col = color)
# identify() needs an interactive graphics device; in this rendered run it returned integer(0).
identify(x = tfcatmatPCA$x, labels = rownames(tfcatmat))

plot of chunk 5cat

## integer(0)
#biplot(tfcatmatPCA)
# PC1 score of every row of tfcatmat, one bar per row, coloured with `color`.
barplot(height = tfcatmatPCA$x[, "PC1"], col = color, ylab = "PC1")

plot of chunk 5cat

# Keep only the rows of fcatmat whose name appears in TE1636$TE;
# drop = FALSE keeps the result a matrix even if a single row matches.
Eu1636mat <- fcatmat[is.element(rownames(fcatmat), TE1636[["TE"]]), , drop = FALSE]
Eu1636matPCA <- prcomp(Eu1636mat)
summary(Eu1636matPCA)
## Importance of components:
##                         PC1     PC2     PC3     PC4     PC5    PC6     PC7
## Standard deviation     3.07 0.14036 0.12264 0.11556 0.11201 0.1079 0.10655
## Proportion of Variance 0.97 0.00203 0.00155 0.00138 0.00129 0.0012 0.00117
## Cumulative Proportion  0.97 0.97235 0.97390 0.97528 0.97657 0.9778 0.97895
##                            PC8    PC9    PC10    PC11    PC12    PC13
## Standard deviation     0.10446 0.0985 0.09732 0.09711 0.09473 0.09368
## Proportion of Variance 0.00113 0.0010 0.00098 0.00097 0.00093 0.00091
## Cumulative Proportion  0.98007 0.9811 0.98205 0.98302 0.98395 0.98485
##                           PC14    PC15    PC16    PC17    PC18   PC19
## Standard deviation     0.09126 0.09006 0.08890 0.08517 0.08405 0.0822
## Proportion of Variance 0.00086 0.00084 0.00082 0.00075 0.00073 0.0007
## Cumulative Proportion  0.98571 0.98655 0.98736 0.98811 0.98884 0.9895
##                           PC20    PC21    PC22    PC23    PC24    PC25
## Standard deviation     0.08068 0.07978 0.07785 0.07575 0.07558 0.07445
## Proportion of Variance 0.00067 0.00066 0.00063 0.00059 0.00059 0.00057
## Cumulative Proportion  0.99021 0.99086 0.99149 0.99208 0.99267 0.99324
##                           PC26    PC27    PC28    PC29    PC30    PC31
## Standard deviation     0.07387 0.07273 0.06909 0.06728 0.06594 0.06412
## Proportion of Variance 0.00056 0.00055 0.00049 0.00047 0.00045 0.00042
## Cumulative Proportion  0.99380 0.99435 0.99484 0.99531 0.99576 0.99618
##                           PC32   PC33    PC34    PC35    PC36    PC37
## Standard deviation     0.06316 0.0623 0.06150 0.05887 0.05765 0.05722
## Proportion of Variance 0.00041 0.0004 0.00039 0.00036 0.00034 0.00034
## Cumulative Proportion  0.99659 0.9970 0.99738 0.99774 0.99808 0.99842
##                           PC38    PC39    PC40    PC41    PC42    PC43
## Standard deviation     0.05451 0.05289 0.05218 0.04991 0.04703 0.04604
## Proportion of Variance 0.00031 0.00029 0.00028 0.00026 0.00023 0.00022
## Cumulative Proportion  0.99873 0.99902 0.99930 0.99955 0.99978 1.00000
# Scree plot: variances of the leading principal components of Eu1636matPCA.
plot(Eu1636matPCA)

plot of chunk Eu1636

# Scatter of the PCA scores; plot() on the score matrix uses its first two columns.
plot(Eu1636matPCA$x)

plot of chunk Eu1636

#identify(Eu1636matPCA$x, labels = row.names(Eu1636mat))

# Repeat the PCA on the transposed subset, so the columns of Eu1636mat become the observations.
tEu1636mat <- t(Eu1636mat)
tEu1636matPCA <- prcomp(tEu1636mat)
# Plot colours in row order: 26 blue, then 16 red, then 1 blue (43 values).
color <- rep(c("blue", "red", "blue"), times = c(26, 16, 1))

summary(tEu1636matPCA)
## Importance of components:
##                           PC1    PC2    PC3    PC4    PC5    PC6    PC7
## Standard deviation     0.8065 0.7036 0.6625 0.6418 0.6183 0.6106 0.5987
## Proportion of Variance 0.0687 0.0523 0.0464 0.0435 0.0404 0.0394 0.0379
## Cumulative Proportion  0.0687 0.1210 0.1673 0.2109 0.2512 0.2906 0.3285
##                           PC8    PC9   PC10   PC11   PC12   PC13   PC14
## Standard deviation     0.5648 0.5608 0.5565 0.5441 0.5368 0.5231 0.5164
## Proportion of Variance 0.0337 0.0332 0.0327 0.0313 0.0304 0.0289 0.0282
## Cumulative Proportion  0.3622 0.3954 0.4281 0.4593 0.4898 0.5187 0.5468
##                          PC15   PC16   PC17   PC18   PC19   PC20  PC21
## Standard deviation     0.5095 0.4881 0.4816 0.4711 0.4624 0.4580 0.446
## Proportion of Variance 0.0274 0.0252 0.0245 0.0234 0.0226 0.0222 0.021
## Cumulative Proportion  0.5743 0.5994 0.6239 0.6474 0.6699 0.6921 0.713
##                         PC22   PC23   PC24   PC25   PC26   PC27   PC28
## Standard deviation     0.435 0.4332 0.4267 0.4233 0.4173 0.3960 0.3855
## Proportion of Variance 0.020 0.0198 0.0192 0.0189 0.0184 0.0166 0.0157
## Cumulative Proportion  0.733 0.7530 0.7722 0.7911 0.8095 0.8261 0.8418
##                          PC29   PC30   PC31   PC32   PC33   PC34   PC35
## Standard deviation     0.3782 0.3681 0.3620 0.3575 0.3525 0.3392 0.3305
## Proportion of Variance 0.0151 0.0143 0.0138 0.0135 0.0131 0.0121 0.0115
## Cumulative Proportion  0.8569 0.8712 0.8850 0.8985 0.9116 0.9238 0.9353
##                          PC36   PC37    PC38    PC39    PC40    PC41
## Standard deviation     0.3284 0.3125 0.30331 0.29906 0.28636 0.27136
## Proportion of Variance 0.0114 0.0103 0.00972 0.00945 0.00866 0.00778
## Cumulative Proportion  0.9467 0.9570 0.96676 0.97621 0.98487 0.99265
##                           PC42     PC43
## Standard deviation     0.26387 5.95e-16
## Proportion of Variance 0.00735 0.00e+00
## Cumulative Proportion  1.00000 1.00e+00
# Scree plot: variances of the leading principal components of tEu1636matPCA.
plot(tEu1636matPCA)

plot of chunk Eu1636

# Scatter of the first two score columns, one point per row of tEu1636mat, coloured with `color`.
plot(x = tEu1636matPCA$x, col = color)

plot of chunk Eu1636

# NOTE(review): the two commented-out calls below still reference the 5cat objects
# (tfcatmatPCA / tfcatmat) rather than tEu1636matPCA / tEu1636mat; update them
# before re-enabling.
#identify(tfcatmatPCA$x, labels=row.names(tfcatmat))
#biplot(tfcatmatPCA)
# PC1 score of every row of tEu1636mat, one bar per row, coloured with `color`.
barplot(height = tEu1636matPCA$x[, "PC1"], col = color, ylab = "PC1")

plot of chunk Eu1636