This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.13.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
library(ggplot2)
library(ggpubr)
setwd("/Users/mansiavunoori/Documents/FinalProject")
getwd()
## [1] "/Users/mansiavunoori/Documents/FinalProject"
list.files(pattern = "vcf")
## [1] "mansi_snps.vcf.gz" "vcf_num_df.csv" "vcf_num_df2"
## [4] "vcf_num.csv" "vcf_scaled.csv"
my_vcf <- "mansi_snps.vcf.gz"
vcf <- vcfR::read.vcfR(my_vcf, convertNA = T)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 130
## header_line: 131
## variant count: 6956
## column count: 2513
##
Meta line 130 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 6956
## Character matrix gt cols: 2513
## skip: 0
## nrows: 6956
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant: 6956
## All variants processed
vcf_num <- vcfR::extract.gt(vcf, element = "GT", IDtoRowNames = F, as.numeric = T, convertNA = T)
write.csv(vcf_num, file = "vcf_num.csv", row.names = F)
list.files()
## [1] "1000genomes_people_info2-1.csv" "Final Project Work Flow_Mansi.Rmd"
## [3] "final_report_Mansi.Rmd" "Final-Project-Work-Flow_Mansi.html"
## [5] "Final-Project-Work-Flow_Mansi.Rmd" "mansi_snps.vcf.gz"
## [7] "rsconnect" "tester"
## [9] "vcf_num_df.csv" "vcf_num_df2"
## [11] "vcf_num.csv" "vcf_scaled.csv"
vcf_num_t <- t(vcf_num)
vcf_num_df <- data.frame(vcf_num_t)
sample <- row.names(vcf_num_df)
vcf_num_df <- data.frame(sample, vcf_num_df)
getwd()
## [1] "/Users/mansiavunoori/Documents/FinalProject"
write.csv(vcf_num_df, file = "vcf_num_df.csv", row.names = F)
list.files()
## [1] "1000genomes_people_info2-1.csv" "Final Project Work Flow_Mansi.Rmd"
## [3] "final_report_Mansi.Rmd" "Final-Project-Work-Flow_Mansi.html"
## [5] "Final-Project-Work-Flow_Mansi.Rmd" "mansi_snps.vcf.gz"
## [7] "rsconnect" "tester"
## [9] "vcf_num_df.csv" "vcf_num_df2"
## [11] "vcf_num.csv" "vcf_scaled.csv"
pop_meta <- read.csv(file = "1000genomes_people_info2-1.csv")
names(pop_meta)
## [1] "pop" "super_pop" "sample" "sex" "lat" "lng"
vcf_num_df2 <- merge(pop_meta, vcf_num_df, by = "sample")
nrow(vcf_num_df2) == nrow(vcf_num_df)
## [1] TRUE
names(vcf_num_df2)[1:15]
## [1] "sample" "pop" "super_pop" "sex" "lat" "lng"
## [7] "X1" "X2" "X3" "X4" "X5" "X6"
## [13] "X7" "X8" "X9"
getwd()
## [1] "/Users/mansiavunoori/Documents/FinalProject"
write.csv(vcf_num_df2, file = "vcf_num_df2", row.names = F)
list.files()
## [1] "1000genomes_people_info2-1.csv" "Final Project Work Flow_Mansi.Rmd"
## [3] "final_report_Mansi.Rmd" "Final-Project-Work-Flow_Mansi.html"
## [5] "Final-Project-Work-Flow_Mansi.Rmd" "mansi_snps.vcf.gz"
## [7] "rsconnect" "tester"
## [9] "vcf_num_df.csv" "vcf_num_df2"
## [11] "vcf_num.csv" "vcf_scaled.csv"
invar_omit <- function(x){
cat("Dataframe of dim", dim(x), "processed...\n")
sds <- apply(x, 2, sd, na.rm = TRUE)
i_var0 <- which(sds == 0)
cat(length(i_var0), "columns removed\n")
if (length(i_var0) > 0) {
x <- x[, -i_var0]
}
return(x)
}
names(vcf_num_df2)[1:10]
## [1] "sample" "pop" "super_pop" "sex" "lat" "lng"
## [7] "X1" "X2" "X3" "X4"
vcf_noinvar <- vcf_num_df2
vcf_noinvar[, -c(1:6)] <- invar_omit(vcf_noinvar[, -c(1:6)])
## Dataframe of dim 2504 6956 processed...
## 1780 columns removed
my_meta_N_invar_cols <- 1780
find_NAs <- function(x){
NAs_TF <- is.na(x)
i_NA <- which(NAs_TF == TRUE)
N_NA <- length(i_NA)
return(i_NA)
}
N_rows <- nrow(vcf_noinvar)
N_NA <- rep(x = 0, times = N_rows)
N_SNPs <- ncol(vcf_noinvar)
cat("This may take a minute...")
## This may take a minute...
for(i in 1:N_rows){
i_NA <- find_NAs(vcf_noinvar[i,])
N_NA_i <- length(i_NA)
N_NA[i] <- N_NA_i
}
cutoff50 <- N_SNPs*0.5
percent_NA <- N_NA/N_SNPs*100
any(percent_NA>50)
## [1] FALSE
mean(percent_NA)
## [1] 0.002455135
my_meta_N_meanNA_rows <- mean(percent_NA)
mean_imputation <- function(df){
cat("This may take some time...")
n_cols <- ncol(df)
for(i in 1:n_cols){
column_i <- df[,i]
mean_i <- mean(column_i, na.rm = TRUE)
NAs_i <- which(is.na(column_i))
N_NAs <- length(NAs_i)
column_i[NAs_i] <- mean_i
df[,i] <- column_i
}
return(df)
}
names(vcf_noinvar)[1:10]
## [1] "sample" "pop" "super_pop" "sex" "lat" "lng"
## [7] "X1" "X2" "X3" "X4"
vcf_noNA <- vcf_noinvar
vcf_noNA[, -c(1:6)] <- mean_imputation(vcf_noinvar[,-c(1:6)])
## This may take some time...
vcf_scaled <- vcf_noNA
vcf_scaled[, -c(1:6)] <- scale(vcf_noNA[, -c(1:6)])
write.csv(vcf_scaled, file = "vcf_scaled.csv")
vcf_pca <- prcomp(vcf_scaled[, -c(1:6)])
screeplot(vcf_pca)
PCA_variation <- function(pca_summary, PCs = 2){
var_explained <- pca_summary$importance[2,1:PCs]*100
var_explained <- round(var_explained, 3)
return(var_explained)
}
vcf_pca_summary <- summary(vcf_pca)
var_out <- PCA_variation(vcf_pca_summary, PCs = 1500)
var_out
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11
## 3.139 2.874 2.141 1.744 1.579 1.423 1.372 1.253 1.228 1.150 1.034
## PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1.004 0.926 0.845 0.826 0.719 0.666 0.662 0.619 0.589 0.558 0.523
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32 PC33
## 0.504 0.495 0.483 0.435 0.415 0.413 0.397 0.387 0.368 0.348 0.343
## PC34 PC35 PC36 PC37 PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 0.332 0.329 0.312 0.311 0.304 0.297 0.294 0.289 0.284 0.276 0.265
## PC45 PC46 PC47 PC48 PC49 PC50 PC51 PC52 PC53 PC54 PC55
## 0.264 0.254 0.249 0.244 0.241 0.238 0.230 0.227 0.222 0.220 0.218
## PC56 PC57 PC58 PC59 PC60 PC61 PC62 PC63 PC64 PC65 PC66
## 0.214 0.210 0.209 0.205 0.202 0.198 0.197 0.196 0.191 0.191 0.188
## PC67 PC68 PC69 PC70 PC71 PC72 PC73 PC74 PC75 PC76 PC77
## 0.188 0.187 0.185 0.184 0.181 0.178 0.174 0.171 0.170 0.167 0.164
## PC78 PC79 PC80 PC81 PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 0.163 0.162 0.159 0.159 0.158 0.156 0.155 0.154 0.152 0.151 0.150
## PC89 PC90 PC91 PC92 PC93 PC94 PC95 PC96 PC97 PC98 PC99
## 0.148 0.146 0.144 0.144 0.143 0.142 0.142 0.141 0.140 0.139 0.139
## PC100 PC101 PC102 PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 0.138 0.137 0.136 0.136 0.135 0.134 0.133 0.132 0.132 0.132 0.131
## PC111 PC112 PC113 PC114 PC115 PC116 PC117 PC118 PC119 PC120 PC121
## 0.131 0.130 0.129 0.128 0.127 0.126 0.126 0.125 0.123 0.123 0.122
## PC122 PC123 PC124 PC125 PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 0.121 0.121 0.120 0.119 0.119 0.119 0.117 0.117 0.116 0.115 0.115
## PC133 PC134 PC135 PC136 PC137 PC138 PC139 PC140 PC141 PC142 PC143
## 0.115 0.114 0.114 0.113 0.113 0.112 0.112 0.112 0.112 0.111 0.110
## PC144 PC145 PC146 PC147 PC148 PC149 PC150 PC151 PC152 PC153 PC154
## 0.110 0.109 0.109 0.109 0.108 0.108 0.107 0.107 0.106 0.106 0.106
## PC155 PC156 PC157 PC158 PC159 PC160 PC161 PC162 PC163 PC164 PC165
## 0.105 0.105 0.104 0.104 0.103 0.103 0.102 0.102 0.102 0.101 0.101
## PC166 PC167 PC168 PC169 PC170 PC171 PC172 PC173 PC174 PC175 PC176
## 0.100 0.100 0.100 0.100 0.099 0.099 0.098 0.098 0.097 0.097 0.097
## PC177 PC178 PC179 PC180 PC181 PC182 PC183 PC184 PC185 PC186 PC187
## 0.096 0.096 0.095 0.095 0.095 0.095 0.094 0.094 0.094 0.093 0.093
## PC188 PC189 PC190 PC191 PC192 PC193 PC194 PC195 PC196 PC197 PC198
## 0.093 0.093 0.092 0.092 0.092 0.091 0.090 0.090 0.090 0.090 0.089
## PC199 PC200 PC201 PC202 PC203 PC204 PC205 PC206 PC207 PC208 PC209
## 0.089 0.089 0.089 0.089 0.088 0.088 0.088 0.087 0.087 0.087 0.087
## PC210 PC211 PC212 PC213 PC214 PC215 PC216 PC217 PC218 PC219 PC220
## 0.087 0.086 0.086 0.086 0.086 0.085 0.085 0.085 0.084 0.084 0.084
## PC221 PC222 PC223 PC224 PC225 PC226 PC227 PC228 PC229 PC230 PC231
## 0.083 0.083 0.083 0.083 0.083 0.083 0.082 0.082 0.082 0.082 0.082
## PC232 PC233 PC234 PC235 PC236 PC237 PC238 PC239 PC240 PC241 PC242
## 0.081 0.081 0.081 0.080 0.080 0.080 0.080 0.080 0.079 0.079 0.079
## PC243 PC244 PC245 PC246 PC247 PC248 PC249 PC250 PC251 PC252 PC253
## 0.079 0.079 0.078 0.078 0.078 0.078 0.078 0.078 0.077 0.077 0.076
## PC254 PC255 PC256 PC257 PC258 PC259 PC260 PC261 PC262 PC263 PC264
## 0.076 0.076 0.076 0.076 0.075 0.075 0.075 0.075 0.074 0.074 0.074
## PC265 PC266 PC267 PC268 PC269 PC270 PC271 PC272 PC273 PC274 PC275
## 0.074 0.074 0.074 0.074 0.073 0.073 0.073 0.073 0.073 0.073 0.072
## PC276 PC277 PC278 PC279 PC280 PC281 PC282 PC283 PC284 PC285 PC286
## 0.072 0.072 0.072 0.072 0.071 0.071 0.071 0.070 0.070 0.070 0.070
## PC287 PC288 PC289 PC290 PC291 PC292 PC293 PC294 PC295 PC296 PC297
## 0.069 0.069 0.069 0.069 0.069 0.069 0.068 0.068 0.068 0.068 0.068
## PC298 PC299 PC300 PC301 PC302 PC303 PC304 PC305 PC306 PC307 PC308
## 0.067 0.067 0.067 0.067 0.067 0.067 0.067 0.066 0.066 0.066 0.066
## PC309 PC310 PC311 PC312 PC313 PC314 PC315 PC316 PC317 PC318 PC319
## 0.066 0.066 0.065 0.065 0.065 0.065 0.065 0.065 0.065 0.064 0.064
## PC320 PC321 PC322 PC323 PC324 PC325 PC326 PC327 PC328 PC329 PC330
## 0.064 0.064 0.064 0.064 0.063 0.063 0.063 0.063 0.063 0.063 0.063
## PC331 PC332 PC333 PC334 PC335 PC336 PC337 PC338 PC339 PC340 PC341
## 0.062 0.062 0.062 0.062 0.062 0.062 0.062 0.061 0.061 0.061 0.061
## PC342 PC343 PC344 PC345 PC346 PC347 PC348 PC349 PC350 PC351 PC352
## 0.061 0.061 0.061 0.061 0.061 0.061 0.061 0.060 0.060 0.060 0.060
## PC353 PC354 PC355 PC356 PC357 PC358 PC359 PC360 PC361 PC362 PC363
## 0.060 0.060 0.060 0.059 0.059 0.059 0.059 0.059 0.059 0.059 0.059
## PC364 PC365 PC366 PC367 PC368 PC369 PC370 PC371 PC372 PC373 PC374
## 0.059 0.058 0.058 0.058 0.058 0.058 0.058 0.058 0.058 0.058 0.058
## PC375 PC376 PC377 PC378 PC379 PC380 PC381 PC382 PC383 PC384 PC385
## 0.058 0.058 0.058 0.058 0.058 0.057 0.057 0.057 0.057 0.057 0.057
## PC386 PC387 PC388 PC389 PC390 PC391 PC392 PC393 PC394 PC395 PC396
## 0.057 0.057 0.057 0.057 0.057 0.057 0.056 0.056 0.056 0.056 0.056
## PC397 PC398 PC399 PC400 PC401 PC402 PC403 PC404 PC405 PC406 PC407
## 0.056 0.056 0.056 0.056 0.056 0.055 0.055 0.055 0.055 0.055 0.055
## PC408 PC409 PC410 PC411 PC412 PC413 PC414 PC415 PC416 PC417 PC418
## 0.055 0.055 0.054 0.054 0.054 0.054 0.054 0.054 0.053 0.053 0.053
## PC419 PC420 PC421 PC422 PC423 PC424 PC425 PC426 PC427 PC428 PC429
## 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.052 0.052 0.052
## PC430 PC431 PC432 PC433 PC434 PC435 PC436 PC437 PC438 PC439 PC440
## 0.052 0.052 0.052 0.052 0.052 0.052 0.052 0.051 0.051 0.051 0.051
## PC441 PC442 PC443 PC444 PC445 PC446 PC447 PC448 PC449 PC450 PC451
## 0.051 0.051 0.051 0.051 0.051 0.051 0.050 0.050 0.050 0.050 0.050
## PC452 PC453 PC454 PC455 PC456 PC457 PC458 PC459 PC460 PC461 PC462
## 0.050 0.050 0.050 0.050 0.049 0.049 0.049 0.049 0.049 0.049 0.049
## PC463 PC464 PC465 PC466 PC467 PC468 PC469 PC470 PC471 PC472 PC473
## 0.049 0.049 0.049 0.048 0.048 0.048 0.048 0.048 0.048 0.048 0.048
## PC474 PC475 PC476 PC477 PC478 PC479 PC480 PC481 PC482 PC483 PC484
## 0.048 0.048 0.048 0.047 0.047 0.047 0.047 0.047 0.047 0.047 0.047
## PC485 PC486 PC487 PC488 PC489 PC490 PC491 PC492 PC493 PC494 PC495
## 0.047 0.047 0.047 0.047 0.047 0.046 0.046 0.046 0.046 0.046 0.046
## PC496 PC497 PC498 PC499 PC500 PC501 PC502 PC503 PC504 PC505 PC506
## 0.046 0.046 0.046 0.046 0.046 0.046 0.046 0.046 0.045 0.045 0.045
## PC507 PC508 PC509 PC510 PC511 PC512 PC513 PC514 PC515 PC516 PC517
## 0.045 0.045 0.045 0.045 0.045 0.045 0.045 0.045 0.045 0.045 0.045
## PC518 PC519 PC520 PC521 PC522 PC523 PC524 PC525 PC526 PC527 PC528
## 0.045 0.045 0.045 0.044 0.044 0.044 0.044 0.044 0.044 0.044 0.044
## PC529 PC530 PC531 PC532 PC533 PC534 PC535 PC536 PC537 PC538 PC539
## 0.044 0.044 0.044 0.044 0.044 0.044 0.044 0.044 0.044 0.044 0.044
## PC540 PC541 PC542 PC543 PC544 PC545 PC546 PC547 PC548 PC549 PC550
## 0.044 0.044 0.044 0.043 0.043 0.043 0.043 0.043 0.043 0.043 0.043
## PC551 PC552 PC553 PC554 PC555 PC556 PC557 PC558 PC559 PC560 PC561
## 0.043 0.043 0.043 0.043 0.043 0.043 0.043 0.042 0.042 0.042 0.042
## PC562 PC563 PC564 PC565 PC566 PC567 PC568 PC569 PC570 PC571 PC572
## 0.042 0.042 0.042 0.042 0.042 0.042 0.042 0.042 0.042 0.042 0.042
## PC573 PC574 PC575 PC576 PC577 PC578 PC579 PC580 PC581 PC582 PC583
## 0.041 0.041 0.041 0.041 0.041 0.041 0.041 0.041 0.041 0.041 0.041
## PC584 PC585 PC586 PC587 PC588 PC589 PC590 PC591 PC592 PC593 PC594
## 0.041 0.041 0.041 0.040 0.040 0.040 0.040 0.040 0.040 0.040 0.040
## PC595 PC596 PC597 PC598 PC599 PC600 PC601 PC602 PC603 PC604 PC605
## 0.040 0.040 0.040 0.040 0.039 0.039 0.039 0.039 0.039 0.039 0.039
## PC606 PC607 PC608 PC609 PC610 PC611 PC612 PC613 PC614 PC615 PC616
## 0.039 0.039 0.039 0.039 0.039 0.039 0.039 0.038 0.038 0.038 0.038
## PC617 PC618 PC619 PC620 PC621 PC622 PC623 PC624 PC625 PC626 PC627
## 0.038 0.038 0.038 0.038 0.038 0.038 0.038 0.038 0.038 0.038 0.038
## PC628 PC629 PC630 PC631 PC632 PC633 PC634 PC635 PC636 PC637 PC638
## 0.038 0.038 0.038 0.037 0.037 0.037 0.037 0.037 0.037 0.037 0.037
## PC639 PC640 PC641 PC642 PC643 PC644 PC645 PC646 PC647 PC648 PC649
## 0.037 0.037 0.037 0.037 0.037 0.037 0.037 0.037 0.036 0.036 0.036
## PC650 PC651 PC652 PC653 PC654 PC655 PC656 PC657 PC658 PC659 PC660
## 0.036 0.036 0.036 0.036 0.036 0.036 0.036 0.036 0.036 0.036 0.036
## PC661 PC662 PC663 PC664 PC665 PC666 PC667 PC668 PC669 PC670 PC671
## 0.035 0.035 0.035 0.035 0.035 0.035 0.035 0.035 0.035 0.035 0.035
## PC672 PC673 PC674 PC675 PC676 PC677 PC678 PC679 PC680 PC681 PC682
## 0.035 0.035 0.035 0.035 0.035 0.034 0.034 0.034 0.034 0.034 0.034
## PC683 PC684 PC685 PC686 PC687 PC688 PC689 PC690 PC691 PC692 PC693
## 0.034 0.034 0.034 0.034 0.034 0.034 0.034 0.034 0.034 0.034 0.034
## PC694 PC695 PC696 PC697 PC698 PC699 PC700 PC701 PC702 PC703 PC704
## 0.034 0.034 0.034 0.033 0.033 0.033 0.033 0.033 0.033 0.033 0.033
## PC705 PC706 PC707 PC708 PC709 PC710 PC711 PC712 PC713 PC714 PC715
## 0.033 0.033 0.033 0.033 0.033 0.033 0.033 0.033 0.033 0.033 0.032
## PC716 PC717 PC718 PC719 PC720 PC721 PC722 PC723 PC724 PC725 PC726
## 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032
## PC727 PC728 PC729 PC730 PC731 PC732 PC733 PC734 PC735 PC736 PC737
## 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032 0.032
## PC738 PC739 PC740 PC741 PC742 PC743 PC744 PC745 PC746 PC747 PC748
## 0.032 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031
## PC749 PC750 PC751 PC752 PC753 PC754 PC755 PC756 PC757 PC758 PC759
## 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.031
## PC760 PC761 PC762 PC763 PC764 PC765 PC766 PC767 PC768 PC769 PC770
## 0.031 0.031 0.031 0.031 0.031 0.031 0.031 0.030 0.030 0.030 0.030
## PC771 PC772 PC773 PC774 PC775 PC776 PC777 PC778 PC779 PC780 PC781
## 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030
## PC782 PC783 PC784 PC785 PC786 PC787 PC788 PC789 PC790 PC791 PC792
## 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030
## PC793 PC794 PC795 PC796 PC797 PC798 PC799 PC800 PC801 PC802 PC803
## 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.030 0.029
## PC804 PC805 PC806 PC807 PC808 PC809 PC810 PC811 PC812 PC813 PC814
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029
## PC815 PC816 PC817 PC818 PC819 PC820 PC821 PC822 PC823 PC824 PC825
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029
## PC826 PC827 PC828 PC829 PC830 PC831 PC832 PC833 PC834 PC835 PC836
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029
## PC837 PC838 PC839 PC840 PC841 PC842 PC843 PC844 PC845 PC846 PC847
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029
## PC848 PC849 PC850 PC851 PC852 PC853 PC854 PC855 PC856 PC857 PC858
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029
## PC859 PC860 PC861 PC862 PC863 PC864 PC865 PC866 PC867 PC868 PC869
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.029
## PC870 PC871 PC872 PC873 PC874 PC875 PC876 PC877 PC878 PC879 PC880
## 0.029 0.029 0.029 0.029 0.029 0.029 0.029 0.028 0.028 0.028 0.028
## PC881 PC882 PC883 PC884 PC885 PC886 PC887 PC888 PC889 PC890 PC891
## 0.028 0.028 0.028 0.028 0.028 0.028 0.028 0.028 0.027 0.027 0.027
## PC892 PC893 PC894 PC895 PC896 PC897 PC898 PC899 PC900 PC901 PC902
## 0.027 0.027 0.027 0.027 0.027 0.027 0.027 0.027 0.027 0.027 0.027
## PC903 PC904 PC905 PC906 PC907 PC908 PC909 PC910 PC911 PC912 PC913
## 0.027 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026
## PC914 PC915 PC916 PC917 PC918 PC919 PC920 PC921 PC922 PC923 PC924
## 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.026 0.025
## PC925 PC926 PC927 PC928 PC929 PC930 PC931 PC932 PC933 PC934 PC935
## 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.025
## PC936 PC937 PC938 PC939 PC940 PC941 PC942 PC943 PC944 PC945 PC946
## 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.025 0.024 0.024
## PC947 PC948 PC949 PC950 PC951 PC952 PC953 PC954 PC955 PC956 PC957
## 0.024 0.024 0.024 0.024 0.024 0.024 0.024 0.024 0.024 0.024 0.024
## PC958 PC959 PC960 PC961 PC962 PC963 PC964 PC965 PC966 PC967 PC968
## 0.024 0.024 0.024 0.024 0.024 0.024 0.024 0.023 0.023 0.023 0.023
## PC969 PC970 PC971 PC972 PC973 PC974 PC975 PC976 PC977 PC978 PC979
## 0.023 0.023 0.023 0.023 0.023 0.023 0.023 0.023 0.023 0.023 0.023
## PC980 PC981 PC982 PC983 PC984 PC985 PC986 PC987 PC988 PC989 PC990
## 0.023 0.023 0.023 0.023 0.023 0.023 0.022 0.022 0.022 0.022 0.022
## PC991 PC992 PC993 PC994 PC995 PC996 PC997 PC998 PC999 PC1000 PC1001
## 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022 0.022
## PC1002 PC1003 PC1004 PC1005 PC1006 PC1007 PC1008 PC1009 PC1010 PC1011 PC1012
## 0.022 0.022 0.022 0.022 0.022 0.022 0.021 0.021 0.021 0.021 0.021
## PC1013 PC1014 PC1015 PC1016 PC1017 PC1018 PC1019 PC1020 PC1021 PC1022 PC1023
## 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021
## PC1024 PC1025 PC1026 PC1027 PC1028 PC1029 PC1030 PC1031 PC1032 PC1033 PC1034
## 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021 0.021
## PC1035 PC1036 PC1037 PC1038 PC1039 PC1040 PC1041 PC1042 PC1043 PC1044 PC1045
## 0.021 0.021 0.020 0.020 0.020 0.020 0.020 0.020 0.020 0.020 0.020
## PC1046 PC1047 PC1048 PC1049 PC1050 PC1051 PC1052 PC1053 PC1054 PC1055 PC1056
## 0.020 0.020 0.020 0.020 0.020 0.020 0.020 0.020 0.020 0.020 0.020
## PC1057 PC1058 PC1059 PC1060 PC1061 PC1062 PC1063 PC1064 PC1065 PC1066 PC1067
## 0.020 0.020 0.020 0.020 0.020 0.019 0.019 0.019 0.019 0.019 0.019
## PC1068 PC1069 PC1070 PC1071 PC1072 PC1073 PC1074 PC1075 PC1076 PC1077 PC1078
## 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019
## PC1079 PC1080 PC1081 PC1082 PC1083 PC1084 PC1085 PC1086 PC1087 PC1088 PC1089
## 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019 0.019
## PC1090 PC1091 PC1092 PC1093 PC1094 PC1095 PC1096 PC1097 PC1098 PC1099 PC1100
## 0.019 0.019 0.019 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018
## PC1101 PC1102 PC1103 PC1104 PC1105 PC1106 PC1107 PC1108 PC1109 PC1110 PC1111
## 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018
## PC1112 PC1113 PC1114 PC1115 PC1116 PC1117 PC1118 PC1119 PC1120 PC1121 PC1122
## 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.018 0.017
## PC1123 PC1124 PC1125 PC1126 PC1127 PC1128 PC1129 PC1130 PC1131 PC1132 PC1133
## 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017
## PC1134 PC1135 PC1136 PC1137 PC1138 PC1139 PC1140 PC1141 PC1142 PC1143 PC1144
## 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017
## PC1145 PC1146 PC1147 PC1148 PC1149 PC1150 PC1151 PC1152 PC1153 PC1154 PC1155
## 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017
## PC1156 PC1157 PC1158 PC1159 PC1160 PC1161 PC1162 PC1163 PC1164 PC1165 PC1166
## 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016
## PC1167 PC1168 PC1169 PC1170 PC1171 PC1172 PC1173 PC1174 PC1175 PC1176 PC1177
## 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016
## PC1178 PC1179 PC1180 PC1181 PC1182 PC1183 PC1184 PC1185 PC1186 PC1187 PC1188
## 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016
## PC1189 PC1190 PC1191 PC1192 PC1193 PC1194 PC1195 PC1196 PC1197 PC1198 PC1199
## 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016 0.016
## PC1200 PC1201 PC1202 PC1203 PC1204 PC1205 PC1206 PC1207 PC1208 PC1209 PC1210
## 0.016 0.016 0.016 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1211 PC1212 PC1213 PC1214 PC1215 PC1216 PC1217 PC1218 PC1219 PC1220 PC1221
## 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1222 PC1223 PC1224 PC1225 PC1226 PC1227 PC1228 PC1229 PC1230 PC1231 PC1232
## 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1233 PC1234 PC1235 PC1236 PC1237 PC1238 PC1239 PC1240 PC1241 PC1242 PC1243
## 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1244 PC1245 PC1246 PC1247 PC1248 PC1249 PC1250 PC1251 PC1252 PC1253 PC1254
## 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1255 PC1256 PC1257 PC1258 PC1259 PC1260 PC1261 PC1262 PC1263 PC1264 PC1265
## 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1266 PC1267 PC1268 PC1269 PC1270 PC1271 PC1272 PC1273 PC1274 PC1275 PC1276
## 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015 0.015
## PC1277 PC1278 PC1279 PC1280 PC1281 PC1282 PC1283 PC1284 PC1285 PC1286 PC1287
## 0.015 0.015 0.015 0.015 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1288 PC1289 PC1290 PC1291 PC1292 PC1293 PC1294 PC1295 PC1296 PC1297 PC1298
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1299 PC1300 PC1301 PC1302 PC1303 PC1304 PC1305 PC1306 PC1307 PC1308 PC1309
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1310 PC1311 PC1312 PC1313 PC1314 PC1315 PC1316 PC1317 PC1318 PC1319 PC1320
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1321 PC1322 PC1323 PC1324 PC1325 PC1326 PC1327 PC1328 PC1329 PC1330 PC1331
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1332 PC1333 PC1334 PC1335 PC1336 PC1337 PC1338 PC1339 PC1340 PC1341 PC1342
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1343 PC1344 PC1345 PC1346 PC1347 PC1348 PC1349 PC1350 PC1351 PC1352 PC1353
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.014
## PC1354 PC1355 PC1356 PC1357 PC1358 PC1359 PC1360 PC1361 PC1362 PC1363 PC1364
## 0.014 0.014 0.014 0.014 0.014 0.014 0.014 0.013 0.013 0.013 0.013
## PC1365 PC1366 PC1367 PC1368 PC1369 PC1370 PC1371 PC1372 PC1373 PC1374 PC1375
## 0.013 0.013 0.013 0.013 0.013 0.013 0.013 0.013 0.013 0.013 0.013
## PC1376 PC1377 PC1378 PC1379 PC1380 PC1381 PC1382 PC1383 PC1384 PC1385 PC1386
## 0.013 0.013 0.013 0.012 0.012 0.012 0.012 0.012 0.012 0.012 0.012
## PC1387 PC1388 PC1389 PC1390 PC1391 PC1392 PC1393 PC1394 PC1395 PC1396 PC1397
## 0.012 0.012 0.012 0.012 0.012 0.012 0.012 0.012 0.012 0.012 0.012
## PC1398 PC1399 PC1400 PC1401 PC1402 PC1403 PC1404 PC1405 PC1406 PC1407 PC1408
## 0.012 0.012 0.011 0.011 0.011 0.011 0.011 0.011 0.011 0.011 0.011
## PC1409 PC1410 PC1411 PC1412 PC1413 PC1414 PC1415 PC1416 PC1417 PC1418 PC1419
## 0.011 0.011 0.011 0.011 0.011 0.011 0.011 0.011 0.011 0.011 0.011
## PC1420 PC1421 PC1422 PC1423 PC1424 PC1425 PC1426 PC1427 PC1428 PC1429 PC1430
## 0.011 0.011 0.011 0.011 0.011 0.010 0.010 0.010 0.010 0.010 0.010
## PC1431 PC1432 PC1433 PC1434 PC1435 PC1436 PC1437 PC1438 PC1439 PC1440 PC1441
## 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010
## PC1442 PC1443 PC1444 PC1445 PC1446 PC1447 PC1448 PC1449 PC1450 PC1451 PC1452
## 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010 0.010
## PC1453 PC1454 PC1455 PC1456 PC1457 PC1458 PC1459 PC1460 PC1461 PC1462 PC1463
## 0.010 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009
## PC1464 PC1465 PC1466 PC1467 PC1468 PC1469 PC1470 PC1471 PC1472 PC1473 PC1474
## 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009
## PC1475 PC1476 PC1477 PC1478 PC1479 PC1480 PC1481 PC1482 PC1483 PC1484 PC1485
## 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.009 0.008 0.008
## PC1486 PC1487 PC1488 PC1489 PC1490 PC1491 PC1492 PC1493 PC1494 PC1495 PC1496
## 0.008 0.008 0.008 0.008 0.008 0.008 0.008 0.008 0.008 0.008 0.008
## PC1497 PC1498 PC1499 PC1500
## 0.008 0.008 0.008 0.008
N_columns <- ncol(vcf_scaled)
N_columns
## [1] 6962
cut_off <- 1/N_columns*100
cut_off
## [1] 0.01436369
i_cut_off <- which(var_out < cut_off)
i_cut_off
## PC1281 PC1282 PC1283 PC1284 PC1285 PC1286 PC1287 PC1288 PC1289 PC1290 PC1291
## 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291
## PC1292 PC1293 PC1294 PC1295 PC1296 PC1297 PC1298 PC1299 PC1300 PC1301 PC1302
## 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302
## PC1303 PC1304 PC1305 PC1306 PC1307 PC1308 PC1309 PC1310 PC1311 PC1312 PC1313
## 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313
## PC1314 PC1315 PC1316 PC1317 PC1318 PC1319 PC1320 PC1321 PC1322 PC1323 PC1324
## 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324
## PC1325 PC1326 PC1327 PC1328 PC1329 PC1330 PC1331 PC1332 PC1333 PC1334 PC1335
## 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335
## PC1336 PC1337 PC1338 PC1339 PC1340 PC1341 PC1342 PC1343 PC1344 PC1345 PC1346
## 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346
## PC1347 PC1348 PC1349 PC1350 PC1351 PC1352 PC1353 PC1354 PC1355 PC1356 PC1357
## 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357
## PC1358 PC1359 PC1360 PC1361 PC1362 PC1363 PC1364 PC1365 PC1366 PC1367 PC1368
## 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368
## PC1369 PC1370 PC1371 PC1372 PC1373 PC1374 PC1375 PC1376 PC1377 PC1378 PC1379
## 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379
## PC1380 PC1381 PC1382 PC1383 PC1384 PC1385 PC1386 PC1387 PC1388 PC1389 PC1390
## 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390
## PC1391 PC1392 PC1393 PC1394 PC1395 PC1396 PC1397 PC1398 PC1399 PC1400 PC1401
## 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
## PC1402 PC1403 PC1404 PC1405 PC1406 PC1407 PC1408 PC1409 PC1410 PC1411 PC1412
## 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412
## PC1413 PC1414 PC1415 PC1416 PC1417 PC1418 PC1419 PC1420 PC1421 PC1422 PC1423
## 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423
## PC1424 PC1425 PC1426 PC1427 PC1428 PC1429 PC1430 PC1431 PC1432 PC1433 PC1434
## 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434
## PC1435 PC1436 PC1437 PC1438 PC1439 PC1440 PC1441 PC1442 PC1443 PC1444 PC1445
## 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445
## PC1446 PC1447 PC1448 PC1449 PC1450 PC1451 PC1452 PC1453 PC1454 PC1455 PC1456
## 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456
## PC1457 PC1458 PC1459 PC1460 PC1461 PC1462 PC1463 PC1464 PC1465 PC1466 PC1467
## 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467
## PC1468 PC1469 PC1470 PC1471 PC1472 PC1473 PC1474 PC1475 PC1476 PC1477 PC1478
## 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478
## PC1479 PC1480 PC1481 PC1482 PC1483 PC1484 PC1485 PC1486 PC1487 PC1488 PC1489
## 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489
## PC1490 PC1491 PC1492 PC1493 PC1494 PC1495 PC1496 PC1497 PC1498 PC1499 PC1500
## 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500
i_cut_off <- min(i_cut_off)
i_cut_off
## [1] 1281
my_meta_N_meanNA_rowsPCs <- i_cut_off
my_meta_var_PC123 <- var_out[c(1,2,3)]
my_meta_var_PC123
## PC1 PC2 PC3
## 3.139 2.874 2.141
barplot(var_out,
main = "Percent variation (%) Scree plot",
ylab = "Percent variation (%) explained",
names.arg = 1:length(var_out))
abline(h = cut_off, col = 2, lwd = 2)
abline(v = i_cut_off)
legend("topright",
col = c(2,1),
lty = c(1,1),
legend = c("Vertical line: cutoff",
"Horizontal line: 1st value below cut off"))
cumulative_variation <- cumsum(var_out)
plot(cumulative_variation, type = "l")
vcf_pca_scores <- vegan::scores(vcf_pca)
vcf_pca_scores2 <- data.frame(super_pop = vcf_noNA$super_pop, vcf_pca_scores)
my_meta_var_PC123[1]
## PC1
## 3.139
my_meta_var_PC123[2]
## PC2
## 2.874
my_meta_var_PC123[3]
## PC3
## 2.141
ggpubr::ggscatter(data = vcf_pca_scores2,
y = "PC2",
x = "PC1",
color = "super_pop",
shape = "super_pop",
main = "PCA Scatterplot",
xlab = "PC1 (3.139% of variation)",
ylab = "PC2 (2.874% of variation)")
ggpubr::ggscatter(data = vcf_pca_scores2,
y = "PC3",
x = "PC2",
color = "super_pop",
shape = "super_pop",
main = "PCA Scatterplot",
xlab = "PC2 (2.874% of variation)",
ylab = "PC3 (2.141% of variation)")
ggpubr::ggscatter(data = vcf_pca_scores2,
y = "PC3",
x = "PC1",
color = "super_pop",
shape = "super_pop",
main = "PCA Scatterplot",
xlab = "PC1 (3.139% of variation)",
ylab = "PC3 (2.141% of variation)")