CMD commands (Post BLAST installation on local pc)
mkdir test_data,cd test_data
The working directory was changed to the blast/bin folder, where centroids_test.txt had been placed; the path still needs to be changed so that the file is pulled from the db folder instead.
makeblastdb -in centroids_test.txt -parse_seqids -dbtype nucl
blastn -db centroids_test.txt -outfmt "6 qseqid sseqid pident" -out match_list.txt -qcov_hsp_perc 80 -perc_identity 84 -query centroids_test.txt
These two commands generate a match list named match_list.txt, which is then used in R with the LULU package.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
curated_result <- lulu(otutab, matchlist, minimum_ratio_type = "min", minimum_ratio = 1, minimum_match = 84, minimum_relative_cooccurence = 0.95)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
curated_result$curated_table
curated_result$original_table
curated_result$curated_count
## [1] 459
curated_result$discarded_count
## [1] 1966
head(curated_result$discarded_otus)
## [1] "7c535c7709639b9ec025858cca671d406966a653"
## [2] "1ea168de62e8686635707db62629aae301a14b2b"
## [3] "0c2c529cbd545bc3675f3433b0160e0cb56c4b2c"
## [4] "ee7271685168ed084bfcaa5515caf4761012f260"
## [5] "2e721d0157683b7e1ab7999fc5c9d22b0a3b4397"
## [6] "57f40e612102dc39214b2670caca5d8c3f5b7897"
curated_result$runtime
## Time difference of 1.412079 mins
curated_result$minimum_match
## [1] 84
curated_result$minimum_relative_cooccurence
## [1] 0.95
head(curated_result$otu_map)
## total spread
## ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb 136715 58
## 79a49b866cf4bdc00d11eb1c7b91957ce15a0314 104908 50
## c2f02be9235142d605aaa5170f38d5a9c8a684de 98839 45
## 9b88a08f039c7bfc513e52b4369b4f05857cb1f5 171279 42
## a2e5ad0bd2a99776da541051125b0ad377f7ea6e 634469 41
## aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0 301433 36
## parent_id
## ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb
## 79a49b866cf4bdc00d11eb1c7b91957ce15a0314 79a49b866cf4bdc00d11eb1c7b91957ce15a0314
## c2f02be9235142d605aaa5170f38d5a9c8a684de c2f02be9235142d605aaa5170f38d5a9c8a684de
## 9b88a08f039c7bfc513e52b4369b4f05857cb1f5 9b88a08f039c7bfc513e52b4369b4f05857cb1f5
## a2e5ad0bd2a99776da541051125b0ad377f7ea6e a2e5ad0bd2a99776da541051125b0ad377f7ea6e
## aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0 aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0
## curated rank
## ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb parent 6
## 79a49b866cf4bdc00d11eb1c7b91957ce15a0314 parent 11
## c2f02be9235142d605aaa5170f38d5a9c8a684de parent 13
## 9b88a08f039c7bfc513e52b4369b4f05857cb1f5 parent 5
## a2e5ad0bd2a99776da541051125b0ad377f7ea6e parent 1
## aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0 parent 3
curated_result$otu_map[300:308,]
## total spread
## 709d050ce8c823a6650c74f085ff034093e3ad42 108 5
## 1dbf509b1f6bd8470354e29855459b1c0bf4d033 94 5
## 01b7e27549e043e22aebe8c215746fdbbd37a4e4 88 5
## 3eda946fc9435377e003bf85089f75ddf7972a7d 87 5
## c623dbeece5ff9df34c56decee695be51d30b5e1 86 5
## 212eae5cd8133d47b085ea861a0f6865928a9276 85 5
## 22740909902c879d2b044a0ac8ac4bbdee2a9bdf 79 5
## 3f17a0b4a4097f5348fa817a1ada92ec3ae7d37e 67 5
## 9612a49d162af29198945e1b09ddf0616da0288f 65 5
## parent_id
## 709d050ce8c823a6650c74f085ff034093e3ad42 ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb
## 1dbf509b1f6bd8470354e29855459b1c0bf4d033 0b2e099f3eebf3ef942767f4c190c4ec703bbe30
## 01b7e27549e043e22aebe8c215746fdbbd37a4e4 ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb
## 3eda946fc9435377e003bf85089f75ddf7972a7d a2e5ad0bd2a99776da541051125b0ad377f7ea6e
## c623dbeece5ff9df34c56decee695be51d30b5e1 ec84eb6504ec23a3fe659c533bf9b3f08f5bd1cb
## 212eae5cd8133d47b085ea861a0f6865928a9276 aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0
## 22740909902c879d2b044a0ac8ac4bbdee2a9bdf bd34bf9b277639657f65381c53d7715718a184c7
## 3f17a0b4a4097f5348fa817a1ada92ec3ae7d37e aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0
## 9612a49d162af29198945e1b09ddf0616da0288f aafb7fcf4cfed42eaae4141f2af712b5ca7db7f0
## curated rank
## 709d050ce8c823a6650c74f085ff034093e3ad42 merged 483
## 1dbf509b1f6bd8470354e29855459b1c0bf4d033 merged 518
## 01b7e27549e043e22aebe8c215746fdbbd37a4e4 merged 537
## 3eda946fc9435377e003bf85089f75ddf7972a7d merged 539
## c623dbeece5ff9df34c56decee695be51d30b5e1 merged 547
## 212eae5cd8133d47b085ea861a0f6865928a9276 merged 550
## 22740909902c879d2b044a0ac8ac4bbdee2a9bdf merged 563
## 3f17a0b4a4097f5348fa817a1ada92ec3ae7d37e merged 611
## 9612a49d162af29198945e1b09ddf0616da0288f merged 619
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.