library(rentrez)
# Search the Entrez "nuccore" database for matK records whose organism is
# Lithocarpus hongiaoensis, then display the search summary.
sea <- entrez_search(
  db = "nuccore",
  term = "Lithocarpus hongiaoensis[Organism] AND matK[Gene]"
)
sea
## Entrez search result with 1 hits (object contains 1 IDs and no web_history object)
## Search term (as translated): "Lithocarpus hongiaoensis"[Organism] AND matK[Gene ...
# Fetch the FASTA text for every ID stored in `sea` and display the raw
# character string (line breaks show up as "\n" when auto-printed).
sp1.fasta <- entrez_fetch(db = "nuccore", id = sea$ids, rettype = "fasta")
sp1.fasta
## [1] ">LC318542.1 Lithocarpus sp. NVN-2017 chloroplast matK gene for maturase K, partial cds, specimen_voucher: FU<JPN_:V3235\nAAAGATGCTTCTTATTTGCATTTATTGCGGTTCTTTCTTCATGAGTATTCTAATTGTAACAGTCTTATTA\nTTACAAATAAATCTATTTCCATTTTTTCAAAAAGTAATCCGAGATTCTTTTTATTCCTATATAATTCTTA\nTATATGTGAATACGAATCCATCTTCCTTTTTCTCCGTAACCAATCTTCTCATTTACGATTAACATCTTCT\nGGAGTCCTTTTTGAACGACTCTGTTTATATAGAAAAATAGAACATTTTGCCGAAGTCTTTGCTAATGATT\nTTCCGGTCATCCCATGCTTTCTCAAGGATCCTTTCATGCATTATGTTAGATATCAAGGAAAATCAATTCT\nGGCTTCCAAAGACACACCTCTTCTAATGAATAAATGGAAATCTTACCTTGTCAATATATGGCAATGTCAT\nTTTGATGTATGGTCTCACGCGGCAAGTATCCGTATAAACCAATTATCCAAGCATTCCCTCGATTTTTTGA\nGTTACTTTTCAAGTGTTCGACGAAATCCTGCAGTGGTGCGGAATCAAATGCTAGAAAGTTCATTTCTACT\nAAATAATGCTCCCAATAAACTCGATACAATAGTTCCAATTATTCCTCTGATTGGATCATTGGCTAAAGCG\nAAATTTTGTAACGCAGTAGGGCATCCAATTAGTAAGCTGACT\n\n"
library(readr)
# Load the leaf measurement table directly from GitHub; read_delim() guesses
# the delimiter and column types and reports them in a message.
df <- read_delim(
  "https://raw.githubusercontent.com/ngocdlu/data_analysis/main/data_1_(leaf).txt"
)
## Rows: 138 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): species
## dbl (5): len, wid, rat, cir, pet
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One-way ANOVA of `pet` across `species`, followed by Tukey's HSD post-hoc
# comparison and a plot of the pairwise confidence intervals.
# The data frame is supplied through `data =` rather than attach(), so the
# model always uses the columns of `df` and nothing is added to the search
# path (attached columns can be silently masked by later attach() calls).
av <- aov(pet ~ species, data = df)
tukey <- TukeyHSD(av)  # not named `t`, which would shadow base::t()
plot(tukey)
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
# Scatter plot of petal width against petal length for the iris data, with a
# regression line, its confidence band, and the Pearson correlation
# coefficient. `iris` is passed explicitly, so no attach() is needed.
data(iris)
ggscatter(
  iris,
  x = "Petal.Length", y = "Petal.Width",
  add = "reg.line", conf.int = TRUE,
  cor.coef = TRUE, cor.method = "pearson",
  xlab = "Petal Length", ylab = "Petal width"
)
## `geom_smooth()` using formula 'y ~ x'
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
# Box plot comparing sepal width between the iris species, annotated with
# statistical details by ggstatsplot. The title and axis labels are in
# Vietnamese (title: "Box plot comparing sepal width of 3 Iris species").
ggbetweenstats(
  iris,
  x = Species,
  y = Sepal.Width,
  plot.type = "box",
  title = "Biểu đồ hộp so sánh chiều rộng lá đài 3 loài hoa Iris",
  xlab = "Loài hoa Iris",
  ylab = "Chiều rộng đài hoa (Cm)"
)
library("FactoMineR")
library("factoextra")
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Load the bidoupensis measurements directly from GitHub.
df2 <- read.csv(
  "https://raw.githubusercontent.com/ngocdlu/data_analysis/main/bidoupensis.csv"
)
# Keep the first five columns as the PCA input; `df2` retains the `species`
# column that is used later to color the individuals.
# No attach(): the columns are only ever used through `df3` / `df2`, and
# attaching them would mask the same-named columns of other data frames.
df3 <- df2[, 1:5]
# Run the principal component analysis on the five measurement columns
# without drawing FactoMineR's default graphs, then list the available
# result components.
pca <- PCA(X = df3, graph = FALSE)
print(pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 65 individuals, described by 5 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# Extract the eigenvalue of each dimension together with its percentage of
# variance and the cumulative percentage, then display the table.
eig.val <- get_eigenvalue(X = pca)
eig.val
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.974304748 59.4860950 59.48609
## Dim.2 1.661036931 33.2207386 92.70683
## Dim.3 0.299409050 5.9881810 98.69501
## Dim.4 0.058451339 1.1690268 99.86404
## Dim.5 0.006797931 0.1359586 100.00000
# Scree plot: percentage of variance per dimension, labelled, y-axis 0-100.
fviz_eig(pca, addlabels = TRUE, ylim = c(0, 100))

# Plot of the individuals, colored and enclosed in ellipses by species.
# read.csv() returns `species` as character under R >= 4.0, so it is coerced
# to a factor explicitly to make sure it is treated as a grouping variable.
fviz_pca_ind(
  pca,
  geom.ind = "point",                # show points only (but not "text")
  col.ind = as.factor(df2$species),  # color by groups
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE,                # concentration ellipses
  legend.title = "Groups"
)
# Hết (End)