library(readr)
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ purrr 0.3.4 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the iris measurements from the hosted CSV into `df1`.
df1 <- read_csv(
  "https://raw.githubusercontent.com/ngocdlu/K44_test/master/iris.csv"
)
## Rows: 150 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): species
## dbl (4): sepal_length, sepal_width, petal_length, petal_width
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to check how the columns were parsed.
head(df1, n = 6L)
## # A tibble: 6 × 5
## sepal_length sepal_width petal_length petal_width species
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Box plot of sepal length for each species, filled by species.
ggplot(
  data = df1,
  mapping = aes(x = species, y = sepal_length, fill = species)
) +
  geom_boxplot()

# Smallest sepal length observed within each species, converted to a plain
# data.frame and printed.
min_sepal_by_species <- df1 %>%
  group_by(species) %>%
  summarise(min_sepal = min(sepal_length)) %>%
  as.data.frame()
min_sepal_by_species
## species min_sepal
## 1 setosa 4.3
## 2 versicolor 4.9
## 3 virginica 4.9
# Box plot of sepal length per species, annotated with each species' minimum
# value: a red point at the minimum plus a text label rounded to one decimal.
ggplot(df1, aes(x = species, y = sepal_length, fill = species)) +
  geom_boxplot() +
  # labs() sets axis titles by aesthetic name (`x`, `y`). `xlab`/`ylab` are
  # not aesthetics, so passing them left the default axis titles in place.
  labs(
    title = "Biểu đồ so sánh chiều dài đài hoa",
    x = "Loài thực vật",
    y = "Chiều dài đài hoa (Cm)"
  ) +
  # Draw the marker at the actual minimum. Rounding to a whole number moved
  # the point away from the real value and from its own label.
  geom_point(
    data = min_sepal_by_species,
    aes(x = species, y = min_sepal),
    col = "red"
  ) +
  geom_text(
    data = min_sepal_by_species,
    aes(label = round(min_sepal, 1), x = species, y = round(min_sepal, 1)),
    check_overlap = TRUE,
    vjust = -0.5
  )
Xong bước này lưu biểu đồ dưới dạng pdf với tên file “Plot_1”
# One-way ANOVA of sepal length across species on the built-in iris data,
# followed by Tukey's HSD pairwise comparisons and their plot.
data(iris)
# Pass the data frame explicitly instead of attach()-ing it, which would leave
# the iris columns on the search path for the rest of the script.
av <- aov(Sepal.Length ~ Species, data = iris)
# Named `tukey` rather than `t` so the result does not shadow base::t().
tukey <- TukeyHSD(av)
plot(tukey)
Xong lưu biểu đồ dạng pdf với tên “Plot_2”
library(ggpubr)
# Load the bidoupensis leaf measurements from the hosted CSV into `df4`.
df4 <- read_csv(
  "https://raw.githubusercontent.com/ngocdlu/data_analysis/main/bidoupensis.csv"
)
## Rows: 65 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): species
## dbl (5): pet, len, wid, rat, cir
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of `wid` against `len` with a fitted regression line, its
# confidence interval, and the Pearson correlation coefficient.
ggscatter(
  df4,
  x = "len",
  y = "wid",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson",
  xlab = "Blade Length",
  ylab = "Blade width"
)
## `geom_smooth()` using formula 'y ~ x'
Xong ý này lưu biểu đồ tương quan với tên “Plot_3”
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
# Between-species comparison of `wid`, drawn as box plots.
ggbetweenstats(
  data = df4,
  x = species,
  y = wid,
  plot.type = "box",
  title = "Biểu đồ hộp so sánh chiều rộng phiến lá",
  xlab = "Loài thực vật",
  ylab = "Chiều rộng phiến lá (Cm)"
)
Xong bước này lưu biểu đồ dạng pdf và lưu tên file là “Plot_4”
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Load the bidoupensis leaf measurements from the hosted CSV into `df5`.
df5 <- read_csv(
  "https://raw.githubusercontent.com/ngocdlu/data_analysis/main/bidoupensis.csv"
)
## Rows: 65 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): species
## dbl (5): pet, len, wid, rat, cir
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep the first five columns (the numeric measurements; `species` is the
# sixth, character column) and preview the result.
df6 <- df5[, seq_len(5)]
head(df6)
## # A tibble: 6 × 5
## pet len wid rat cir
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.61 11.0 5.21 2.12 0.75
## 2 0.58 11.6 5.03 2.3 0.74
## 3 0.35 9.1 3.72 2.45 0.64
## 4 0.32 9.72 4.26 2.28 0.71
## 5 0.35 8.99 4.16 2.16 0.67
## 6 0.43 10.0 4.93 2.04 0.74
# Run the PCA on the five measurement columns with FactoMineR's own plots
# switched off (graph = FALSE), then print the summary of the result object.
# df6 is passed to PCA() directly and is deliberately not attach()-ed: nothing
# in this script refers to its columns by bare name, so attaching it would
# only add them to the search path.
pca <- PCA(df6, graph = FALSE)
print(pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 65 individuals, described by 5 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# Extract the eigenvalues of the PCA (with variance percentages) and show them.
eig.val <- get_eigenvalue(pca)
print(eig.val)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.974304748 59.4860950 59.48609
## Dim.2 1.661036931 33.2207386 92.70683
## Dim.3 0.299409050 5.9881810 98.69501
## Dim.4 0.058451339 1.1690268 99.86404
## Dim.5 0.006797931 0.1359586 100.00000
# Eigenvalue plot for the PCA with value labels; y axis fixed to 0-100.
fviz_eig(
  pca,
  addlabels = TRUE,
  ylim = c(0, 100)
)
Xong bước này lưu biểu đồ dạng pdf với tên “Plot_5”
# Plot the PCA individuals as points, coloured by species, with a
# concentration ellipse per group.
fviz_pca_ind(
  pca,
  geom.ind = "point",  # show points only (not "text")
  col.ind = df5$species,  # colour by group
  # Three colours supplied; assumes `species` has three levels - TODO confirm.
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE,  # concentration ellipses
  legend.title = "Groups"
)
Xong bước này lưu biểu đồ dạng pdf với tên “Plot_6”
—– Hết —–