Bioinformatic - Bài thực hành tổng hợp

Cài đặt PACKAGES

– “taxize”, “rentrez”, “seqinr” “ape”

1. Truy xuất trình tự DNA từ Genbank

Dowload trình tự nucleotide từ các cơ sở dữ liệu online

Gọi packages “rentrez” của NCBI

library(rentrez)

Download trình tự

sea <- entrez_search(db="nuccore", term="Lithocarpus vuquangensis [Organism] AND matK[Gene]") 
sea

## Entrez search result with 1 hits (object contains 1 IDs and no web_history object)
##  Search term (as translated):  "Lithocarpus vuquangensis"[Organism] AND matK[Gene ...

sea$ids

## [1] "1241187594"

sp1.fasta <- entrez_fetch(db="nuccore", sea$ids, rettype="fasta")
sp1.fasta

## [1] ">LC319670.1 Lithocarpus vuquangensis chloroplast matK gene for maturase K, partial cds, specimen_voucher: FU<JPN_:V5743\nTCATTCAATATTTCCCTTTTAGAGGAAAAGTTCCCACATTTAAATTATTCGGCGGATATACTAATACCCT\nACCCTGCCCATCTGGAAATATTGGTTCAAACCCTTCGTTACCGGGTGAAAGATGCTTCTTATTTGCATTT\nATTGCGGTTCTTTCTTCATGAGTATTCTAATTGTAACAGTCTTATTATTACAAATAAATCTATTTCCATT\nTTTTCAAAAAGTAATCCGAGATTCTTTTTATTCCTATATAATTCTTATATATGTGAATACGAATCCATCT\nTCCTTTTTCTCCGTAACCAATCTTCTCATTTACGATTAACATCTTCTGGAGTCCTTTTTGAACGACTCTG\nTTTATATAGAAAAATAGAACATTTTGCCGAAGTCTTTGCTAATGATTTTCCGGTCATCCCATGCTTTCTC\nAAGGATCCTTTCATGCATTATGTTAGATATCAAGGAAAATCAATTCTGGCTTCCAAAGACACACCTCTTC\nTAATGAATAAATGGAAATCTTACCTTGTCAATTTATGGCAATGTCATTTTGATGTATGGTCTCACGCGGC\nAAGTATCCGTATAAACCAATTATCCAAGCATTCCCTCGATTTTTTGAGTTACTTTTCAAGTGTTCGACGA\nAATCCTGCAGTGGTGCGGAATCAAATGCTAGAAAGTTCATTTCTACTAAATAATGCTCCCAATAAACTCG\nATACAATAGTTCCAATTATTCCTCTGATTGGATCATTGGCTAAAGCGAAATTTTGTAACGCAGTAGGGCA\nTCCAATTAGTAAGCTGACTCGGGCCGATTTATCGGATTTTGAGATTATCAATCGATTTTTGCATATATGC\nAGAAATCTTTCTCATTATTACAGCGGATCCTCAAAAAAAAAGAATATGTATCGAATAAAATATATACTTC\nGACTTT\n\n"

write(sp1.fasta, "Sp_matK_1.fasta")

Bài tập: Thực hiện load trình tự ITS, matK và rbcL của loài Quercus baolamensis

Sử dụng packages seqinr để load và ghi trình tự DNA

library(seqinr)

Chọn cơ sở dữ liệu

choosebank("genbank")

Download trình tự

sp <- query("sp", "sp=Lithocarpus dahuoaiensis AND K=matK")
sp$req

## [[1]]
##       name     length      frame     ncbicg 
## "LC318548"      "831"        "0"       "11" 
## 
## [[2]]
##       name     length      frame     ncbicg 
## "LC318551"      "916"        "0"       "11"

Ghi thành file fasta

write.fasta(getSequence(sp), getName(sp), file.out="Sp_matK_2.fasta")

Đọc trình tự

Cài đặt và dùng packages ape install.packages(“ape”)

library(ape)

## 
## Attaching package: 'ape'

## The following objects are masked from 'package:seqinr':
## 
##     as.alignment, consensus

Đọc trình tự bằng lệnh read.dna

dna1 <- read.dna("Sp_matK_1.fasta", format="fasta")
dna1

## 1 DNA sequence in binary format stored in a matrix.
## 
## Sequence length: 916 
## 
## Label:
## LC319670.1 Lithocarpus vuquangensis chloroplast matK gene fo...
## 
## Base composition:
##     a     c     g     t 
## 0.298 0.197 0.146 0.359 
## (Total: 916 bases)

dna2 <- read.dna("Sp_matK_2.fasta", format="fasta")
dna2

## 2 DNA sequences in binary format stored in a list.
## 
## Mean sequence length: 873.5 
##    Shortest sequence: 831 
##     Longest sequence: 916 
## 
## Labels:
## LC318548
## LC318551
## 
## Base composition:
##     a     c     g     t 
## 0.294 0.199 0.149 0.358 
## (Total: 1.75 kb)

2. Thiết lập bản đồ địa điểm nghiên cứu:

Cài đạt packages bản đồ nền install.packages(maps), install.packages(mapdata) #Gọi packages

library(maps)
library(mapdata)

Tạo Giới hạn phân bố với các giá trị biết trước của Kinh tuyến(x), vĩ tuyến (y)

VD1: Tạo bản đồ phân bố của loài dẻ Cà Đam, biết toạ độ phân bố của nó là tại toạ độ: vĩ tuyến y = 15.205992, kinh tuyến x = 108.497412

map('worldHires', resolution=0, xlim=c(102,112),  #Định vị khu vực phân bố bằng toạ độ
    ylim=c(8,23.5), lwd=0.5)                     #fill=T, col="gray" ...
map.axes()                                        #Đóng khung bản đồ
locnx <- c(108.464768)
locny <- c(15.158920)                       #Định vị địa điểm phân bố bằng toạ độ biết trước
points(locnx,locny,pch=19)                  #Ký hiệu địa điểm phân bố bằng plot characters (pch)

3. Vẽ bản đồ mật độ phân bố của loài

Chuẩn bị dữ liệu địa lý - Download dữ liệu địa lý định dạng .shp của Việt Nam tại website: https://www.gadm.org/download_country_v3.html chọn Shapefile và lưu vể folder trên máy tính.

Trong folder VNM_adm load về có 4 mức độ: gadm36_VNM_0.shp là dữ liệu địa lý VN ko có đường biên các tỉnh, gadm36_VNM_1.shp là dữ liệu địa lý VN có đườg biên các tỉnh, gadm36_VNM_2.shp mức độ đường biên các huyện, gadm36_VNM_3.shp mức độ đường biên cấp xã.
Để dọc dữ liệu địa lý định dang .shp, ta sử dụng package sf với hàm st_read;
Thự hiện cài các packages: ggplot2, sd, readxl, tmap tmaptools, sf, leaflet và tidyverse bằng lệnh install.packages("")
Gọi các packages

library(tidyverse)

## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::count()  masks seqinr::count()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x purrr::map()    masks maps::map()

library(sf)

## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0

library(ggplot2)
library(readxl)
library(readr)
library(tmap)
library(tmaptools)
library(leaflet)
library(knitr)

Đọc dữ liệu địa lý vào R: Nếu đọc từ máy cá nhân thì lệnh như sau:

vn <- st_read("~/OneDrive/Teaching/R project/Map/VNM_adm/gadm36_VNM_1.shp")

## Reading layer `gadm36_VNM_1' from data source `/Users/ngocnv/OneDrive/Teaching/R project/Map/VNM_adm/gadm36_VNM_1.shp' using driver `ESRI Shapefile'
## Simple feature collection with 63 features and 10 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 102.1446 ymin: 8.381355 xmax: 109.4692 ymax: 23.39269
## CRS:            4326

vn %>% class

## [1] "sf"         "data.frame"

Download data tại: “https://github.com/ngocdlu/distribution/blob/master/gadm36_VNM_1.shp”

Show bản đồ VN với đường biên các tỉnh

qtm(vn)

Dữ liệu có định dạng sf và data.frame dùng ggplot2 ta có thể trực quan hoá bản đồ như sau:

theme_set(
  theme_minimal() +
  theme(axis.line = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank(), 
        axis.title = element_blank()))

vn %>% 
  ggplot() +
  geom_sf(aes(fill = NAME_1)) +
  theme(legend.position = "none")

Tạo bản đồ nhiệt mật độ phân bố của đối tượng nghiên cứu

Trước tiên chúng ta cần chuẩn bị dữ liệu phân bố của đối tượng nghiên cứu, tổ chức dữ liệu có cấu trúc tương đồng về tỉnh thành trong đó có ít nhất 1 biến trùng tên với 1 biến trong dữ liệu địa lý ở trên (tốt nhất là biến biến mã ID của tỉnh (GID_1) hoặc biến tên tỉnh dạng tiếng anh (VANAME_1)).
Load dữ liệu đã chuẩn bị từ máy tính cá nhân hoạt Github

dist <- read_excel("~/OneDrive/Teaching/R project/RMarkdown/Distribution.xlsx")

Download data tại https://github.com/ngocdlu/distribution/blob/master/Distribution.csv (dataframe "dist cũng có 7 biến và 63 observation)

Tiếp theo sẽ trộn dữ liệu của 2 data frame “vn” và “dist”

vn1 <- merge(vn, dist, by = "GID_1")

Như vậy hiện tại ta đã có dataframe “vn” mới với 17 biến và 63 observaion (đã thêm các biên từ data dist)

Tạo bản đồ mật độ phân bố năm 2015 (biến N_2015) trong dữ liệu đã biến đổi “vn”

vn1 %>%
  ggplot() +
  geom_sf(aes(fill = N_2015)) +
  scale_fill_gradient(low="white", high="blue")

Thêm tiêu đề bản đồ:

vn1 %>%
  ggplot() +
  geom_sf(aes(fill = N_2015)) +
  ggtitle("Ban đo phan bo loai Lithocarpus sp.") +
  scale_fill_gradient(low="white", high="blue")

Xây dựng bản đồ phân bố 1 tỉnh theo huyện

Load dữ liệu địa lý Vì xây dựng bản đổ phân bố theo huyện nên ta sẽ sử dụng dữ liệu địa lý cấp 2 của Việt Nam

vn2 <- st_read("~/OneDrive/Teaching/R project/Map/VNM_adm/gadm36_VNM_2.shp")

## Reading layer `gadm36_VNM_2' from data source `/Users/ngocnv/OneDrive/Teaching/R project/Map/VNM_adm/gadm36_VNM_2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 710 features and 13 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 102.1446 ymin: 8.381355 xmax: 109.4692 ymax: 23.39269
## CRS:            4326

vn2 %>% class

## [1] "sf"         "data.frame"

Download data từ Github: “https://github.com/ngocdlu/distribution/blob/master/gadm36_VNM_2.shp”

VD: Xây dựng bản đồ phân bố loài Lithocarpus sp. tại Lâm Đồng

Trong dataframe vn1 chúng ta tìm đến tỉnh Lâm Đồng (Từ row 405 đến 416), ta sẽ thực hiện extract dữ liệu địa lý của tỉnh Lâm Đồng bằng lệnh sau:

ld <-vn2[(405:416),]

Tạo bản đồ nhiệt mật độ phân bố của đối tượng nghiên cứu theo huyện

Tương tự ở trên chúng ta cần chuẩn bị dữ liệu phân bố của đối tượng nghiên cứu, tổ chức dữ liệu có cấu trúc tương đồng về huyện trong đó có ít nhất 1 biến trùng tên với 1 biến trong dữ liệu địa lý ở trên (ở đây là biến biến mã ID của huyện(GID_2) hoặc biến tên huyện dạng tiếng anh (VANAME_2)).
Load dữ liệu đã chuẩn bị từ máy tính cá nhân hoặc Github

ld1 <- read_excel("~/OneDrive/Teaching/R project/RMarkdown/ld_huyen.xlsx")

hoặc load data tại https://github.com/ngocdlu/distribution/blob/master/ld_huyen.csv

Như vậy chúng ta đã có 2 đataframe là “ld” và “ld1”, tiếp theo sẽ trộn dữ liệu của 2 dataframe này theo biến GID_2.

ld2 <- merge(ld, ld1, by = "GID_2")

Chúng ta đã có dataframe “ld2” với 20 biến và 12 obs.

Tiếp theo sẽ tạo bản đồ phân bố từ dữ liệu đã biến đổi “ld2”

ld2 %>%
  ggplot() +
  geom_sf(aes(fill = N_2015)) +
  ggtitle("Ban đo phan bo loai Lithocarpus sp. tai LD") +
  scale_fill_gradient(low="white", high="blue")

Tạo bản đồ phân bố với tên huyện

ld2 %>%
  ggplot() +
  geom_sf(aes(fill = N_2015)) +
  ggtitle("Ban đo phan bo loai Lithocarpus sp. tai LD") +
  geom_sf_text(aes(label = VARNAME_2)) +
  scale_fill_gradient(low="white", high="blue")

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

4. Vẽ biểu đồ với ggplot2

Cài packages install.packages("ggplot2")

Gọi các packages:

library(readr)
library(knitr)

– Load và view dữ liệu (Lưu ý: Nếu dùng dữ liệu trên máy thì dùng các lệnh khác để đọc dữ liệu hoặc thay thế đường dẫn đến folder lưu dữ liệu của bạn)

Litho <- "https://raw.githubusercontent.com/ngocdlu/Litho/master/Litho.csv"
data <- read_csv(Litho)

## Parsed with column specification:
## cols(
##   len = col_double(),
##   wid = col_double(),
##   rat = col_double(),
##   cir = col_double(),
##   pet = col_double(),
##   species = col_character()
## )

attach(data)

Gọi các packages “dplyr” và “ggplot2”

library(dplyr)
library(ggplot2)

Vẽ 1 biểu đồ boxplot đơn giản

data %>% 
  ggplot(aes(species, len, fill = species)) + 
  geom_boxplot() +
  labs(title = "Overview of Leaf_Length by Species", 
       x = "Species",
       y = "Length (cm)") +
  scale_y_continuous(breaks = seq(0,30, by = 5))

Tính giá trị trung bình Tạo thêm 1 dataframe tính độ dài trung bình của phiến lá

mean_length_by_species <- data %>% 
  group_by(species) %>% 
  summarise(mean_len = mean(len)) %>% 
  as.data.frame

Xem dữ liệu mới tổng hợp

mean_length_by_species

##          species mean_len
## 1   L_cadamensis 21.37737
## 2 L_campylolepis 12.73895
## 3   L_gougerotae 10.22842

Biểu diễn độ dài trung bình lá bằng những điểm màu đỏ tương ứng trên biểu đồ. Dùng geom_point() để vẽ biểu đồ điểm

p1 <- data %>% 
  ggplot(aes(species, len, fill = species)) + 
 geom_boxplot() +
 labs(title = "Overview of Leaf Length by Species", 
            x = "Species",
            y = "Length (cm)") +
 scale_y_continuous(breaks = seq(0,30, by = 5)) +
 geom_point(aes(x = species, 
            y = round(mean_len,1)), 
            data = mean_length_by_species, 
            col = "red") + 
 geom_text(aes(label = round(mean_len,1), 
            x = species, 
            y = round(mean_len,1)), 
            data = mean_length_by_species, 
            check_overlap = TRUE, 
            vjust = -0.5)
p1

Thực hiện tương tự đối với các biến khác

Chiều rộng

mean_width_by_species <- data %>% 
  group_by(species) %>% 
  summarise(mean_wid = mean(wid)) %>% 
  as.data.frame
mean_width_by_species

##          species mean_wid
## 1   L_cadamensis 7.999474
## 2 L_campylolepis 4.000000
## 3   L_gougerotae 4.486842

Boxplot cho chiều rộng phiển lá

p2 <- data %>% 
  ggplot(aes(species, wid, fill = species)) + 
  geom_boxplot() +
  labs(title = "Overview of Leaf Width by Species", 
       x = "Species",
       y = "Width (cm)") +
  scale_y_continuous(breaks = seq(0,30, by = 2)) +
  geom_point(aes(x = species, 
                 y = round(mean_wid,1)), 
             data = mean_width_by_species, 
             col = "red") + 
  geom_text(aes(label = round(mean_wid,1), 
                x = species, 
                y = round(mean_wid,1)), 
            data = mean_width_by_species, 
            check_overlap = TRUE, 
            vjust = -0.5)
p2

Giá trị trung bình cho dạng tròn phiến lá (Circularity)

mean_Cir_by_species <- data %>% 
  group_by(species) %>% 
  summarise(mean_cir = mean(cir)) %>% 
  as.data.frame

mean_Cir_by_species

##          species  mean_cir
## 1   L_cadamensis 0.6547368
## 2 L_campylolepis 0.5800000
## 3   L_gougerotae 0.7294737

Biểu đồ

p3 <- data%>% 
ggplot(aes(species, cir, fill = species)) + 
geom_boxplot() +
labs(title = "Overview of Leaf Circularity by Species", 
     x = "Species",
     y = "Circularity") +
scale_y_continuous(breaks = seq(0,1, by = 0.1)) +
geom_point(aes(x = species, 
     y = round(mean_cir,2)), 
     data = mean_Cir_by_species, 
     col = "red") + 
geom_text(aes(label = round(mean_cir,2), 
     x = species, 
     y = round(mean_cir,2)), 
     data = mean_Cir_by_species, 
     check_overlap = TRUE, 
     vjust = -0.5)

p3

Tỉ lệ lá (Aspect_ratio)

mean_aspect_by_species <- data %>% 
  group_by(species) %>% 
  summarise(mean_aspect = mean(rat)) %>% 
  as.data.frame
mean_aspect_by_species

##          species mean_aspect
## 1   L_cadamensis    2.671053
## 2 L_campylolepis    3.186316
## 3   L_gougerotae    2.279474

Boxplot

p4 <- data %>% 
ggplot(aes(species, rat, fill = species)) + 
geom_boxplot() +
labs(title = "Overview of Leaf_Aspect_ratio by Species", 
       x = "Species",
       y = "Aspect_ratio") +
scale_y_continuous(breaks = seq(0,5, by = 1)) +
geom_point(aes(x = species, 
       y = round(mean_aspect,1)), 
       data = mean_aspect_by_species, 
       col = "red") + 
geom_text(aes(label = round(mean_aspect,1), 
       x = species, 
       y = round(mean_aspect,1)), 
       data = mean_aspect_by_species, 
       check_overlap = TRUE, 
       vjust = -0.5)

p4

Chiều dài cuống lá (pet)

mean_pet_length_by_species <- data %>% 
  group_by(species) %>% 
  summarise(mean_pet = mean(pet)) %>% 
  as.data.frame
mean_pet_length_by_species

##          species mean_pet
## 1   L_cadamensis 2.317368
## 2 L_campylolepis 1.210526
## 3   L_gougerotae 1.110000

Boxplot

p5 <- data %>% 
  ggplot(aes(species, pet, fill = species)) + 
  geom_boxplot() +
  labs(title = "Overview of pet length by Species", 
       x = "Species",
       y = "Petiole length (cm)") +
  scale_y_continuous(breaks = seq(0,4, by = 1)) +
  geom_point(aes(x = species, 
                 y = round(mean_pet,1)), 
             data = mean_pet_length_by_species, 
             col = "red") + 
  geom_text(aes(label = round(mean_pet,1), 
                x = species, 
                y = round(mean_pet,1)), 
            data = mean_pet_length_by_species, 
            check_overlap = TRUE, 
            vjust = -0.5)

p5

Vẽ biểu đồ Scatter plot mối tương quan giữa Apspect ratio và Circularity

qplot(rat, cir, data=data)

Fill màu theo Species

p6 <- qplot(rat, cir, data=data, facets=species~., col=species)
p6

Sắp xếp các biểu đồ trên cùng một trang install.packages("ggpubr")

library(ggpubr)

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:ape':
## 
##     rotate

Thực hiện sắp xếp các biểu đồ trên cùng một trang bằng lệnh:

ggarrange(p1, p2, p3, p4, labels = c("A", "B", "C", "D", "E", "F"),
            ncol = 2, nrow = 2)

Phân tích phương sai và hậu định

VD: Phân tích phương sai của của biến chiều dài giữa các loài

av <-aov(len~species)
summary(av)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## species      2 1299.8   649.9   97.95 <2e-16 ***
## Residuals   54  358.3     6.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Phân tích hậu định bằng Tukey’s HSD test

t<-TukeyHSD(av)
t

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = len ~ species)
## 
## $species
##                                   diff        lwr        upr     p adj
## L_campylolepis-L_cadamensis  -8.638421 -10.652467 -6.6243755 0.0000000
## L_gougerotae-L_cadamensis   -11.148947 -13.162993 -9.1349018 0.0000000
## L_gougerotae-L_campylolepis  -2.510526  -4.524572 -0.4964808 0.0110701

Trực quan hoá phân tích hậu định

plot(t, xlim =c(-15,0))

Kruskal test (kiểm định phi tham số) Cài đặt packages install.packages("pgirmess")

library(pgirmess)

## Registered S3 method overwritten by 'spdep':
##   method   from
##   plot.mst ape

VD: Kiểm định kruskal cho biến aspect ratio (rat)

kruskal.test(rat~species)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  rat by species
## Kruskal-Wallis chi-squared = 38.826, df = 2, p-value = 3.707e-09

Phân tích hậu định

kruskalmc(rat, species)

## Multiple comparison test after Kruskal-Wallis 
## p.value: 0.05 
## Comparisons
##                              obs.dif critical.dif difference
## L_cadamensis-L_campylolepis 14.47368     12.89198       TRUE
## L_cadamensis-L_gougerotae   18.97368     12.89198       TRUE
## L_campylolepis-L_gougerotae 33.44737     12.89198       TRUE

VD: Kiểm định kruskal cho biến aspect ratio (cir)

kruskal.test(cir~species)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  cir by species
## Kruskal-Wallis chi-squared = 39.482, df = 2, p-value = 2.671e-09

Phân tích hậu định

kruskalmc(cir, species)

## Multiple comparison test after Kruskal-Wallis 
## p.value: 0.05 
## Comparisons
##                              obs.dif critical.dif difference
## L_cadamensis-L_campylolepis 13.71053     12.89198       TRUE
## L_cadamensis-L_gougerotae   19.86842     12.89198       TRUE
## L_campylolepis-L_gougerotae 33.57895     12.89198       TRUE

5. Vẽ biểu đồ động với hichracter

– Cài đặt install.packages("highcharter") và install.packages("viridis")

#Gọi packages
library(highcharter)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Highcharts (www.highcharts.com) is a Highsoft software product which is

## not free for commercial and Governmental use

library(viridis)

## Loading required package: viridisLite

#Đọc dữ liệu
library(readxl)
depart_1 <- read_excel("~/OneDrive/Teaching/R project/Student/data/depart_1.xlsx")
attach(depart_1)

cols <- viridis(8)              #Set số màu theo só biến sẽ thể hiện
h <- highchart() %>% 
  hc_xAxis(categories = depart_1$year) %>%      #Dữ liệu sử dụng biển diễn theo trục x
  hc_add_series(name = "SP", data = depart_1$SP) %>%  #Biểu diễn biến 1
  hc_add_series(name = "NL", data = depart_1$NL) %>%  #Biểu diễn biến 2
  hc_add_series(name = "QTH", data = depart_1$QTH) %>%    #Biểu diễn biến 3
  hc_add_series(name = "XHH_CTXH", data = depart_1$XHH_CTXH) %>% #Biểu diễn biến 4
  hc_add_series(name = "SH", data = depart_1$SH) %>% ##Biểu diễn biến 5
  hc_add_series(name = "CNTT", data = depart_1$CNTT) %>% #...
  hc_add_series(name = "LH", data = depart_1$LH) %>%
  hc_add_series(name = "NN", data = depart_1$NN) %>%
  hc_add_series(name = "KT_QTKD", data = depart_1$KT_QTKD) %>%
  hc_add_series(name = "QTDL", data = depart_1$QTDL) %>%
  hc_colors(cols)       #Fill màu theo biến
h

cols <- viridis(5)         #Chỉ thể hiện 5 biến
h <- highchart() %>% 
  hc_xAxis(categories = depart_1$year) %>%
  hc_add_series(name = "QTH", data = depart_1$QTH) %>%
  hc_add_series(name = "LH", data = depart_1$LH) %>%
  hc_add_series(name = "NN", data = depart_1$NN) %>%
  hc_add_series(name = "KT_QTKD", data = depart_1$KT_QTKD) %>%
  hc_add_series(name = "QTDL", data = depart_1$QTDL) %>%
  hc_colors(cols)
h

Tuỳ chỉnh biểu đồ

h1 <- h %>%
  # Add tên tiêu đề
  hc_title(text = "Biến động số lượng sv",
           margin = 20, 
           align = "left",
           style = list(color = "black", fontWeight = "bold")
           ) %>% 
  # Add subtitle
  hc_subtitle(text = "2010 to 2019",
              align = "left") %>% 
  # Add caption
  hc_credits(enabled = T, # add caption
             text = "Nguồn: Dalat University",
             href = "http://dlu.edu.vn") %>% 
  # Add chú giải
  hc_legend(align = "right",
            verticalAlign = "top",
            layout = "vertical",
            x = 0, 
            y = 100
            ) %>%
  # Add đường so sánh
  hc_tooltip(crosshairs = TRUE, 
             backgroundColor = "#FCFFC5",
             shared = TRUE, 
             borderWidth = 4)
h1

https://rpubs.com/ngocnv

——————————————–Ngoc Nguyen—————————————–