Summarise multiple continuous variables
This is an R Markdown document. Markdown is a simple formatting syntax. Để tiết kiệm thời gian, chúng ta cần một cách mô tả tóm tắt nhiều biến số liên tục với kết quả được trình bày trong một bảng tóm tắt.
library(tidyverse)
library(readr)
library(readxl)
library(purrr)
purrr package
3 biến số đầu tiên của data iris là continuous, chúng ta muốn mô tả tóm tắt cùng nhau.
Lập bảng của các thống kê của các biến số liên tục này
# Summary table with one row per column of iris2. Each statistic is computed
# column by column with purrr::map_dbl(), which yields one double per column.
tibble::tibble(
  Column = colnames(iris2),
  Min    = purrr::map_dbl(.x = iris2, .f = min),
  Mean   = purrr::map_dbl(.x = iris2, .f = mean),
  Median = purrr::map_dbl(.x = iris2, .f = median),
  Max    = purrr::map_dbl(.x = iris2, .f = max),
  SD     = purrr::map_dbl(.x = iris2, .f = sd)
)
NA
Function
Viết function để tạo bảng của các thống kê như mean, median, min, max, sd
# Summarise a single column of the `iris` data frame in one row.
#
# Args:
#   var: Bare (unquoted) column name; it is forwarded into summarise()
#        with the embrace operator {{ }}.
#
# Returns:
#   The result of summarise(): one row holding the count of missing values
#   (n_miss), the number of values (n), and the mean, median, min, max and
#   sd of the column, each computed with na.rm = TRUE.
continuous_stats <- function(var) {
  summarise(
    iris,
    n_miss = sum(is.na({{ var }})),
    n      = length({{ var }}),
    mean   = mean({{ var }}, na.rm = TRUE),
    median = median({{ var }}, na.rm = TRUE),
    min    = min({{ var }}, na.rm = TRUE),
    max    = max({{ var }}, na.rm = TRUE),
    sd     = sd({{ var }}, na.rm = TRUE)
  )
}
# Stack the one-row summaries of the three measurements into a single table.
tb1 <- rbind(
  continuous_stats(Sepal.Length),
  continuous_stats(Sepal.Width),
  continuous_stats(Petal.Length)
)

# Label every row with the column it summarises. The argument is named so the
# label column gets the header `variable`; an unnamed argument would be headed
# by the deparsed expression `names(iris2)`.
cbind(variable = names(iris2), tb1)
# Keep only the first three columns of iris.
iris2 <- iris[, seq_len(3L)]
# Summarise one column of `data` in a single row, labelled with the column
# name.
#
# Args:
#   data: Data frame to summarise.
#   var:  The column to summarise. quo_name(var) is applied to its value, so
#         it is expected to arrive as a quosure (e.g. an element of quos()).
#
# Returns:
#   The result of summarise(): one row holding the column label (variable),
#   the count of missing values (n_miss), and the mean, median, min, max and
#   sd of the column, each computed with na.rm = TRUE.
continuous_stats <- function(data, var) {
  summarise(
    data,
    variable = quo_name(var),
    n_miss   = sum(is.na({{ var }})),
    mean     = mean({{ var }}, na.rm = TRUE),
    median   = median({{ var }}, na.rm = TRUE),
    min      = min({{ var }}, na.rm = TRUE),
    max      = max({{ var }}, na.rm = TRUE),
    sd       = sd({{ var }}, na.rm = TRUE)
  )
}
# Run continuous_stats() once per quoted column of iris2 and row-bind the
# resulting one-row summaries into a single data frame.
purrr::map_dfr(
  quos(Sepal.Length, Sepal.Width, Petal.Length),
  continuous_stats,
  data = iris2
)
dplyr::across()
# For each of the four measurement columns, compute five statistics at once.
# The function returns a length-5 vector, so the summary has five rows.
tb2 <- summarise(
  iris,
  across(
    .cols = c(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width),
    .fns = function(x) {
      c(
        min(x, na.rm = TRUE),
        mean(x, na.rm = TRUE),
        median(x, na.rm = TRUE),
        max(x, na.rm = TRUE),
        sd(x, na.rm = TRUE)
      )
    }
  )
)

# Row labels, in the same order as the values returned by the function above.
cbind(stats = c("min", "mean", "median", "max", "sd"), tb2)
for loop
# The four numeric measurement columns of iris.
iris3 <- iris[, seq_len(4L)]

# Preallocate one double slot per column for every statistic.
n_cols       <- ncol(iris3)
iris_mins    <- numeric(n_cols)
iris_means   <- numeric(n_cols)
iris_medians <- numeric(n_cols)
iris_maxs    <- numeric(n_cols)
iris_sds     <- numeric(n_cols)
for loop
# Fill the preallocated vectors with the statistics of each column of iris3.
# The assignments are plain statements: wrapping them in list() only built a
# throwaway list on every iteration.
for (i in seq_along(iris3)) {
  column <- iris3[[i]]
  iris_mins[[i]]    <- min(column)
  iris_means[[i]]   <- round(mean(column), 2) # rounded to 2 decimals
  iris_medians[[i]] <- median(column)
  iris_maxs[[i]]    <- max(column)
  iris_sds[[i]]     <- round(sd(column), 2)   # rounded to 2 decimals
}
# Assemble the statistics into a data frame: one row per statistic (row names
# come from the vector names passed to rbind()), one column per measurement.
# Only the numeric vectors are bound. Binding names(iris3) as an extra row
# would coerce the whole matrix, and so every statistic, to character.
df <- as.data.frame(rbind(
  iris_mins,
  iris_means,
  iris_medians,
  iris_maxs,
  iris_sds
))

# Take the column headers from the data itself rather than hard-coding them.
colnames(df) <- names(iris3)

print(df)
Name the rowname (đặt tên cho column rowname)
# Copy the row names into a leading `stats` column, then clear the row names
# so the data frame falls back to default row numbering.
df <- cbind(stats = row.names(df), df)
rownames(df) <- NULL
print(df)
Hoặc sử dụng rownames_to_column()
library(tibble)

# rownames_to_column() needs the labels to live in the row names, and the
# target column must not exist yet. If `df` already has a `stats` column
# (added by the base-R approach), move it back into the row names first.
df_rn <- if ("stats" %in% names(df)) {
  column_to_rownames(df, var = "stats")
} else {
  df
}

rownames_to_column(df_rn, var = "stats") %>%
  head()
NA
NA
LS0tDQp0aXRsZTogIk11bHRpcGxlIENvbnRpbnVvdXMgVmFyaWFibGUgU3VtbWFyeSINCmF1dGhvcjogIlRoaWV1IE5ndXllbiINCmRhdGU6ICIxMC8yMy8yMDIxIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9DQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpDQpgYGANCg0KIyMgU3VtbWFyaXNlIG11bHRpcGxlIGNvbnRpbnVvdXMgdmFyaWFibGVzDQoNClRoaXMgaXMgYW4gUiBNYXJrZG93biBkb2N1bWVudC4gTWFya2Rvd24gaXMgYSBzaW1wbGUgZm9ybWF0dGluZyBzeW50YXggxJDhu4MgdGnhur90IGtp4buHbSB0aOG7nWkgZ2lhbiBjaMO6bmcgdGEgY+G6p24gbeG7mXQgY8OhY2ggbcO0IHThuqMgdMOzbSB04bqvdCBuaGnhu4F1IGJp4bq/biBz4buRIGxpw6puIHThu6VjIHbhu5tpIGvhur90IHF14bqjIMSRxrDhu6NjIHRyw6xuaCBiw6B5IHRyb25nIG3hu5l0IGLhuqNuZyB0w7NtIHThuq90Lg0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShyZWFkcikNCmxpYnJhcnkocmVhZHhsKQ0KbGlicmFyeShwdXJycikNCmBgYA0KDQojIyBwdXJyciBwYWNrYWdlDQoNCjMgYmnhur9uIHPhu5EgxJHhuqd1IHRpw6puIGPhu6dhIGRhdGEgaXJpcyBsw6AgY29udGludW91cywgY2jDum5nIHRhIG114buRbiBtw7QgdOG6oyB0w7NtIHThuq90IGPDuW5nIG5oYXUuDQoNCmBgYHtyIGVjaG89RkFMU0V9DQpkYXRhKCJpcmlzIikNCmlyaXMyIDwtIGlyaXNbLDE6M10NCmhlYWQoaXJpczIpDQpgYGANCg0KDQpM4bqtcCBi4bqjbmcgY+G7p2EgY8OhYyB0aOG7kW5nIGvDqiBj4bunYSBjw6FjIGJp4bq/biBz4buRIGxpw6puIHThu6VjIG7DoHkNCg0KYGBge3J9DQp0aWJibGUoDQogIENvbHVtbiAgID0gbmFtZXMoaXJpczIpLA0KICBNaW4gICAgICA9IHB1cnJyOjptYXBfZGJsKGlyaXMyLCBtaW4pLA0KICBNZWFuICAgICA9IHB1cnJyOjptYXBfZGJsKGlyaXMyLCBtZWFuKSwNCiAgTWVkaWFuICAgPSBwdXJycjo6bWFwX2RibChpcmlzMiwgbWVkaWFuKSwNCiAgTWF4ICAgICAgPSBwdXJycjo6bWFwX2RibChpcmlzMiwgbWF4KSwNCiAgU0QgICAgICAgPSBwdXJycjo6bWFwX2RibChpcmlzMiwgc2QpDQopDQoNCmBgYA0KDQojIyBGdW5jdGlvbg0KDQpWaeG6v3QgZnVuY3Rpb24gxJHhu4MgdOG6oW8gYuG6o25nIGPhu6dhIGPDoWMgdGjhu5FuZyBrw6ogbmjGsCBtZWFuLCBtZWRpYW4sIG1pbiwgbWF4LCBzZA0KDQoNCmBgYHtyfQ0KY29udGludW91c19zdGF0cyA8LSBmdW5jdGlvbih2YXIpIHsNCiAgaXJpcyAlPiUgDQogICAgc3VtbWFyaXNlKA0KICAgICAgbl9taXNzID0gc3VtKGlzLm5hKHt7IHZhciB9fSkpLA0KICAgICAgbiAgICAgID0gbGVuZ3RoKHt7IHZhciB9fSksDQogICAgICBtZWFuICAgPSBtZWFuKHt7IHZhciB9fSwgbmEucm0gPSBUUlVFKSwNCiAgICAgIG1lZGlhbiA9IG1lZGlhbih7eyB2YXIgfX0sIG5hLnJtID0gVFJVRSksDQogICAgICBt
aW4gICAgPSBtaW4oe3sgdmFyIH19LCBuYS5ybSA9IFRSVUUpLA0KICAgICAgbWF4ICAgID0gbWF4KHt7IHZhciB9fSwgbmEucm0gPSBUUlVFKSwNCiAgICAgIHNkID0gc2Qoe3sgdmFyIH19LCBuYS5ybSA9IFRSVUUpDQogICAgKQ0KfQ0KDQp0YjEgPC0gcmJpbmQoDQpjb250aW51b3VzX3N0YXRzKFNlcGFsLkxlbmd0aCksDQpjb250aW51b3VzX3N0YXRzKFNlcGFsLldpZHRoKSwNCmNvbnRpbnVvdXNfc3RhdHMoUGV0YWwuTGVuZ3RoKSkNCg0KY2JpbmQobmFtZXMoaXJpczIpLCB0YjEpDQpgYGANCg0KYGBge3J9DQppcmlzMiA8LSBpcmlzWywxOjNdDQoNCmNvbnRpbnVvdXNfc3RhdHMgPC0gZnVuY3Rpb24oZGF0YSwgdmFyKSB7DQogIGRhdGEgJT4lICAjIERvbid0IGZvcmdldCB0byByZXBsYWNlICJzdHVkeSIgd2l0aCAiZGF0YSIgaGVyZSB0b28hDQogICAgc3VtbWFyaXNlKA0KICAgICAgdmFyaWFibGUgPSBxdW9fbmFtZSh2YXIpLA0KICAgICAgbl9taXNzICAgPSBzdW0oaXMubmEoe3sgdmFyfX0gKSksDQogICAgICBtZWFuICAgICA9IG1lYW4oe3sgdmFyIH19LCBuYS5ybSA9IFRSVUUpLA0KICAgICAgbWVkaWFuICAgPSBtZWRpYW4oe3sgdmFyIH19LCBuYS5ybSA9IFRSVUUpLA0KICAgICAgbWluICAgICAgPSBtaW4oe3sgdmFyIH19LCBuYS5ybSA9IFRSVUUpLA0KICAgICAgbWF4ICAgICAgPSBtYXgoe3sgdmFyIH19LCBuYS5ybSA9IFRSVUUpLA0KICAgICAgc2QgICAgICAgPSBzZCh7eyB2YXIgfX0sIG5hLnJtID0gVFJVRSkNCiAgICApIA0KfQ0KDQptYXBfZGZyKA0KICAueCA9IHF1b3MoU2VwYWwuTGVuZ3RoLCBTZXBhbC5XaWR0aCwgUGV0YWwuTGVuZ3RoKSwNCiAgLmYgPSBjb250aW51b3VzX3N0YXRzLCBkYXRhID0gaXJpczINCikNCmBgYA0KDQoNCg0KIyMgZHBseXI6OmFjcm9zcygpDQoNCmBgYHtyfQ0KdGIyIDwtIGlyaXMgJT4lIA0KICBzdW1tYXJpc2UoDQogICAgYWNyb3NzKA0KICAgICAgLmNvbHMgID0gYyhTZXBhbC5MZW5ndGgsIFNlcGFsLldpZHRoLCBQZXRhbC5MZW5ndGgsICBQZXRhbC5XaWR0aCksDQogICAgICAuZm5zICAgPSB+IGMobWluKC54LCBuYS5ybSA9IFRSVUUpLCBtZWFuKC54LCBuYS5ybSA9IFRSVUUpLA0KICAgICAgICAgICAgICAgICAgIG1lZGlhbigueCwgbmEucm0gPSBUUlVFKSwgbWF4KC54LCBuYS5ybSA9IFRSVUUpLCANCiAgICAgICAgICAgICAgICAgICBzZCgueCwgbmEucm0gPSBUUlVFKSkNCiAgICApDQogICkNCg0KY2JpbmQoc3RhdHMgPSBjKCJtaW4iLCAibWVhbiIsICJtZWRpYW4iLCAibWF4IiwgInNkIiksIHRiMikNCmBgYA0KDQojIyBmb3IgbG9vcA0KDQpgYGB7cn0NCmlyaXMzIDwtIGlyaXNbLDE6NF0NCg0KaXJpc19taW5zIDwtIHZlY3RvcigiZG91YmxlIiwgbmNvbChpcmlzMykpDQppcmlzX21lYW5zIDwtIHZlY3RvcigiZG91YmxlIiwgbmNvbChpcmlzMykpDQppcmlzX21lZGlhbnMgPC0gdmVjdG9yKCJkb3VibGUiLCBuY29sKGlyaXMzKSkNCmlyaXNfbWF4cyA8LSB2ZWN0
b3IoImRvdWJsZSIsIG5jb2woaXJpczMpKQ0KaXJpc19zZHMgPC0gdmVjdG9yKCJkb3VibGUiLCBuY29sKGlyaXMzKSkNCmBgYA0KDQpmb3IgbG9vcA0KDQpgYGB7cn0NCmZvcihpIGluIHNlcV9hbG9uZyhpcmlzMykpIHsNCiAgbGlzdCgNCiAgaXJpc19taW5zW1tpXV0gPC0gbWluKGlyaXMzW1tpXV0pLA0KICBpcmlzX21lYW5zW1tpXV0gPC0gcm91bmQobWVhbihpcmlzM1tbaV1dKSwyKSwNCiAgaXJpc19tZWRpYW5zW1tpXV0gPC0gbWVkaWFuKGlyaXMzW1tpXV0pLA0KICBpcmlzX21heHNbW2ldXSA8LSBtYXgoaXJpczNbW2ldXSksDQogIGlyaXNfc2RzW1tpXV0gPC0gcm91bmQoc2QoaXJpczNbW2ldXSksMikNCiAgKQ0KfQ0KDQoNCmRmIDwtIGFzLmRhdGEuZnJhbWUoIHJiaW5kKA0KbmFtZXMoaXJpczMpICwNCmlyaXNfbWlucyAsDQppcmlzX21lYW5zLA0KaXJpc19tZWRpYW5zLA0KaXJpc19tYXhzLA0KaXJpc19zZHMNCikpDQpjb2xuYW1lcyhkZikgPC0gYygiU2VwYWwuTGVuZ3RoIiwgIlNlcGFsLldpZHRoIiwgDQogICAgICAgICAgICAgICAgICAiUGV0YWwuTGVuZ3RoIiwgIlBldGFsLldpZHRoIikNCmRmIDwtIGRmWy0xLF0NCg0KcHJpbnQoZGYpDQpgYGANCg0KDQojIyBOYW1lIHRoZSByb3duYW1lICjEkeG6t3QgdMOqbiBjaG8gY29sdW1uIHJvd25hbWUpDQoNCmBgYHtyfQ0KZGYgPC0gY2JpbmQoc3RhdHMgPSByb3duYW1lcyhkZiksIGRmKQ0Kcm93Lm5hbWVzKGRmKSA8LSBOVUxMDQpwcmludChkZikNCmBgYA0KDQpIb+G6t2Mgc+G7rSBk4bulbmcgcm93bmFtZXNfdG9fY29sdW1uKCkNCg0KYGBge3J9DQpsaWJyYXJ5KHRpYmJsZSkNCnJvd25hbWVzX3RvX2NvbHVtbihkZiwgdmFyPSJzdGF0cyIpICU+JSBoZWFkDQoNCg0KYGBgDQoNCg==