library(knitr)
library(readr)
library(psych)
# Attach the tidyverse in a single call; as the startup banner below shows,
# this already attaches ggplot2 and dplyr.
# library() loads exactly one package per call: extra positional strings are
# matched to its `help` and `pos` arguments rather than being treated as more
# packages, which is what triggered the "'dplyr' not found on search path"
# warning previously.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ dplyr 1.0.7
## ✓ tibble 3.1.6 ✓ stringr 1.4.0
## ✓ tidyr 1.1.4 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Location of the raw CSV file that holds the data set.
Litho <- "https://raw.githubusercontent.com/ngocdlu/Litho/master/Litho.csv"
# Download and parse the CSV; readr guesses the column types and reports them.
df <- readr::read_csv(file = Litho)
## Rows: 57 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): species
## dbl (5): len, wid, rat, cir, pet
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Descriptive statistics of `len` for each species.
# Columns are referenced through `df` explicitly instead of via attach(), so
# nothing is pushed onto the search path and no same-named objects end up
# masking one another. describeBy() replaces the deprecated describe.by().
describeBy(df$len, group = df$species)
##
## Descriptive statistics by group
## group: L_cadamensis
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 19 21.38 2.92 21.14 21.39 2.79 16.12 26.46 10.34 0.19 -0.89 0.67
## ------------------------------------------------------------
## group: L_campylolepis
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 19 12.74 2.9 12.93 12.68 2.49 7.05 19.4 12.35 0.17 -0.06 0.66
## ------------------------------------------------------------
## group: L_gougerotae
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 19 10.23 1.72 10.93 10.29 2.05 6.29 13.18 6.89 -0.4 -0.65 0.4

# All rows of L_cadamensis, selected by species name rather than by a
# hard-coded row range so the subset stays correct if the file is reordered
# or extended (for the current data this is the same 19 rows as before).
cadam2 <- dplyr::filter(df, species == "L_cadamensis")
# Columns 4 to 6 of that subset.
# NOTE(review): judging by the describe() listing below these appear to be
# cir, pet and species -- confirm this is the intended selection.
cadam1 <- cadam2[, 4:6]

# Descriptive statistics for every column of the L_cadamensis subset.
describe(cadam2)
## vars n mean sd median trimmed mad min max range skew
## len 1 19 21.38 2.92 21.14 21.39 2.79 16.12 26.46 10.34 0.19
## wid 2 19 8.00 0.77 7.92 7.97 0.74 6.58 9.88 3.30 0.34
## rat 3 19 2.67 0.23 2.65 2.68 0.13 2.15 3.01 0.86 -0.32
## cir 4 19 0.65 0.04 0.64 0.65 0.03 0.60 0.73 0.13 0.51
## pet 5 19 2.32 0.41 2.29 2.30 0.40 1.61 3.40 1.79 0.56
## species* 6 19 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## kurtosis se
## len -0.89 0.67
## wid -0.08 0.18
## rat -0.45 0.05
## cir -1.05 0.01
## pet 0.45 0.09
## species* NaN 0.00
# Box plot of `wid` for each species, filled by species, printed directly.
ggplot(df, aes(x = species, y = wid, fill = species)) +
  geom_boxplot() +
  labs(
    title = "Biểu đồ so sánh chiều rộng phiến lá",
    x = "Loài thực vật",
    y = "Chiều rộng (cm)"
  )
# The same box plot, built through a pipe and stored in `p1` so that further
# layers can be added on top of it afterwards.
p1 <- df %>%
  ggplot(mapping = aes(x = species, y = wid, fill = species)) +
  geom_boxplot() +
  labs(
    title = "Biểu đồ so sánh chiều rộng phiến lá",
    x = "Loài thực vật",
    y = "Chiều rộng (cm)"
  )
p1
# Mean of `wid` per species, converted to a plain data.frame so it can be
# supplied as the data of an extra plot layer.
mean_by_species <- df %>%
  group_by(species) %>%
  summarise(mean_wid = mean(wid)) %>%
  as.data.frame()

# Box plot with the per-species means drawn on top as red points.
p2 <- p1 +
  geom_point(
    data = mean_by_species,
    mapping = aes(x = species, y = mean_wid),
    colour = "red"
  )
p2
# Minimum of `wid` per species, as a plain data.frame for use as layer data.
# The summary column is called `min_wid` because it is computed from `wid`
# (it was previously labelled `min_len`, which suggested the wrong variable).
min_by_species <- df %>%
  group_by(species) %>%
  summarise(min_wid = min(wid)) %>%
  as.data.frame()

# Box plot with each species' minimum marked as a blue point. The position is
# rounded to one decimal so the point sits exactly at the value shown by the
# text label added below.
p3 <- p1 +
  geom_point(
    data = min_by_species,
    mapping = aes(x = species, y = round(min_wid, 1)),
    colour = "blue"
  )
p3

# Add the rounded minimum as a text label just above each point. The labelled
# plot is stored in its own object and printed once; previously it was built
# without being saved and the unlabelled `p3` was printed a second time.
p4 <- p3 +
  geom_text(
    data = min_by_species,
    mapping = aes(
      label = round(min_wid, 1),
      x = species,
      y = round(min_wid, 1)
    ),
    colour = "red",
    check_overlap = TRUE,
    vjust = -0.5
  )
p4
# ----- end -----
# Ngoc Nguyen