# Load the palm tree data from the Excel workbook into `palm`.
palm <- read_excel("../00_data/palmtrees.xlsx")
## Warning: Coercing text to numeric in V1449 / R1449C22: '0.56675675700000006'
# Summarise every column (type, missing values, distribution). The data is
# piped in, which is why the summary below is labelled "Piped data".
palm %>% skimr::skim()
| Name | Piped data |
| Number of rows | 2557 |
| Number of columns | 29 |
| _______________________ | |
| Column type frequency: | |
| character | 17 |
| numeric | 12 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| spec_name | 0 | 1.00 | 10 | 32 | 0 | 2557 | 0 |
| acc_genus | 0 | 1.00 | 4 | 19 | 0 | 185 | 0 |
| acc_species | 0 | 1.00 | 2 | 18 | 0 | 1977 | 0 |
| palm_tribe | 0 | 1.00 | 7 | 16 | 0 | 29 | 0 |
| palm_subfamily | 0 | 1.00 | 9 | 13 | 0 | 5 | 0 |
| climbing | 0 | 1.00 | 4 | 12 | 0 | 3 | 0 |
| acaulescent | 0 | 1.00 | 4 | 15 | 0 | 3 | 0 |
| erect | 0 | 1.00 | 4 | 9 | 0 | 3 | 0 |
| stem_solitary | 375 | 0.85 | 4 | 12 | 0 | 3 | 0 |
| stem_armed | 55 | 0.98 | 5 | 9 | 0 | 2 | 0 |
| leaves_armed | 120 | 0.95 | 5 | 9 | 0 | 2 | 0 |
| understorey_canopy | 265 | 0.90 | 4 | 11 | 0 | 3 | 0 |
| fruit_size_categorical | 505 | 0.80 | 5 | 5 | 0 | 2 | 0 |
| fruit_shape | 765 | 0.70 | 5 | 9 | 0 | 7 | 0 |
| fruit_color_description | 709 | 0.72 | 3 | 96 | 0 | 769 | 0 |
| main_fruit_colors | 758 | 0.70 | 3 | 32 | 0 | 147 | 0 |
| conspicuousness | 758 | 0.70 | 7 | 11 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| max_stem_height_m | 446 | 0.83 | 10.86 | 13.03 | 0.00 | 2.50 | 6.00 | 15.00 | 170.00 | ▇▁▁▁▁ |
| max_stem_dia_cm | 602 | 0.76 | 12.38 | 17.07 | 0.00 | 2.00 | 5.00 | 17.00 | 175.00 | ▇▁▁▁▁ |
| max_leaf_number | 1251 | 0.51 | 14.37 | 9.85 | 4.00 | 8.00 | 11.00 | 18.00 | 75.00 | ▇▂▁▁▁ |
| max__blade__length_m | 659 | 0.74 | 2.37 | 2.25 | 0.15 | 1.00 | 1.69 | 3.00 | 25.00 | ▇▁▁▁▁ |
| max__rachis__length_m | 1026 | 0.60 | 1.97 | 1.80 | 0.05 | 0.75 | 1.50 | 2.70 | 18.50 | ▇▁▁▁▁ |
| max__petiole_length_m | 1347 | 0.47 | 0.85 | 0.84 | 0.00 | 0.25 | 0.55 | 1.25 | 6.75 | ▇▂▁▁▁ |
| average_fruit_length_cm | 505 | 0.80 | 2.20 | 2.24 | 0.30 | 1.05 | 1.50 | 2.50 | 45.00 | ▇▁▁▁▁ |
| min_fruit_length_cm | 1651 | 0.35 | 2.18 | 2.30 | 0.30 | 1.00 | 1.50 | 2.50 | 40.00 | ▇▁▁▁▁ |
| max_fruit_length_cm | 1641 | 0.36 | 3.10 | 3.32 | 0.50 | 1.40 | 2.00 | 3.50 | 50.00 | ▇▁▁▁▁ |
| average_fruit_width_cm | 563 | 0.78 | 1.59 | 1.55 | 0.20 | 0.75 | 1.05 | 1.80 | 20.00 | ▇▁▁▁▁ |
| min_fruit_width_cm | 1563 | 0.39 | 1.48 | 1.36 | 0.20 | 0.70 | 1.00 | 1.80 | 13.00 | ▇▁▁▁▁ |
| max_fruit_width_cm | 1555 | 0.39 | 2.13 | 2.09 | 0.22 | 1.00 | 1.50 | 2.50 | 20.00 | ▇▁▁▁▁ |
# Keep only the numeric columns of `palm`, then count how many remain.
ncol_num <- ncol(select(palm, where(is.numeric)))
ncol_num
## [1] 12
# Count the numeric columns of a data frame.
#
# .data: a data frame (or tibble).
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {
  # Drop every non-numeric column first ...
  numeric_cols <- select(.data, where(is.numeric))
  # ... so the width of what is left is the answer.
  ncol(numeric_cols)
}
# Whole data set.
count_ncol_numeric(palm)
## [1] 12
# First ten rows, with the first thirteen columns dropped.
count_ncol_numeric(palm[1:10, -(1:13)])
## [1] 10
# Count the columns of a data frame that hold a given type of data.
#
# .data:     a data frame (or tibble).
# type_data: which columns to count, either "numeric" (the default) or
#            "character".
# Returns the number of matching columns.
# Stops with an error when `type_data` is not one of the supported values;
# previously such a call failed with "object 'ncol_type' not found".
count_ncol_type <- function(.data, type_data = "numeric") {
  # Pick the column predicate for the requested type. The unnamed last
  # argument of switch() is the fall-through for unsupported values.
  type_predicate <- switch(
    type_data,
    numeric = is.numeric,
    character = is.character,
    stop(
      "`type_data` must be \"numeric\" or \"character\", not \"",
      type_data, "\".",
      call. = FALSE
    )
  )

  .data %>%
    # select the columns of the requested type
    select(where(type_predicate)) %>%
    # count columns
    ncol()
}
# Default type ("numeric") on the whole data set.
count_ncol_type(palm)
## [1] 12
# Character columns on the whole data set.
count_ncol_type(palm, type_data = "character")
## [1] 17
# Character columns among the first five columns (first ten rows).
count_ncol_type(palm[1:10, 1:5], type_data = "character")
## [1] 5
# Keep the rows whose tribe is "Areceae", then count them.
nrow_num <- nrow(filter(palm, palm_tribe == "Areceae"))
nrow_num
## [1] 689
# Count the rows of a data frame that belong to a given palm tribe.
#
# .data:      a data frame (or tibble) with a `palm_tribe` column.
# palm_tribe: the tribe name to count. Defaults to "Areceae", the value the
#             function used to hard-code, so calls that omit it still work.
# Returns the number of rows whose `palm_tribe` equals the requested tribe.
count_num_palms_by_tribe <- function(.data, palm_tribe = "Areceae") {
  .data %>%
    # The column and the argument share a name, so spell out which is which:
    # inside filter(), `.data$` is the column and `.env$` is the argument.
    filter(.data$palm_tribe == .env$palm_tribe) %>%
    # count rows
    nrow()
}
# Try the function on the first thirty rows of the tribe column.
count_num_palms_by_tribe(palm[1:30, "palm_tribe"], palm_tribe = "Areceae")
## [1] 8
Create your own.
Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".
# Keep the rows whose fruit shape is "ovoid", then count them.
nrow_num <- nrow(filter(palm, fruit_shape == "ovoid"))
nrow_num
## [1] 813
# Count the rows of a data frame whose fruit has a given shape.
#
# .data:       a data frame (or tibble) with a `fruit_shape` column.
# fruit_shape: the shape to count. Defaults to "ovoid", the value the
#              function used to hard-code, so calls that omit it still work.
# Returns the number of rows whose `fruit_shape` equals the requested shape.
count_fruit_by_shape <- function(.data, fruit_shape = "ovoid") {
  .data %>%
    # The column and the argument share a name, so spell out which is which:
    # inside filter(), `.data$` is the column and `.env$` is the argument.
    filter(.data$fruit_shape == .env$fruit_shape) %>%
    # count rows
    nrow()
}
# Try the function on the first fifteen rows of the fruit-shape column.
count_fruit_by_shape(palm[1:15, "fruit_shape"], fruit_shape = "ovoid")
## [1] 6