# Load the cbp_resp dataset (TidyTuesday, 2024-11-26) straight from GitHub
MyData <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-11-26/cbp_resp.csv"
)
## Rows: 68815 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): month_grouping, month_abbv, component, land_border_region, area_of...
## dbl (2): fiscal_year, encounter_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a per-column summary of MyData (missing values, unique counts,
# quantiles) grouped by column type
MyData %>% skimr::skim()
| Name | Piped data |
| Number of rows | 68815 |
| Number of columns | 12 |
| _______________________ | |
| Column type frequency: | |
| character | 10 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| month_grouping | 0 | 1 | 4 | 4 | 0 | 1 | 0 |
| month_abbv | 0 | 1 | 3 | 3 | 0 | 12 | 0 |
| component | 0 | 1 | 18 | 26 | 0 | 2 | 0 |
| land_border_region | 0 | 1 | 5 | 21 | 0 | 3 | 0 |
| area_of_responsibility | 0 | 1 | 11 | 26 | 0 | 41 | 0 |
| aor_abbv | 0 | 1 | 3 | 13 | 0 | 41 | 0 |
| demographic | 0 | 1 | 4 | 18 | 0 | 4 | 0 |
| citizenship | 0 | 1 | 4 | 26 | 0 | 22 | 0 |
| title_of_authority | 0 | 1 | 7 | 8 | 0 | 2 | 0 |
| encounter_type | 0 | 1 | 10 | 13 | 0 | 3 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| fiscal_year | 0 | 1 | 2022.20 | 1.35 | 2020 | 2021 | 2022 | 2023 | 2024 | ▅▆▇▇▇ |
| encounter_count | 0 | 1 | 166.71 | 727.46 | 0 | 2 | 9 | 57 | 25457 | ▇▁▁▁▁ |
# Number of numeric columns in MyData: keep only the numeric columns,
# then count what is left
ncol_num <- ncol(select(MyData, where(is.numeric)))
# Show the result
ncol_num
## [1] 2
# Count the numeric columns of a data frame.
#   .data: a data frame or tibble
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {
  # Drop every non-numeric column first ...
  numeric_only <- select(.data, where(is.numeric))
  # ... so the number of remaining columns is the answer
  ncol(numeric_only)
}
# Whole data set
count_ncol_numeric(MyData)
## [1] 2
# First 10 rows and first 10 columns only
count_ncol_numeric(MyData[1:10, 1:10])
## [1] 1
# Count the columns of a given type in a data frame.
#   .data:     a data frame or tibble
#   type_data: which kind of column to count, "numeric" (default) or
#              "character"
# Returns the number of columns of the requested type.
# Stops with an informative error when type_data is not a supported value.
count_ncol_type <- function(.data, type_data = "numeric") {
  # One predicate per supported type; add an entry here to support more types
  predicates <- list(numeric = is.numeric, character = is.character)

  # Reject anything that is not exactly one supported name. Without this check
  # an unknown value matched no branch and the function failed later with the
  # unhelpful "object 'ncol_type' not found".
  is_supported <- is.character(type_data) &&
    length(type_data) == 1L &&
    type_data %in% names(predicates)
  if (!is_supported) {
    stop(
      "`type_data` must be one of: ",
      paste0("\"", names(predicates), "\"", collapse = ", "),
      call. = FALSE
    )
  }
  is_type <- predicates[[type_data]]

  .data %>%
    # Select the columns of the requested type
    select(where(is_type)) %>%
    # Count columns
    ncol()
}
# Default type ("numeric")
count_ncol_type(MyData)
## [1] 2
# Same count with the type spelled out
count_ncol_type(MyData, type_data = "numeric")
## [1] 2
# Character columns of the whole data set
count_ncol_type(MyData, type_data = "character")
## [1] 10
# Character columns among the first 5 columns (first 10 rows)
count_ncol_type(MyData[1:10, 1:5], type_data = "character")
## [1] 4
# Number of rows whose citizenship is "CANADA": keep the matching rows,
# then count them
nrow_num <- nrow(filter(MyData, citizenship == "CANADA"))
# Show the result
nrow_num
## [1] 2527
# Count the rows recorded for one citizenship.
#   .data:            a data frame or tibble with a `citizenship` column
#   citizenship_type: the citizenship value to match, e.g. "CANADA"
# Returns the number of rows of .data whose citizenship equals
# citizenship_type.
count_type_citizenship <- function(.data, citizenship_type) {
  # Use the data and the value that were passed in; reading the global MyData
  # and hard-coding "CANADA" here would silently ignore both arguments.
  .data %>%
    # filter rows that meet the condition; `.env$` guarantees the function
    # argument is used even if the data has a column named `citizenship_type`
    filter(citizenship == .env$citizenship_type) %>%
    # Count rows
    nrow()
}
# The column name must be spelled "citizenship": the function now evaluates
# its input, so a misspelled column is an error instead of being ignored.
MyData %>% .[1:10, "citizenship"] %>%
  count_type_citizenship(citizenship_type = "CANADA")
## (re-run to refresh this output: the count now covers only the 10 selected
## rows, not the 2527 matches in the full data set)
Create your own.
Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".