# Load the `flights` dataset into the workspace.
# NOTE(review): assumes a package that ships `flights` (presumably nycflights13) is already attached — confirm.
data(flights)
# Summarise every column by type; because the data is piped in, skim labels the input "Piped data".
flights %>% skimr::skim()
| Name | Piped data |
| Number of rows | 336776 |
| Number of columns | 19 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| numeric | 14 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| carrier | 0 | 1.00 | 2 | 2 | 0 | 16 | 0 |
| tailnum | 2512 | 0.99 | 5 | 6 | 0 | 4043 | 0 |
| origin | 0 | 1.00 | 3 | 3 | 0 | 3 | 0 |
| dest | 0 | 1.00 | 3 | 3 | 0 | 105 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| year | 0 | 1.00 | 2013.00 | 0.00 | 2013 | 2013 | 2013 | 2013 | 2013 | ▁▁▇▁▁ |
| month | 0 | 1.00 | 6.55 | 3.41 | 1 | 4 | 7 | 10 | 12 | ▇▆▆▆▇ |
| day | 0 | 1.00 | 15.71 | 8.77 | 1 | 8 | 16 | 23 | 31 | ▇▇▇▇▆ |
| dep_time | 8255 | 0.98 | 1349.11 | 488.28 | 1 | 907 | 1401 | 1744 | 2400 | ▁▇▆▇▃ |
| sched_dep_time | 0 | 1.00 | 1344.25 | 467.34 | 106 | 906 | 1359 | 1729 | 2359 | ▁▇▇▇▃ |
| dep_delay | 8255 | 0.98 | 12.64 | 40.21 | -43 | -5 | -2 | 11 | 1301 | ▇▁▁▁▁ |
| arr_time | 8713 | 0.97 | 1502.05 | 533.26 | 1 | 1104 | 1535 | 1940 | 2400 | ▁▃▇▇▇ |
| sched_arr_time | 0 | 1.00 | 1536.38 | 497.46 | 1 | 1124 | 1556 | 1945 | 2359 | ▁▃▇▇▇ |
| arr_delay | 9430 | 0.97 | 6.90 | 44.63 | -86 | -17 | -5 | 14 | 1272 | ▇▁▁▁▁ |
| flight | 0 | 1.00 | 1971.92 | 1632.47 | 1 | 553 | 1496 | 3465 | 8500 | ▇▃▃▁▁ |
| air_time | 9430 | 0.97 | 150.69 | 93.69 | 20 | 82 | 129 | 192 | 695 | ▇▂▂▁▁ |
| distance | 0 | 1.00 | 1039.91 | 733.23 | 17 | 502 | 872 | 1389 | 4983 | ▇▃▂▁▁ |
| hour | 0 | 1.00 | 13.18 | 4.66 | 1 | 9 | 13 | 17 | 23 | ▁▇▇▇▅ |
| minute | 0 | 1.00 | 26.23 | 19.30 | 0 | 8 | 29 | 44 | 59 | ▇▃▆▃▅ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| time_hour | 0 | 1 | 2013-01-01 05:00:00 | 2013-12-31 23:00:00 | 2013-07-03 10:00:00 | 6936 |
# Read the board-game details CSV; the path is relative, so it resolves against the current working directory.
myData <- read.csv("../00_data/boardgames_details.csv")
# Summarise every column by type; because the data is piped in, skim labels the input "Piped data".
myData %>% skimr::skim()
| Name | Piped data |
| Number of rows | 21631 |
| Number of columns | 23 |
| _______________________ | |
| Column type frequency: | |
| character | 10 |
| numeric | 13 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| primary | 0 | 1.00 | 1 | 107 | 0 | 21236 | 0 |
| description | 1 | 1.00 | 49 | 16144 | 0 | 21615 | 0 |
| boardgamecategory | 283 | 0.99 | 8 | 216 | 0 | 6730 | 0 |
| boardgamemechanic | 1590 | 0.93 | 8 | 478 | 0 | 8291 | 0 |
| boardgamefamily | 3761 | 0.83 | 13 | 2768 | 0 | 11285 | 0 |
| boardgameexpansion | 16125 | 0.25 | 7 | 18150 | 0 | 5264 | 0 |
| boardgameimplementation | 16769 | 0.22 | 6 | 890 | 0 | 4247 | 0 |
| boardgamedesigner | 596 | 0.97 | 7 | 332 | 0 | 9136 | 0 |
| boardgameartist | 5907 | 0.73 | 6 | 8408 | 0 | 9080 | 0 |
| boardgamepublisher | 1 | 1.00 | 6 | 3744 | 0 | 11265 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| num | 0 | 1 | 10815.00 | 6244.48 | 0 | 5407.5 | 10815 | 16222.5 | 21630 | ▇▇▇▇▇ |
| id | 0 | 1 | 118133.09 | 105310.42 | 1 | 12280.5 | 105187 | 207013.0 | 350992 | ▇▂▃▃▂ |
| yearpublished | 0 | 1 | 1986.09 | 210.04 | -3500 | 2001.0 | 2011 | 2017.0 | 2023 | ▁▁▁▁▇ |
| minplayers | 0 | 1 | 2.01 | 0.69 | 0 | 2.0 | 2 | 2.0 | 10 | ▇▁▁▁▁ |
| maxplayers | 0 | 1 | 5.71 | 15.10 | 0 | 4.0 | 4 | 6.0 | 999 | ▇▁▁▁▁ |
| playingtime | 0 | 1 | 90.51 | 534.83 | 0 | 25.0 | 45 | 90.0 | 60000 | ▇▁▁▁▁ |
| minplaytime | 0 | 1 | 63.65 | 447.21 | 0 | 20.0 | 30 | 60.0 | 60000 | ▇▁▁▁▁ |
| maxplaytime | 0 | 1 | 90.51 | 534.83 | 0 | 25.0 | 45 | 90.0 | 60000 | ▇▁▁▁▁ |
| minage | 0 | 1 | 9.61 | 3.64 | 0 | 8.0 | 10 | 12.0 | 25 | ▂▇▆▁▁ |
| owned | 0 | 1 | 1487.92 | 5395.08 | 0 | 150.0 | 322 | 903.5 | 168364 | ▇▁▁▁▁ |
| trading | 0 | 1 | 43.59 | 102.41 | 0 | 5.0 | 13 | 38.0 | 2508 | ▇▁▁▁▁ |
| wanting | 0 | 1 | 42.03 | 117.94 | 0 | 3.0 | 9 | 29.0 | 2011 | ▇▁▁▁▁ |
| wishing | 0 | 1 | 233.66 | 800.66 | 0 | 14.0 | 39 | 131.0 | 19325 | ▇▁▁▁▁ |
# Keep only the numeric columns of flights and store how many there are
ncol_type <- ncol(select(flights, where(is.numeric)))
# Print the stored count
ncol_type
## [1] 14
# Count the numeric columns of a data frame.
#
# .data: a data frame (or tibble).
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {
  # Drop every non-numeric column first
  numeric_only <- select(.data, where(is.numeric))
  # The width of what remains is the answer
  ncol_type <- ncol(numeric_only)
  return(ncol_type)
}
# Count the numeric columns of the full flights data
flights %>% count_ncol_numeric()
## [1] 14
# Same count on a subset: first 10 rows, with columns 1 through 13 dropped
flights %>% .[1:10,-1:-13] %>% count_ncol_numeric()
## [1] 4
# Count the columns of a data frame that are of a given type.
#
# .data:     a data frame (or tibble).
# type_data: which kind of column to count; either "numeric" (the default)
#            or "character".
# Returns the number of matching columns.
# Raises an error when type_data is not one of the supported values.
count_ncol_type <- function(.data, type_data = "numeric") {
  stopifnot(is.character(type_data), length(type_data) == 1L)

  # Map the requested type to its predicate. An unsupported value must fail
  # loudly: otherwise no local result is ever assigned, and the function
  # would either error with "object not found" or silently pick up an
  # unrelated global variable of the same name.
  type_predicate <- switch(
    type_data,
    numeric = is.numeric,
    character = is.character,
    stop(
      "`type_data` must be \"numeric\" or \"character\", not \"",
      type_data, "\".",
      call. = FALSE
    )
  )

  ncol_type <- .data %>%
    # Select the columns of the requested type
    select(where(type_predicate)) %>%
    # Count columns
    ncol()

  # return the new variable
  return(ncol_type)
}
# type_data is left at its default ("numeric"): count numeric columns
flights %>% count_ncol_type()
## [1] 14
# Count character columns instead
flights %>% count_ncol_type(type_data = "character")
## [1] 4
# Restrict to the first 10 rows and first 5 columns before counting
flights %>% .[1:10,1:5] %>% count_ncol_type(type_data = "character")
## [1] 0
# Keep the rows whose carrier is "DL" and store how many there are
nrow_num <- nrow(filter(flights, carrier == "DL"))
# Print the stored count
nrow_num
## [1] 48110
# Count the rows of a flights-like data frame that belong to one carrier.
#
# .data:        a data frame with a `carrier` column.
# carrier_name: the carrier code to match exactly (e.g. "DL").
# Returns the number of rows whose `carrier` equals `carrier_name`.
count_num_flights_by_carrier <- function(.data, carrier_name) {
  nrow_num <- .data %>%
    # `.env$` pins `carrier_name` to the function argument. Without it, data
    # masking would prefer a column called `carrier_name` if `.data` had one,
    # and the filter would compare two columns instead.
    filter(carrier == .env$carrier_name) %>%
    # Count rows
    nrow()
  # return the new variable
  return(nrow_num)
}
# Keep only the carrier column of the first 10 rows, then count the "AA" flights
flights %>% .[1:10, "carrier"] %>% count_num_flights_by_carrier(carrier_name = "AA")
## [1] 2
Create your own.
Use the `filter()` function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".
# Keep the rows whose category string is exactly "['Medical']" and store how many there are
nrow_num <- nrow(filter(myData, boardgamecategory == "['Medical']"))
# Print the stored count
nrow_num
## [1] 8
# Count the board games whose category string equals a given value.
#
# .data:         a data frame with a `boardgamecategory` column.
# game_category: the category string to match exactly, in the same form the
#                column stores it (e.g. "['Medical']").
# Returns the number of rows whose `boardgamecategory` equals `game_category`.
count_num_boardgames_by_category <- function(.data, game_category) {
  nrow_num <- .data %>%
    # `.env$` pins `game_category` to the function argument. Without it, data
    # masking would prefer a column called `game_category` if `.data` had one,
    # and the filter would compare two columns instead.
    filter(boardgamecategory == .env$game_category) %>%
    # Count rows
    nrow()
  # return the new variable
  return(nrow_num)
}
# Keep only the boardgamecategory column, then count the rows equal to "['Medical']"
myData %>% .["boardgamecategory"] %>% count_num_boardgames_by_category(game_category = "['Medical']")
## [1] 8