Import your data

# Load the flights dataset into the workspace
data(flights)

# Print a per-column summary of flights; because the data arrives through the
# pipe, skim labels it "Piped data" in the summary header
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Read the board game details CSV (the path is relative to the working directory)
myData <- read.csv("../00_data/boardgames_details.csv")
# Print a per-column summary of the imported data
myData %>% skimr::skim()
Data summary
Name Piped data
Number of rows 21631
Number of columns 23
_______________________
Column type frequency:
character 10
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
primary 0 1.00 1 107 0 21236 0
description 1 1.00 49 16144 0 21615 0
boardgamecategory 283 0.99 8 216 0 6730 0
boardgamemechanic 1590 0.93 8 478 0 8291 0
boardgamefamily 3761 0.83 13 2768 0 11285 0
boardgameexpansion 16125 0.25 7 18150 0 5264 0
boardgameimplementation 16769 0.22 6 890 0 4247 0
boardgamedesigner 596 0.97 7 332 0 9136 0
boardgameartist 5907 0.73 6 8408 0 9080 0
boardgamepublisher 1 1.00 6 3744 0 11265 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
num 0 1 10815.00 6244.48 0 5407.5 10815 16222.5 21630 ▇▇▇▇▇
id 0 1 118133.09 105310.42 1 12280.5 105187 207013.0 350992 ▇▂▃▃▂
yearpublished 0 1 1986.09 210.04 -3500 2001.0 2011 2017.0 2023 ▁▁▁▁▇
minplayers 0 1 2.01 0.69 0 2.0 2 2.0 10 ▇▁▁▁▁
maxplayers 0 1 5.71 15.10 0 4.0 4 6.0 999 ▇▁▁▁▁
playingtime 0 1 90.51 534.83 0 25.0 45 90.0 60000 ▇▁▁▁▁
minplaytime 0 1 63.65 447.21 0 20.0 30 60.0 60000 ▇▁▁▁▁
maxplaytime 0 1 90.51 534.83 0 25.0 45 90.0 60000 ▇▁▁▁▁
minage 0 1 9.61 3.64 0 8.0 10 12.0 25 ▂▇▆▁▁
owned 0 1 1487.92 5395.08 0 150.0 322 903.5 168364 ▇▁▁▁▁
trading 0 1 43.59 102.41 0 5.0 13 38.0 2508 ▇▁▁▁▁
wanting 0 1 42.03 117.94 0 3.0 9 29.0 2011 ▇▁▁▁▁
wishing 0 1 233.66 800.66 0 14.0 39 131.0 19325 ▇▁▁▁▁

Create data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of flights, then count how many are left
ncol_type <- ncol(select(flights, where(is.numeric)))

# Show the count
ncol_type
## [1] 14

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame (or tibble).
#
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_cols <- select(.data, where(is.numeric))

    # The width of what remains is the answer
    ncol(numeric_cols)
}

# Whole flights table
flights %>%
    count_ncol_numeric()
## [1] 14
# First ten rows, with the first 13 columns dropped
flights %>%
    slice(1:10) %>%
    select(-(1:13)) %>%
    count_ncol_numeric()
## [1] 4

Adding arguments for details of operation

# Count the columns of a data frame that are of a given type.
#
# Arguments:
#   .data      A data frame (or tibble).
#   type_data  Which kind of column to count: "numeric" (the default) or
#              "character".
#
# Returns the number of matching columns as a single integer.
#
# Stops with an explicit error when type_data is not one of the supported
# values, rather than falling through both branches and failing with
# "object 'ncol_type' not found".
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate the requested type before touching the data
    supported_types <- c("numeric", "character")
    if (!is.character(type_data) || length(type_data) != 1L ||
        !type_data %in% supported_types) {
        stop(
            "`type_data` must be one of: ",
            paste0('"', supported_types, '"', collapse = ", "),
            call. = FALSE
        )
    }

    # Pick the predicate that identifies columns of the requested type
    is_type <- switch(
        type_data,
        numeric   = is.numeric,
        character = is.character
    )

    .data %>%

        # Select a type of variables
        select(where(is_type)) %>%

        # Count columns
        ncol()
}
# Numeric columns (the default type, spelled out)
flights %>%
    count_ncol_type(type_data = "numeric")
## [1] 14
# Character columns
flights %>%
    count_ncol_type(type_data = "character")
## [1] 4
# Character columns among the first five columns of the first ten rows
flights %>%
    slice(1:10) %>%
    select(1:5) %>%
    count_ncol_type(type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep the rows whose carrier is "DL", then count them
nrow_num <- nrow(filter(flights, carrier == "DL"))

# Show the count
nrow_num
## [1] 48110

Turn them into a function

# Count the rows of a data frame whose carrier equals a given value.
#
# Arguments:
#   .data         A data frame (or tibble) with a `carrier` column.
#   carrier_name  Value to match against the `carrier` column.
#
# Returns the number of matching rows as a single integer.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    .data %>%

        # filter rows that meet a condition; .env makes carrier_name always
        # refer to the function argument, so a column that happens to be
        # called carrier_name cannot shadow it
        filter(carrier == .env$carrier_name) %>%

        # Count rows
        nrow()
}

# Count "AA" flights among the first ten rows, using only the carrier column
flights %>%
    slice(1:10) %>%
    select(carrier) %>%
    count_num_flights_by_carrier(carrier_name = "AA")
## [1] 2

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# Keep the rows whose category string is exactly "['Medical']", then count them
nrow_num <- nrow(filter(myData, boardgamecategory == "['Medical']"))

# Show the count
nrow_num
## [1] 8

Turn them into a function

# Count the rows of a data frame whose boardgamecategory equals a given value.
#
# Arguments:
#   .data          A data frame (or tibble) with a `boardgamecategory` column.
#   game_category  Value to match exactly against `boardgamecategory`,
#                  e.g. "['Medical']".
#
# Returns the number of matching rows as a single integer.
count_num_boardgames_by_category <- function(.data, game_category) {

    .data %>%

        # filter rows that meet a condition; .env makes game_category always
        # refer to the function argument, so a column that happens to be
        # called game_category cannot shadow it
        filter(boardgamecategory == .env$game_category) %>%

        # Count rows
        nrow()
}

# Count "['Medical']" games using only the boardgamecategory column
myData %>%
    select(boardgamecategory) %>%
    count_num_boardgames_by_category(game_category = "['Medical']")
## [1] 8