Import your data

# Load the `flights` dataset into the workspace
# (presumably from the nycflights13 package; confirm which package is attached).
data(flights)
# Profile every column: type, missing values, and summary statistics.
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Import the NFL attendance workbook (path is relative to this document) and
# profile every column with skimr.
# NOTE(review): the summary below lists weekly_attendance as character rather
# than numeric; presumably some cells hold non-numeric text such as "NA".
# Confirm, and consider read_excel(..., na = "NA") if it should be numeric.
attendance <- read_excel("../00_data/nfl_attendance.xlsx")
attendance %>% skimr::skim()
Data summary
Name Piped data
Number of rows 10846
Number of columns 8
_______________________
Column type frequency:
character 3
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
team 0 1 5 13 0 32 0
team_name 0 1 4 10 0 32 0
weekly_attendance 0 1 2 6 0 4074 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 2009.53 5.75 2000 2005 2010 2015 2019 ▇▇▇▇▇
total 0 1 1080910.03 72876.97 760644 1040509 1081090 1123230 1322087 ▁▁▇▆▁
home 0 1 540455.01 66774.65 202687 504360 543185 578342 741775 ▁▁▅▇▁
away 0 1 540455.01 25509.33 450295 524974 541757 557741 601655 ▁▂▇▇▂
week 0 1 9.00 4.90 1 5 9 13 17 ▇▆▆▆▇
# Import the NFL standings workbook (path is relative to this document) and
# profile every column with skimr.
standings <- read_excel("../00_data/nfl_standings.xlsx")
standings %>% skimr::skim()
Data summary
Name Piped data
Number of rows 638
Number of columns 15
_______________________
Column type frequency:
character 4
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
team 0 1 5 13 0 32 0
team_name 0 1 4 10 0 32 0
playoffs 0 1 8 11 0 2 0
sb_winner 0 1 12 13 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 2009.53 5.76 2000.0 2005.00 2010.0 2014.75 2019.0 ▇▇▇▇▇
wins 0 1 7.98 3.08 0.0 6.00 8.0 10.00 16.0 ▂▆▇▆▂
loss 0 1 7.98 3.08 0.0 6.00 8.0 10.00 16.0 ▂▆▇▆▂
points_for 0 1 350.28 71.40 161.0 299.00 348.0 396.00 606.0 ▂▇▇▂▁
points_against 0 1 350.28 59.55 165.0 310.00 347.0 391.50 517.0 ▁▃▇▆▁
points_differential 0 1 0.00 101.09 -261.0 -75.00 1.5 72.75 315.0 ▂▆▇▅▁
margin_of_victory 0 1 0.00 6.32 -16.3 -4.70 0.1 4.57 19.7 ▂▆▇▅▁
strength_of_schedule 0 1 0.00 1.63 -4.6 -1.10 0.0 1.20 4.3 ▁▅▇▅▁
simple_rating 0 1 0.00 6.20 -17.4 -4.47 0.0 4.50 20.1 ▁▆▇▅▁
offensive_ranking 0 1 0.00 4.34 -11.7 -3.18 0.0 2.70 15.9 ▁▇▇▂▁
defensive_ranking 0 1 0.00 3.57 -9.8 -2.40 0.1 2.50 9.8 ▁▅▇▅▁

Create data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of `flights`, then count how many remain
ncol_num <- ncol(select(flights, where(is.numeric)))

# Show the result
ncol_num
## [1] 14

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame or tibble.
#
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_only <- select(.data, where(is.numeric))

    # The width of what is left is the answer
    ncol(numeric_only)

}

# Whole dataset: 14 of the 19 columns are numeric
flights %>% count_ncol_numeric()
## [1] 14
# Keep the first 10 rows and drop the first 13 columns; 4 of the remaining
# columns are numeric
flights %>% .[1:10, -1:-13] %>% count_ncol_numeric()
## [1] 4

Adding arguments for details of operation

# Count the columns of a data frame that have a given type.
#
# Arguments:
#   .data      A data frame or tibble.
#   type_data  A single string naming the column type to count: "numeric"
#              (the default), "character", "logical", or "factor".
#
# Returns the number of columns of the requested type.
#
# Stops with an informative error when `type_data` is not one of the supported
# names. The earlier if/else chain fell through in that case and failed with
# "object 'ncol_type' not found".
count_ncol_type <- function(.data, type_data = "numeric") {

    # One predicate per supported type; add an entry here to support more
    type_predicates <- list(
        numeric   = is.numeric,
        character = is.character,
        logical   = is.logical,
        factor    = is.factor
    )

    # Validate before touching the data so bad input fails fast and clearly
    is_supported <- is.character(type_data) &&
        length(type_data) == 1L &&
        !is.na(type_data) &&
        type_data %in% names(type_predicates)

    if (!is_supported) {
        stop(
            "`type_data` must be one of: ",
            paste0('"', names(type_predicates), '"', collapse = ", "),
            call. = FALSE
        )
    }

    # Predicate that identifies columns of the requested type
    is_type <- type_predicates[[type_data]]

    .data %>%

        # Select a type of variables
        select(where(is_type)) %>%

        # Count columns
        ncol()

}

# Default type_data = "numeric": counts the 14 numeric columns
flights %>% count_ncol_type()
## [1] 14
# Explicitly ask for character columns instead
flights %>% count_ncol_type(type_data = "character")
## [1] 4
# First 10 rows and first 5 columns only; none of those columns is character
flights %>% .[1:10,1:5] %>% count_ncol_type(type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep only the United Airlines ("UA") flights, then count the rows that remain
nrow_num <- nrow(filter(flights, carrier == "UA"))

# Show the result
nrow_num
## [1] 58665

Turn them into a function

# Count the rows of a data frame that belong to one carrier.
#
# Arguments:
#   .data         A data frame or tibble with a `carrier` column.
#   carrier_name  A single carrier code to match, e.g. "UA".
#
# Returns the number of rows whose `carrier` equals `carrier_name`. Rows where
# the comparison is NA are not counted, because filter() drops them.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    # A longer vector would be recycled by `==` and silently give a wrong count
    if (length(carrier_name) != 1L) {
        stop("`carrier_name` must be a single value.", call. = FALSE)
    }

    .data %>%

        # filter rows that meet a condition; `.env$` makes sure the function
        # argument is used even if the data has a `carrier_name` column
        filter(carrier == .env$carrier_name) %>%

        # Count rows
        nrow()
}

# Subset to the first 10 rows and only the carrier column, then count the
# rows for one carrier at a time
flights %>% .[1:10, "carrier"] %>% count_num_flights_by_carrier(carrier_name = "UA")
## [1] 3
flights %>% .[1:10, "carrier"] %>% count_num_flights_by_carrier(carrier_name = "AA")
## [1] 2

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# Keep only the Patriots' rows of `standings`, then count the rows that remain
nrows_num <- nrow(filter(standings, team_name == "Patriots"))

# Show the result
nrows_num
## [1] 20

Turn them into a function

# Count the rows (one per season in the standings data) for one team.
#
# Arguments:
#   .data          A data frame or tibble with a `team_name` column.
#   team_name_txt  A single team name to match, e.g. "Patriots".
#
# Returns the number of rows whose `team_name` equals `team_name_txt`. Rows
# where the comparison is NA are not counted, because filter() drops them.
count_num_seasons_by_team <- function(.data, team_name_txt) {

    # A longer vector would be recycled by `==` and silently give a wrong count
    if (length(team_name_txt) != 1L) {
        stop("`team_name_txt` must be a single value.", call. = FALSE)
    }

    .data %>%

        # filter rows that meet a condition; `.env$` makes sure the function
        # argument is used even if the data has a `team_name_txt` column
        filter(team_name == .env$team_name_txt) %>%

        # Count rows
        nrow()
}

# Count the seasons recorded in `standings` for two different teams
standings %>% count_num_seasons_by_team(team_name_txt = "Patriots")
## [1] 20
standings %>% count_num_seasons_by_team(team_name_txt = "Texans")
## [1] 18
# The Texans were only established in 2002, so they have 18 seasons in the
# data, compared with the Patriots' 20.