Import your data

# Load the `flights` dataset into the workspace
# NOTE(review): presumably provided by nycflights13 - confirm the package is
# attached before this point
data(flights)

# Print a per-column summary of flights, grouped by variable type
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Read the winners CSV (path is relative to this document) into a tibble
Mydata <- read_csv(file = "../00_data/tdf_winners.csv")
## Rows: 106 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): winner_name, winner_team, full_name, nickname, birth_town, birth_c...
## dbl  (9): edition, distance, time_overall, time_margin, stage_wins, stages_l...
## date (3): start_date, born, died
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the imported data to inspect its first rows and column types
Mydata
## # A tibble: 106 × 19
##    edition start_date winner_name  winner_team distance time_overall time_margin
##      <dbl> <date>     <chr>        <chr>          <dbl>        <dbl>       <dbl>
##  1       1 1903-07-01 Maurice Gar… La Françai…     2428         94.6        2.99
##  2       2 1904-07-02 Henri Cornet Conte           2428         96.1        2.27
##  3       3 1905-07-09 Louis Trous… Peugeot–Wo…     2994         NA         NA   
##  4       4 1906-07-04 René Pottier Peugeot–Wo…     4637         NA         NA   
##  5       5 1907-07-08 Lucien Peti… Peugeot–Wo…     4488         NA         NA   
##  6       6 1908-07-13 Lucien Peti… Peugeot–Wo…     4497         NA         NA   
##  7       7 1909-07-05 François Fa… Alcyon–Dun…     4498         NA         NA   
##  8       8 1910-07-01 Octave Lapi… Alcyon–Dun…     4734         NA         NA   
##  9       9 1911-07-02 Gustave Gar… Alcyon–Dun…     5343         NA         NA   
## 10      10 1912-06-30 Odile Defra… Alcyon–Dun…     5289         NA         NA   
## # ℹ 96 more rows
## # ℹ 12 more variables: stage_wins <dbl>, stages_led <dbl>, height <dbl>,
## #   weight <dbl>, age <dbl>, born <date>, died <date>, full_name <chr>,
## #   nickname <chr>, birth_town <chr>, birth_country <chr>, nationality <chr>

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of flights, then count how many remain
ncol_num <- ncol(select(flights, where(is.numeric)))

ncol_num
## [1] 14

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame (or tibble).
#
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_cols <- select(.data, where(is.numeric))

    # The width of what is left is the answer
    ncol(numeric_cols)
}

# Whole dataset
count_ncol_numeric(flights)
## [1] 14
# First 10 rows with the first 13 columns dropped
count_ncol_numeric(flights[1:10, -1:-13])
## [1] 4

Add arguments to control the details of the operation

# Count the columns of a data frame that are of a given type.
#
# Arguments:
#   .data      A data frame (or tibble).
#   type_data  Which kind of column to count: "numeric" (the default) or
#              "character".
#
# Returns the number of columns of the requested type.
# Stops with an error when `type_data` is not a single supported type name.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Each supported type name maps to the predicate used to pick columns;
    # supporting another type only needs one more entry here
    type_checks <- list(
        numeric   = is.numeric,
        character = is.character
    )

    # Fail early with a clear message; an unmatched value previously fell
    # through both branches and died with "object 'ncol_type' not found"
    is_supported <- is.character(type_data) &&
        length(type_data) == 1L &&
        type_data %in% names(type_checks)
    if (!is_supported) {
        stop(
            "`type_data` must be one of: ",
            paste0('"', names(type_checks), '"', collapse = ", "),
            call. = FALSE
        )
    }
    is_type <- type_checks[[type_data]]

    ncol_type <- .data %>%

        # Select the columns of the requested type
        select(where(is_type)) %>%

        # Count columns
        ncol()

    return(ncol_type)
}

# Default type: numeric columns
count_ncol_type(flights)
## [1] 14
# Character columns instead
count_ncol_type(flights, type_data = "character")
## [1] 4
# Only the first 10 rows and first 5 columns
count_ncol_type(flights[1:10, 1:5], type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep the rows whose carrier is "UA", then count them
nrow_num <- nrow(filter(flights, carrier == "UA"))

nrow_num
## [1] 58665

Turn them into a function

# Count the rows of a data frame whose `carrier` column equals a given value.
#
# Arguments:
#   .data         A data frame (or tibble) with a `carrier` column.
#   carrier_name  The carrier value to match, e.g. "UA".
#
# Returns the number of matching rows.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    nrow_num <- .data %>%

        # `!!` injects the argument's value, so a column of .data that happens
        # to be called `carrier_name` cannot shadow it under data masking
        filter(carrier == !!carrier_name) %>%

        nrow()

    return(nrow_num)
}
# Whole dataset
count_num_flights_by_carrier(flights, carrier_name = "UA")
## [1] 58665
# Only the first 10 rows of the carrier column
count_num_flights_by_carrier(flights[1:10, "carrier"], carrier_name = "AA")
## [1] 2

Example 3: count rows in your own data

code snippets

# Keep the rows won by "Lance Armstrong", then count them
nrow_num <- nrow(filter(Mydata, winner_name == "Lance Armstrong"))

nrow_num
## [1] 7

Turn them into a function

# Count the rows of a data frame whose `winner_name` column equals a given
# name.
#
# Arguments:
#   .data        A data frame (or tibble) with a `winner_name` column.
#   winner_name  The name to match, e.g. "Lance Armstrong".
#
# Returns the number of matching rows.
count_num_wins_by_person <- function(.data, winner_name) {

    .data %>%

        # The bare `winner_name` is the column; `!!winner_name` injects the
        # value of the argument, which would otherwise be masked by the column
        filter(winner_name == !!winner_name) %>%

        nrow()
}

# Backward-compatible alias: keeps the original, misspelled name working
count_num_wins_by_perosn <- count_num_wins_by_person

# Wins recorded for two different riders
count_num_wins_by_perosn(Mydata, winner_name = "Lance Armstrong")
## [1] 7
count_num_wins_by_perosn(Mydata, winner_name = "Philippe Thys")
## [1] 3