Import your data

# Load the `flights` dataset into the session.
# NOTE(review): presumably this is nycflights13::flights -- confirm the package is attached before this line.
data(flights)

flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Read the movies data from an Excel workbook (relative path).
# The first column of the sheet has no header, so it is auto-named `...1`
# on import (see the "New names" message printed by this call).
Movies <- read_excel("../00_data/MyData.xlsx")
## New names:
## • `` -> `...1`
Movies
## # A tibble: 3,401 × 9
##     ...1 release_date movie     production_budget domestic_gross worldwide_gross
##    <dbl> <chr>        <chr>                 <dbl>          <dbl>           <dbl>
##  1     1 6/22/2007    Evan Alm…         175000000      100289690       174131329
##  2     2 7/28/1995    Waterwor…         175000000       88246220       264246220
##  3     3 5/12/2017    King Art…         175000000       39175066       139950708
##  4     4 12/25/2013   47 Ronin          175000000       38362475       151716815
##  5     5 6/22/2018    Jurassic…         170000000      416769345      1304866322
##  6     6 8/1/2014     Guardian…         170000000      333172112       771051335
##  7     7 5/7/2010     Iron Man…         170000000      312433331       621156389
##  8     8 4/4/2014     Captain …         170000000      259746958       714401889
##  9     9 7/11/2014    Dawn of …         170000000      208545589       710644566
## 10    10 11/10/2004   The Pola…         170000000      186493587       310634169
## # ℹ 3,391 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>
Movies %>% skimr::skim()
Data summary
Name Piped data
Number of rows 3401
Number of columns 9
_______________________
Column type frequency:
character 5
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
release_date 0 1 8 10 0 1768 0
movie 0 1 1 35 0 3400 0
distributor 0 1 2 22 0 202 0
mpaa_rating 0 1 1 5 0 5 0
genre 0 1 5 9 0 5 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
...1 0 1 1701 981.93 1 851 1701 2551 3401 ▇▇▇▇▇
production_budget 0 1 33284743 34892390.59 250000 9000000 20000000 45000000 175000000 ▇▂▁▁▁
domestic_gross 0 1 45421793 58825660.56 0 6118683 25533818 60323786 474544677 ▇▁▁▁▁
worldwide_gross 0 1 94115117 140918241.82 0 10618813 40159017 117615211 1304866322 ▇▁▁▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of `flights`, then count how many are left
ncol_num <- ncol(select(flights, where(is.numeric)))

ncol_num
## [1] 14

Turn them into a function

count_ncol_numeric <- function(.data) {

    # Count the numeric columns of a data frame.
    #   .data: the data frame (or tibble) to inspect
    #   returns: the number of columns for which is.numeric() is TRUE

    # Drop every column that is not numeric
    numeric_cols <- select(.data, where(is.numeric))

    # The width of what remains is the answer
    ncol(numeric_cols)
}


flights %>% count_ncol_numeric()
## [1] 14
flights %>% .[1:10, -1:-13] %>% count_ncol_numeric()
## [1] 4

Adding arguments for details of operation

count_ncol_type <- function(.data, type_data = "numeric") {

    # Count the columns of a data frame that are of a given type.
    #   .data:     the data frame (or tibble) to inspect
    #   type_data: which kind of column to count, either "numeric" (default)
    #              or "character"
    #   returns:   the number of columns of the requested type
    #   errors:    if `type_data` is not a single string, or is not one of
    #              the supported types

    # Validate up front so a bad argument produces a clear message instead of
    # the confusing "object 'ncol_type' not found" the if/else chain produced
    if (!is.character(type_data) || length(type_data) != 1L || is.na(type_data)) {
        stop("`type_data` must be a single string.", call. = FALSE)
    }

    # Map the requested type onto the predicate used to pick columns;
    # the trailing unnamed argument is the fallback for unsupported types
    type_test <- switch(
        type_data,
        numeric   = is.numeric,
        character = is.character,
        stop(
            "`type_data` must be \"numeric\" or \"character\", not \"",
            type_data, "\".",
            call. = FALSE
        )
    )

    .data %>%

        # Select a type of variables
        select(where(type_test)) %>%

        # Count columns
        ncol()
}

flights %>% count_ncol_type()
## [1] 14
flights %>% count_ncol_type(type_data = "character")
## [1] 4
flights %>% .[1:10, 1:5] %>% count_ncol_type(type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep only the rows whose carrier is "DL", then count them
nrow_num <- nrow(filter(flights, carrier == "DL"))

nrow_num
## [1] 48110

Turn them into a function

count_num_flights_by_carrier <- function(.data, carrier_name) {

    # Count the rows of a flights table that belong to one carrier.
    #   .data:        a data frame with a `carrier` column
    #   carrier_name: the carrier code to match against `carrier`
    #   returns:      the number of matching rows

    # Keep only the rows for the requested carrier
    matching_flights <- filter(.data, carrier == carrier_name)

    # The number of remaining rows is the flight count
    nrow(matching_flights)
}

flights %>% head(10) %>% count_num_flights_by_carrier(carrier_name = "UA")
## [1] 3

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2 Filter rows with filter()

# Keep only the rows whose genre is "Action", then count them
nrow_num <- nrow(filter(Movies, genre == "Action"))

nrow_num
## [1] 573

Turn them into a function

count_num_Movies_by_genre <- function(Movies, genre) {

    # Count the movies that belong to one genre.
    #   Movies:  a data frame with a `genre` column
    #   genre:   the genre to match against the `genre` column
    #   returns: the number of matching rows
    #   errors:  if `Movies` has no `genre` column

    Movies %>%

        # The argument and the column are both called `genre`, so a bare
        # `genre == genre` compares a value with itself and keeps every row.
        # `.data$` pins the left side to the column and `.env$` pins the
        # right side to the function argument.
        filter(.data$genre == .env$genre) %>%

        # Count rows
        nrow()
}

Movies %>% count_num_Movies_by_genre(genre = "Action")
## [1] 573