Import your data

# Load the `flights` dataset into the workspace.
# NOTE(review): presumably provided by the nycflights13 package — confirm it is
# attached earlier in the document.
data(flights)

# Print a per-column summary of flights, grouped by column type
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Read the UFO sightings CSV; the path is relative to the working directory
ufo_sightings <- read_csv("../00_data/ufo_sightings.csv")
## Rows: 60632 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Location.City, Location.State, Location.Country, Data.Shape, Data....
## dbl (11): Data.Encounter duration, Location.Coordinates.Latitude, Location.C...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a per-column summary of ufo_sightings, grouped by column type
ufo_sightings %>% skimr::skim()
Data summary
Name Piped data
Number of rows 60632
Number of columns 16
_______________________
Column type frequency:
character 5
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Location.City 0 1 3 23 0 9149 0
Location.State 0 1 2 2 0 51 0
Location.Country 0 1 2 2 0 1 0
Data.Shape 0 1 3 9 0 28 0
Data.Description excerpt 0 1 1 246 0 60410 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Data.Encounter duration 0 1 5410.13 414386.67 0.01 30.00 180.00 600.00 66276000.00 ▇▁▁▁▁
Location.Coordinates.Latitude 0 1 38.31 5.55 19.43 34.09 38.90 41.92 70.64 ▁▇▆▁▁
Location.Coordinates.Longitude 0 1 -95.58 18.03 -170.48 -114.34 -89.91 -81.03 -66.98 ▁▁▅▅▇
Dates.Sighted.Year 0 1 2004.45 10.18 1910.00 2002.00 2007.00 2011.00 2014.00 ▁▁▁▁▇
Dates.Sighted.Month 0 1 6.87 3.25 1.00 4.00 7.00 10.00 12.00 ▆▅▇▆▇
Date.Sighted.Day 0 1 15.03 8.92 1.00 7.00 15.00 22.00 31.00 ▇▆▇▆▅
Dates.Sighted.Hour 0 1 15.81 7.54 0.00 11.00 19.00 21.00 23.00 ▂▁▁▂▇
Dates.Sighted.Minute 0 1 17.72 17.92 0.00 0.00 15.00 30.00 59.00 ▇▂▅▂▁
Dates.Documented.Year 0 1 2007.40 4.48 1998.00 2004.00 2008.00 2012.00 2014.00 ▃▃▅▅▇
Dates.Documented.Month 0 1 6.71 3.49 1.00 4.00 7.00 10.00 12.00 ▇▅▅▅▇
Dates.Documented.Day 0 1 15.23 8.79 1.00 8.00 14.00 22.00 31.00 ▇▇▆▅▆

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of flights and count how many there are
ncol_num <- ncol(select(flights, where(is.numeric)))

# Show the result
ncol_num
## [1] 14

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame (or tibble).
#
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_only <- select(.data, where(is.numeric))

    # The column count of what is left is the answer
    ncol(numeric_only)
}

# Count the numeric columns of the full data set
flights %>% count_ncol_numeric()
## [1] 14
# Keep only the first 10 rows and the first 5 columns, then count the numeric
# columns among them
flights %>% .[1:10, 1:5] %>% count_ncol_numeric()
## [1] 5
# Keep the first 10 rows and drop columns 1 through 13 (negative indices
# exclude), then count the numeric columns among those that remain
flights %>% .[1:10, -1:-13] %>% count_ncol_numeric()
## [1] 4

Adding arguments for details of operation

# Count the columns of a data frame that are of a given type.
#
# Arguments:
#   .data      A data frame (or tibble).
#   type_data  Which kind of column to count: "numeric" (the default) or
#              "character".
#
# Returns the number of columns of the requested type. Any other value of
# `type_data` stops with an error that names the argument, rather than failing
# later on an undefined result variable.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate up front so a malformed argument is reported clearly
    if (!is.character(type_data) || length(type_data) != 1L || is.na(type_data)) {
        stop(
            "`type_data` must be a single string: \"numeric\" or \"character\".",
            call. = FALSE
        )
    }

    # Map the requested type to its predicate; the unnamed last argument is the
    # fall-through for unsupported values
    is_type <- switch(
        type_data,
        numeric = is.numeric,
        character = is.character,
        stop(
            "`type_data` must be \"numeric\" or \"character\", not \"",
            type_data, "\".",
            call. = FALSE
        )
    )

    .data %>%

        # Select the columns of the requested type
        select(where(is_type)) %>%

        # Count columns
        ncol()
}

# Default type_data = "numeric": count the numeric columns
flights %>% count_ncol_type()
## [1] 14
# Count the character columns instead
flights %>% count_ncol_type(type_data = "character")
## [1] 4
# First 10 rows and first 5 columns only; none of those columns is character
flights %>% .[1:10, 1:5] %>% count_ncol_type(type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep only the flights whose carrier is "DL" and count them
nrow_num <- nrow(filter(flights, carrier == "DL"))

# Show the result
nrow_num
## [1] 48110

Turn them into a function

# Count the rows of a data frame whose `carrier` column equals a given code.
#
# Arguments:
#   .data         A data frame with a `carrier` column.
#   carrier_name  Carrier code to match, e.g. "AA".
#
# Returns the number of matching rows.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    .data %>%

        # `.env$` makes filter() read carrier_name from the function argument.
        # A bare name would be looked up in the data first, so a column that
        # happened to be called carrier_name would silently be used instead.
        filter(carrier == .env$carrier_name) %>%

        # Count rows
        nrow()
}

# Keep the first 10 rows and only the carrier column, then count the "AA"
# flights among them
flights %>% .[1:10, "carrier"] %>% count_num_flights_by_carrier(carrier_name = "AA")
## [1] 2

Example 3: count rows by year of sighting

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2 Filter rows with filter()

# Count the UFO sightings recorded for the year 1910
nrow_num <- ufo_sightings %>%

    # Dates.Sighted.Year is a numeric column, so compare it with a number
    # rather than relying on implicit coercion to the string "1910"
    filter(Dates.Sighted.Year == 1910) %>%

    # Count rows
    nrow()

nrow_num
## [1] 1

Turn them into a function

# Count the rows of a data frame whose `Dates.Sighted.Year` column equals a
# given year.
#
# Arguments:
#   .data         A data frame with a `Dates.Sighted.Year` column.
#   year_sighted  Year to match, e.g. 2014. The value is compared with `==`, so
#                 a string such as "2014" is matched through R's usual
#                 coercion rules.
#
# Returns the number of matching rows.
count_num_year_of_sighting <- function(.data, year_sighted) {

    .data %>%

        # `.env$` makes filter() read year_sighted from the function argument.
        # A bare name would be looked up in the data first, so a column that
        # happened to be called year_sighted would silently be used instead.
        filter(Dates.Sighted.Year == .env$year_sighted) %>%

        # Count rows
        nrow()
}

# Count the 2014 sightings in the full data set (piped form).
# The year is passed as a string while Dates.Sighted.Year is numeric, so the
# `==` inside the function relies on R's implicit coercion.
ufo_sightings %>% count_num_year_of_sighting(year_sighted = "2014")
## [1] 1894
# Same call without the pipe: the data frame is the first argument
count_num_year_of_sighting(ufo_sightings, year_sighted = "2014")
## [1] 1894
# Restrict to the first 10 rows (year column only) before counting
ufo_sightings %>% .[1:10, "Dates.Sighted.Year"] %>% count_num_year_of_sighting(year_sighted = "2014")
## [1] 1
# Restrict to the first 500 rows before counting
ufo_sightings %>% .[1:500, "Dates.Sighted.Year"] %>% count_num_year_of_sighting(year_sighted = "2014")
## [1] 34
# Restrict to the first 30316 rows (half of the 60632 rows) before counting
ufo_sightings %>% .[1:30316, "Dates.Sighted.Year"] %>% count_num_year_of_sighting(year_sighted = "2014")
## [1] 1018