Import your data

# Load the `flights` dataset into the workspace
# NOTE(review): presumably provided by nycflights13 - confirm it is attached earlier
data(flights)

# Print a per-column summary of the data, grouped by variable type
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936

Create Data frame functions

Example 1: count columns

code snippets

# Number of numeric columns in flights: keep only the columns for which
# is.numeric() is TRUE, then count what is left
ncol_num <- ncol(select(flights, where(is.numeric)))

ncol_num
## [1] 14

Turn them into a function

# Count how many columns of a data frame are numeric
#
# .data:   a data frame (or tibble)
# Returns: the number of columns for which is.numeric() is TRUE
count_numeric_var <- function(.data) {

    # Drop every column that is not numeric
    numeric_cols <- select(.data, where(is.numeric))

    # The column count of what remains is the answer
    return(ncol(numeric_cols))

}
# Whole data set
count_numeric_var(flights)
## [1] 14
# Same data set without its first 13 columns
count_numeric_var(flights[, -(1:13)])
## [1] 4

Add arguments to control the details of the operation

# Count how many columns of a data frame are of a given type
#
# .data:   a data frame (or tibble)
# type:    which kind of column to count, either "numeric" (the default)
#          or "character"
# Returns: the number of columns of the requested type
# Errors:  stops with an informative message when `type` is not a single
#          string or is not one of the supported values
count_type_of_var <- function(.data, type = "numeric") {

    # Validate early: a vector, NA or non-character `type` would otherwise
    # break the dispatch below with an unrelated-looking error
    if (!is.character(type) || length(type) != 1L || is.na(type)) {
        stop("`type` must be a single string.", call. = FALSE)
    }

    # Map the requested type to its predicate; the unnamed last argument is
    # the fall-through, so an unknown type fails here instead of later with
    # "object 'ncol_num' not found"
    type_predicate <- switch(
        type,
        numeric   = is.numeric,
        character = is.character,
        stop(
            "Unsupported `type`: \"", type,
            "\". Use \"numeric\" or \"character\".",
            call. = FALSE
        )
    )

    .data %>%

        # Keep only the columns that satisfy the predicate
        select(where(type_predicate)) %>%

        # Count columns
        ncol()

}
# Character columns in the whole data set
count_type_of_var(flights, type = "character")
## [1] 4
# Character columns once the first 13 columns are dropped
count_type_of_var(flights[, -(1:13)], type = "character")
## [1] 1

Example 2: count rows

code snippets

# Number of rows in flights whose carrier is "UA": keep the matching rows,
# then count them
nrow_num <- nrow(filter(flights, carrier == "UA"))

nrow_num
## [1] 58665

Turn them into a function

# Count the rows (flights) that belong to one carrier
#
# .data:        a data frame (or tibble) with a `carrier` column
# carrier_name: the carrier code to match, e.g. "UA"
# Returns:      the number of rows whose `carrier` equals `carrier_name`
count_n_flights_by_carrier <- function(.data, carrier_name) {

    .data %>%

        # .env$ pins carrier_name to the function argument, so the result
        # stays correct even if .data has a column called carrier_name
        filter(carrier == .env$carrier_name) %>%

        # Count rows
        nrow()

}
# All flights
count_n_flights_by_carrier(flights, carrier_name = "UA")
## [1] 58665
# Only the first 10 rows
count_n_flights_by_carrier(flights[1:10, ], carrier_name = "UA")
## [1] 3

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

library(troopdata)
# Convert the package's data frame to a tibble; as_tibble() is the
# conversion function, whereas tibble() only works here because it
# unpacks an unnamed data frame argument
troopdata <- as_tibble(troopdata)
troopdata
## # A tibble: 14,435 × 10
##    countryname   ccode iso3c  year  troops  army  navy air_force marine…¹ region
##    <chr>         <int> <chr> <int>   <int> <int> <int>     <int>    <int> <chr> 
##  1 United States     2 USA    1950  941231    NA    NA        NA       NA North…
##  2 United States     2 USA    1951 1645490    NA    NA        NA       NA North…
##  3 United States     2 USA    1952 2338379    NA    NA        NA       NA North…
##  4 United States     2 USA    1953 2017164    NA    NA        NA       NA North…
##  5 United States     2 USA    1954 2159404    NA    NA        NA       NA North…
##  6 United States     2 USA    1955 2003012    NA    NA        NA       NA North…
##  7 United States     2 USA    1956 1913912    NA    NA        NA       NA North…
##  8 United States     2 USA    1957 1830532    NA    NA        NA       NA North…
##  9 United States     2 USA    1958 1786761    NA    NA        NA       NA North…
## 10 United States     2 USA    1959 1556335    NA    NA        NA       NA North…
## # … with 14,425 more rows, and abbreviated variable name ¹​marine_corps
# Rows per year
count(troopdata, year)
## # A tibble: 72 × 2
##     year     n
##    <int> <int>
##  1  1950   186
##  2  1951   186
##  3  1952   186
##  4  1953   186
##  5  1954   187
##  6  1955   187
##  7  1956   187
##  8  1957   187
##  9  1958   187
## 10  1959   187
## # … with 62 more rows
# Rows per country name
count(troopdata, countryname)
## # A tibble: 239 × 2
##    countryname           n
##    <chr>             <int>
##  1 Aden                 56
##  2 Afghanistan          72
##  3 Afloat                2
##  4 Akrotiri              6
##  5 Albania              72
##  6 Algeria              72
##  7 Andorra              29
##  8 Angola               72
##  9 Antigua              58
## 10 Antigua & Barbuda    41
## # … with 229 more rows
# Rows per region
count(troopdata, region)
## # A tibble: 9 × 2
##   region                         n
##   <chr>                      <int>
## 1 Afloat                         2
## 2 East Asia & Pacific         2269
## 3 Europe & Central Asia       3806
## 4 Latin America & Caribbean   2581
## 5 Middle East & North Africa  1524
## 6 North America                215
## 7 South Asia                   655
## 8 Sub-Saharan Africa          3369
## 9 <NA>                          14
# Total of the marine_corps column for North America in 2021
troopdata %>%
    filter(region == "North America" & year == 2021) %>%
    summarise(n_marines = sum(marine_corps))
## # A tibble: 1 × 1
##   n_marines
##       <int>
## 1    147863
# Total of the marine_corps column for East Asia & Pacific in 1990
# NOTE: sum() returns NA as soon as one value in the group is missing, which
# is why the result recorded below is NA; sum(marine_corps, na.rm = TRUE)
# would add up the available values instead
troopdata %>%
    filter(region == "East Asia & Pacific" & year == 1990) %>%
    summarise(n_marines = sum(marine_corps))
## # A tibble: 1 × 1
##   n_marines
##       <int>
## 1        NA

Turn them into a function