Import your data

# Download `cbp_resp.csv` from the tidytuesday repository, parse it with
# readr, and keep the result in the global `MyData`.
MyData <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-11-26/cbp_resp.csv')
## Rows: 68815 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): month_grouping, month_abbv, component, land_border_region, area_of...
## dbl  (2): fiscal_year, encounter_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a per-column summary of the imported data with skimr.
# NOTE(review): `%>%` is not attached anywhere in the visible code; presumably
# a setup chunk loads it (e.g. via dplyr or tidyverse) -- confirm.
MyData %>% skimr::skim()
Data summary
Name Piped data
Number of rows 68815
Number of columns 12
_______________________
Column type frequency:
character 10
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
month_grouping 0 1 4 4 0 1 0
month_abbv 0 1 3 3 0 12 0
component 0 1 18 26 0 2 0
land_border_region 0 1 5 21 0 3 0
area_of_responsibility 0 1 11 26 0 41 0
aor_abbv 0 1 3 13 0 41 0
demographic 0 1 4 18 0 4 0
citizenship 0 1 4 26 0 22 0
title_of_authority 0 1 7 8 0 2 0
encounter_type 0 1 10 13 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
fiscal_year 0 1 2022.20 1.35 2020 2021 2022 2023 2024 ▅▆▇▇▇
encounter_count 0 1 166.71 727.46 0 2 9 57 25457 ▇▁▁▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of MyData, then count how many are left.
ncol_num <- ncol(select(MyData, where(is.numeric)))

# Show the count.
ncol_num
## [1] 2

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame (or tibble).
#
# Returns the number of columns for which `is.numeric()` is TRUE.
count_ncol_numeric <- function(.data) {

    # Narrow the data down to its numeric columns.
    numeric_cols <- select(.data, where(is.numeric))

    # The column count of what remains is the result.
    ncol(numeric_cols)
}

# Numeric columns in the full data set.
count_ncol_numeric(MyData)
## [1] 2
# Numeric columns among the first 10 rows and first 10 columns only.
count_ncol_numeric(MyData[1:10, 1:10])
## [1] 1

Adding arguments for details of operation

# Count the columns of a data frame that have a given type.
#
# Arguments:
#   .data      A data frame (or tibble).
#   type_data  Which kind of column to count: "numeric" (the default) or
#              "character".
#
# Returns the number of matching columns.
#
# Stops with an informative error for any other `type_data`. Without this
# check an unsupported value would skip both branches and fail later with
# "object 'ncol_type' not found".
count_ncol_type <- function(.data, type_data = "numeric") {

    # `switch()` needs exactly one string; reject anything else up front so
    # that a number is never interpreted as a positional index.
    if (!is.character(type_data) || length(type_data) != 1L) {
        stop("`type_data` must be a single string.", call. = FALSE)
    }

    # Map the requested type onto the predicate used to pick columns.
    is_type <- switch(type_data,
        numeric = is.numeric,
        character = is.character,
        stop(
            "`type_data` must be \"numeric\" or \"character\", not \"",
            type_data, "\".",
            call. = FALSE
        )
    )

    .data %>%

        # Select a type of variables
        select(where(is_type)) %>%

        # Count columns
        ncol()
}

# Default type ("numeric").
count_ncol_type(MyData)
## [1] 2
# Explicitly requesting numeric columns gives the same answer.
count_ncol_type(MyData, type_data = "numeric")
## [1] 2
# Character columns in the full data set.
count_ncol_type(MyData, type_data = "character")
## [1] 10
# Character columns among the first 10 rows and first 5 columns only.
count_ncol_type(MyData[1:10, 1:5], type_data = "character")
## [1] 4

Example 2: count rows

code snippets

# Keep the rows whose citizenship is "CANADA", then count them.
nrow_num <- nrow(filter(MyData, citizenship == "CANADA"))

# Show the count.
nrow_num
## [1] 2527

Turn them into a function

# Count the rows of a data frame whose `citizenship` equals a given value.
#
# Arguments:
#   .data             A data frame with a `citizenship` column.
#   citizenship_type  The citizenship value to match, e.g. "CANADA".
#
# Returns the number of matching rows.
count_type_citizenship <- function(.data, citizenship_type) {

    # Work on the data that was passed in, not on the global `MyData`.
    .data %>%

        # filter rows that meet a condition; `.env$` makes sure the function
        # argument is used even if the data has a column of the same name
        filter(citizenship == .env$citizenship_type) %>%

        # Count rows
        nrow()

}

MyData %>% count_type_citizenship(citizenship_type = "CANADA")
## [1] 2527

# NOTE(review): re-knit to record the result for this 10-row subset; it can be
# at most 10.
MyData %>% .[1:10, "citizenship"] %>% 
    count_type_citizenship(citizenship_type = "CANADA")

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2 Filter rows with filter()

Turn them into a function