Import your data

# Read myData.csv (path is relative to this document) into a data frame named `data`
data <- read.csv("../00_data/myData.csv")

# Print a skimr summary of `data`; the summary itself is shown below
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 691
Number of columns 22
_______________________
Column type frequency:
character 8
numeric 14
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
sort_name 0 1.00 1 34 0 391 0
clean_name 0 1.00 1 34 0 386 0
album 0 1.00 1 69 0 685 0
genre 164 0.76 5 35 0 16 0
type 0 1.00 4 13 0 5 0
spotify_url 36 0.95 22 36 0 655 0
artist_gender 5 0.99 4 11 0 3 0
album_id 0 1.00 6 22 0 691 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
X 0 1.00 346.00 199.62 1 173.50 346.0 518.50 691 ▇▇▇▇▇
rank_2003 191 0.72 250.50 144.49 1 125.75 250.5 375.25 500 ▇▇▇▇▇
rank_2012 191 0.72 250.50 144.48 1 125.75 250.5 375.25 500 ▇▇▇▇▇
rank_2020 191 0.72 250.50 144.48 1 125.75 250.5 375.25 500 ▇▇▇▇▇
differential 0 1.00 -12.32 199.04 -501 -137.50 -8.0 106.00 484 ▂▅▇▃▂
release_year 0 1.00 1982.87 14.55 1955 1971.00 1979.0 1994.00 2019 ▂▇▃▃▂
weeks_on_billboard 119 0.83 64.27 75.14 1 20.75 44.5 81.00 741 ▇▁▁▁▁
peak_billboard_position 0 1.00 61.19 77.16 1 2.00 17.0 111.50 201 ▇▁▁▁▂
spotify_popularity 37 0.95 55.81 14.95 10 46.00 57.0 68.00 91 ▁▃▇▇▂
artist_member_count 5 0.99 2.75 2.02 1 1.00 2.0 4.00 12 ▇▅▁▁▁
artist_birth_year_sum 5 0.99 5363.21 3947.13 1910 1948.00 3896.0 7845.00 23368 ▇▅▁▁▁
debut_album_release_year 5 0.99 1976.87 14.96 1934 1966.25 1973.0 1989.00 2019 ▁▇▇▅▂
ave_age_at_top_500 5 0.99 29.61 9.35 17 24.04 27.0 31.00 88 ▇▂▁▁▁
years_between 5 0.99 5.93 8.42 0 1.00 3.0 7.00 54 ▇▁▁▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of flights, then count how many there are
ncol_num <- ncol(
    select(flights, where(is.numeric))
)

# Show the result
ncol_num
## [1] 14

Turn them into a function

# Count the numeric columns of a data frame.
#
# .data: a data frame (or tibble); passed first so the function can be piped into.
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_only <- select(.data, where(is.numeric))

    # Report how many columns are left
    return(ncol(numeric_only))

}

# Whole table, passed directly instead of through the pipe
count_ncol_numeric(flights)
## [1] 14
# First ten rows with the first thirteen columns dropped
count_ncol_numeric(flights[1:10, -1:-13])
## [1] 4

Adding arguments for details of operation

# Count the columns of a data frame that have a given type.
#
# .data:     a data frame (or tibble); passed first so the function can be piped into.
# type_data: which kind of column to count, either "numeric" (the default)
#            or "character".
# Returns the number of matching columns.
# Stops with an error that lists the accepted choices when type_data is
# anything else.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate up front: an unsupported value used to fall through both
    # branches and fail later with "object 'ncol_type' not found"
    type_data <- match.arg(type_data, choices = c("numeric", "character"))

    # Pick the predicate that recognises the requested type
    is_requested_type <- switch(type_data,
        numeric   = is.numeric,
        character = is.character
    )

    # body
    ncol_type <- .data %>%

        # Select a type of variables
        select(where(is_requested_type)) %>%

        # Count columns
        ncol()

    # return the new variable
    return(ncol_type)

}

# Default type ("numeric") on the whole table
count_ncol_type(flights)
## [1] 14
# Character columns of the whole table
count_ncol_type(flights, type_data = "character")
## [1] 4
# Character columns among the first five columns of the first ten rows
count_ncol_type(flights[1:10, 1:5], type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep the flights whose carrier is "DL", then count the remaining rows
nrow_num <- nrow(
    filter(flights, carrier == "DL")
)

# Show the result
nrow_num
## [1] 48110

Turn them into a function

# Count the rows of a data frame whose `carrier` column equals a given value.
#
# .data:        a data frame (or tibble) with a `carrier` column.
# carrier_name: the carrier value to look for.
# Returns the number of matching rows.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    # Rows whose carrier equals the requested one
    matching_flights <- filter(.data, carrier == carrier_name)

    # How many rows matched
    return(nrow(matching_flights))

}

# "UA" flights among the first ten rows, using only the carrier column
count_num_flights_by_carrier(flights[1:10, "carrier"], carrier_name = "UA")
## [1] 3

Import your data

# Re-read the same CSV that was loaded at the top of this document into `data`
data <- read.csv("../00_data/myData.csv")

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# Keep the rows whose type is "Studio", then count them
nrow_num <- nrow(
    filter(data, type == "Studio")
)

# Show the result
nrow_num
## [1] 608

Turn them into a function

# Count the rows of a data frame whose `type` column equals a given value.
#
# data: a data frame (or tibble) with a `type` column.
# type: the value to look for in that column, e.g. "Studio".
# Returns the number of matching rows.
count_num_studio_by_type <- function(data, type) {

    # body
    nrow_num <- data %>%

        # filter rows that meet a condition.
        # The argument has the same name as the column, so a bare `type`
        # would refer to the column on both sides. `.data$type` is the column
        # and `.env$type` is the value passed by the caller.
        filter(.data$type == .env$type) %>%

        # Count rows
        nrow()

    # return the new variable
    return(nrow_num)

}