Import your data

# Read the Excel workbook (relative path) into `data`.
data <- read_excel("../00_data/my.Data.xlsx")
## New names:
## • `` -> `...1`
# The message above shows that a column with an empty name was renamed to `...1`.
# NOTE(review): the summary below reports 18 of the 22 columns as character,
# including number-like ones such as rank_2003 -- confirm whether they should
# be converted before analysis.
# Print a per-column summary (type, missing values, distribution) with skimr.
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 691
Number of columns 22
_______________________
Column type frequency:
character 18
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
sort_name 0 1 1 34 0 391 0
clean_name 0 1 1 34 0 386 0
album 0 1 1 69 0 685 0
rank_2003 0 1 1 3 0 499 0
rank_2012 0 1 1 3 0 501 0
rank_2020 0 1 1 3 0 501 0
genre 0 1 2 35 0 17 0
type 0 1 4 13 0 5 0
weeks_on_billboard 0 1 1 3 0 169 0
spotify_popularity 0 1 2 2 0 72 0
spotify_url 0 1 2 36 0 656 0
artist_member_count 0 1 1 2 0 13 0
artist_gender 0 1 2 11 0 4 0
artist_birth_year_sum 0 1 2 5 0 271 0
debut_album_release_year 0 1 2 4 0 70 0
ave_age_at_top_500 0 1 2 18 0 173 0
years_between 0 1 1 2 0 46 0
album_id 0 1 6 22 0 691 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
...1 0 1 346.00 199.62 1 173.5 346 518.5 691 ▇▇▇▇▇
differential 0 1 -12.32 199.04 -501 -137.5 -8 106.0 484 ▂▅▇▃▂
release_year 0 1 1982.87 14.55 1955 1971.0 1979 1994.0 2019 ▂▇▃▃▂
peak_billboard_position 0 1 61.19 77.16 1 2.0 17 111.5 201 ▇▁▁▁▂

Create data frame functions

Example 1: count columns

code snippets

# Number of numeric columns in `data`: keep only the numeric
# variables, then count what is left
ncol_num <- ncol(select(data, where(is.numeric)))

ncol_num
## [1] 4

Turn them into a function

# Count how many columns of a data frame are numeric.
#
# .data: a data frame (or tibble).
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {
  # Keep only the numeric columns ...
  numeric_cols <- select(.data, where(is.numeric))

  # ... and their count is the value of the function
  ncol(numeric_cols)
}

# Try the function on the full data set
count_ncol_numeric(data)
## [1] 4
# ... and on the first 10 rows with columns 1 to 13 dropped
count_ncol_numeric(data[1:10, -(1:13)])
## [1] 0

Adding arguments for details of operation

# Count the columns of a data frame that are of a given type.
#
# data:      a data frame (or tibble).
# type_data: which column type to count, either "numeric" (the default)
#            or "character". Unambiguous abbreviations are accepted, as
#            usual with match.arg().
# Returns the number of matching columns. Any other `type_data` raises an
# error from match.arg() that lists the accepted values.
count_ncol_type <- function(data, type_data = "numeric") {
  # Validate up front so an unsupported type fails with a clear message
  # instead of an "object 'ncol_type' not found" error further down.
  type_data <- match.arg(type_data, choices = c("numeric", "character"))

  # Keep only the columns of the requested type
  cols_of_type <- switch(type_data,
    numeric = select(data, where(is.numeric)),
    character = select(data, where(is.character))
  )

  ncol(cols_of_type)
}

# Try the function: default type, explicit type, and a 10 x 5 slice
count_ncol_type(data)
## [1] 4
count_ncol_type(data, type_data = "character")
## [1] 18
count_ncol_type(data[1:10, 1:5], type_data = "character")
## [1] 4

Example 2: count rows

code snippets

# Number of rows in `data` whose artist is Elvis Presley:
# keep the matching rows, then count them
nrow_num <- nrow(filter(data, clean_name == "Elvis Presley"))

nrow_num
## [1] 3

Turn them into a function

# Count the rows of a data frame that belong to one artist.
#
# .data:       a data frame (or tibble) with a `clean_name` column.
# artist_name: the artist to look for; compared to `clean_name` with `==`.
# Returns the number of rows whose `clean_name` equals `artist_name`.
count_num_view_count_by_artist <- function(.data, artist_name) {
  .data %>%
    # `.env$` forces `artist_name` to be the function argument, even when
    # the data happens to contain a column that is also called `artist_name`
    filter(clean_name == .env$artist_name) %>%
    nrow()
}

# Try the function on the first 10 rows, keeping only `clean_name`
count_num_view_count_by_artist(
  data[1:10, "clean_name"],
  artist_name = "Elvis Presley"
)
## [1] 1

Example 3: count rows by genre

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# Number of rows in `data` for one genre:
# keep the matching rows, then count them
nrow_num <- nrow(filter(data, genre == "Rock n' Roll/Rhythm & Blues"))

nrow_num
## [1] 10

Turn them into a function

# Count the rows of a data frame that belong to one genre.
#
# .data:      a data frame (or tibble) with a `genre` column.
# genre_name: the genre to look for; compared to `genre` with `==`.
# Returns the number of rows whose `genre` equals `genre_name`.
count_num_by_genre <- function(.data, genre_name) {
  .data %>%
    # `.env$` forces `genre_name` to be the function argument, even when
    # the data happens to contain a column that is also called `genre_name`
    filter(genre == .env$genre_name) %>%
    nrow()
}

# Try the function on the full data set
count_num_by_genre(data, genre_name = "Rock n' Roll/Rhythm & Blues")
## [1] 10