# Import the album workbook; the echoed "New names" message shows that its
# unnamed first column is auto-named `...1`.
data <- read_excel(path = "../00_data/my.Data.xlsx")
## New names:
## • `` -> `...1`
# Print a per-column overview of the imported data (column types, missing
# values, distribution summaries) using skimr.
data %>% skimr::skim()
| Name | Piped data |
| Number of rows | 691 |
| Number of columns | 22 |
| _______________________ | |
| Column type frequency: | |
| character | 18 |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| sort_name | 0 | 1 | 1 | 34 | 0 | 391 | 0 |
| clean_name | 0 | 1 | 1 | 34 | 0 | 386 | 0 |
| album | 0 | 1 | 1 | 69 | 0 | 685 | 0 |
| rank_2003 | 0 | 1 | 1 | 3 | 0 | 499 | 0 |
| rank_2012 | 0 | 1 | 1 | 3 | 0 | 501 | 0 |
| rank_2020 | 0 | 1 | 1 | 3 | 0 | 501 | 0 |
| genre | 0 | 1 | 2 | 35 | 0 | 17 | 0 |
| type | 0 | 1 | 4 | 13 | 0 | 5 | 0 |
| weeks_on_billboard | 0 | 1 | 1 | 3 | 0 | 169 | 0 |
| spotify_popularity | 0 | 1 | 2 | 2 | 0 | 72 | 0 |
| spotify_url | 0 | 1 | 2 | 36 | 0 | 656 | 0 |
| artist_member_count | 0 | 1 | 1 | 2 | 0 | 13 | 0 |
| artist_gender | 0 | 1 | 2 | 11 | 0 | 4 | 0 |
| artist_birth_year_sum | 0 | 1 | 2 | 5 | 0 | 271 | 0 |
| debut_album_release_year | 0 | 1 | 2 | 4 | 0 | 70 | 0 |
| ave_age_at_top_500 | 0 | 1 | 2 | 18 | 0 | 173 | 0 |
| years_between | 0 | 1 | 1 | 2 | 0 | 46 | 0 |
| album_id | 0 | 1 | 6 | 22 | 0 | 691 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| ...1 | 0 | 1 | 346.00 | 199.62 | 1 | 173.5 | 346 | 518.5 | 691 | ▇▇▇▇▇ |
| differential | 0 | 1 | -12.32 | 199.04 | -501 | -137.5 | -8 | 106.0 | 484 | ▂▅▇▃▂ |
| release_year | 0 | 1 | 1982.87 | 14.55 | 1955 | 1971.0 | 1979 | 1994.0 | 2019 | ▂▇▃▃▂ |
| peak_billboard_position | 0 | 1 | 61.19 | 77.16 | 1 | 2.0 | 17 | 111.5 | 201 | ▇▁▁▁▂ |
# Keep only the numeric columns of `data`, then count how many remain
ncol_num <- ncol(select(data, where(is.numeric)))
ncol_num
## [1] 4
# Count the numeric columns of a data frame.
#
# .data: a data frame (or tibble).
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {
  numeric_only <- select(.data, where(is.numeric))
  ncol(numeric_only)
}
# Try the helper on the whole data set
count_ncol_numeric(data)
## [1] 4
# ... and on rows 1-10 with the first 13 columns dropped
count_ncol_numeric(data[1:10, -(1:13)])
## [1] 0
# Count the columns of a data frame that are of a given type.
#
# data:      a data frame (or tibble).
# type_data: which kind of column to count, "numeric" (default) or
#            "character".
# Returns the number of matching columns.
# Stops with an informative error when type_data is not a supported choice.
count_ncol_type <- function(data, type_data = "numeric") {
  # Validate up front: without this, an unsupported value skipped every
  # branch and surfaced as "object 'ncol_type' not found".
  type_data <- match.arg(type_data, choices = c("numeric", "character"))

  # Pick the predicate used to test each column
  is_type <- switch(type_data,
    numeric = is.numeric,
    character = is.character
  )

  data %>%
    select(where(is_type)) %>%
    ncol()
}
# Default type ("numeric") on the whole data set
count_ncol_type(data)
## [1] 4
# Character columns on the whole data set
count_ncol_type(data, type_data = "character")
## [1] 18
# Character columns among the first five columns (rows 1-10)
count_ncol_type(data[1:10, 1:5], type_data = "character")
## [1] 4
# Number of rows whose clean_name is exactly "Elvis Presley"
nrow_num <- nrow(filter(data, clean_name == "Elvis Presley"))
nrow_num
## [1] 3
# Count the rows of a data frame that belong to one artist.
#
# .data:       a data frame with a `clean_name` column.
# artist_name: the value to match against `clean_name` (exact equality).
# Returns the number of rows where clean_name equals artist_name.
count_num_view_count_by_artist <- function(.data, artist_name) {
  .data %>%
    # `.env$` forces the function argument to be used; a bare `artist_name`
    # would silently resolve to a column of that name if the input had one.
    filter(clean_name == .env$artist_name) %>%
    nrow()
}
# Try the helper on the clean_name column of the first ten rows
count_num_view_count_by_artist(
  data[1:10, "clean_name"],
  artist_name = "Elvis Presley"
)
## [1] 1
Create your own.
Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".
# Number of rows whose genre is exactly "Rock n' Roll/Rhythm & Blues"
nrow_num <- nrow(filter(data, genre == "Rock n' Roll/Rhythm & Blues"))
nrow_num
## [1] 10
# Count the rows of a data frame that belong to one genre.
#
# .data:      a data frame with a `genre` column.
# genre_name: the value to match against `genre` (exact equality).
# Returns the number of rows where genre equals genre_name.
count_num_by_genre <- function(.data, genre_name) {
  .data %>%
    # `.env$` forces the function argument to be used; a bare `genre_name`
    # would silently resolve to a column of that name if the input had one.
    filter(genre == .env$genre_name) %>%
    nrow()
}
# Try the helper on the whole data set
count_num_by_genre(data, genre_name = "Rock n' Roll/Rhythm & Blues")
## [1] 10