Import data

# Read the album workbook from ../00_data into a tibble and print it.
# NOTE(review): the rank_* columns print as <chr> below, presumably because
# the sheet stores missing ranks as the text "NA" -- confirm against the file.
data <- read_excel(path = "../00_data/my.Data.xlsx")
data
## # A tibble: 691 × 22
##     ...1 sort_name   clean_name album rank_2003 rank_2012 rank_2020 differential
##    <dbl> <chr>       <chr>      <chr> <chr>     <chr>     <chr>            <dbl>
##  1     1 Sinatra, F… Frank Sin… "In … 100       101       282               -182
##  2     2 Diddley, Bo Bo Diddley "Bo … 214       216       455               -241
##  3     3 Presley, E… Elvis Pre… "Elv… 55        56        332               -277
##  4     4 Sinatra, F… Frank Sin… "Son… 306       308       NA                -195
##  5     5 Little Ric… Little Ri… "Her… 50        50        227               -177
##  6     6 Beyonce     Beyonce    "Lem… NA        NA        32                 469
##  7     7 Winehouse,… Amy Wineh… "Bac… NA        451       33                 468
##  8     8 Crickets    Buddy Hol… "The… 421       420       NA                 -80
##  9     9 Bush, Kate  Kate Bush  "Hou… NA        NA        68                 433
## 10    10 Davis, Mil… Miles Dav… "Kin… 12        12        31                 -19
## # ℹ 681 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## #   weeks_on_billboard <chr>, peak_billboard_position <dbl>,
## #   spotify_popularity <chr>, spotify_url <chr>, artist_member_count <chr>,
## #   artist_gender <chr>, artist_birth_year_sum <chr>,
## #   debut_album_release_year <chr>, ave_age_at_top_500 <chr>,
## #   years_between <chr>, album_id <chr>

Filter rows

# Keep only the rows credited to the Beatles.
# Artist names are stored in sort_name (spelled "Beatles" in the data), while
# album appears to hold album titles, so album == "beetles" matched zero rows.
# The printed result below predates this change; re-knit to refresh it.
filter(data, sort_name == "Beatles")
## # A tibble: 0 × 22
## # ℹ 22 variables: ...1 <dbl>, sort_name <chr>, clean_name <chr>, album <chr>,
## #   rank_2003 <chr>, rank_2012 <chr>, rank_2020 <chr>, differential <dbl>,
## #   release_year <dbl>, genre <chr>, type <chr>, weeks_on_billboard <chr>,
## #   peak_billboard_position <dbl>, spotify_popularity <chr>, spotify_url <chr>,
## #   artist_member_count <chr>, artist_gender <chr>,
## #   artist_birth_year_sum <chr>, debut_album_release_year <chr>,
## #   ave_age_at_top_500 <chr>, years_between <chr>, album_id <chr>

Arrange rows

# Sort albums by their 2020 rank, largest rank number first.
# rank_2020 is a character column, so desc(rank_2020) ordered it as text and
# put the "NA" strings at the top. Turning "NA" into a real missing value and
# converting to numeric gives a true numeric ordering; arrange() then places
# the unranked albums last.
# The printed result below predates this change; re-knit to refresh it.
arrange(
  data,
  desc(as.numeric(na_if(as.character(rank_2020), "NA")))
)
## # A tibble: 691 × 22
##     ...1 sort_name   clean_name album rank_2003 rank_2012 rank_2020 differential
##    <dbl> <chr>       <chr>      <chr> <chr>     <chr>     <chr>            <dbl>
##  1     4 "Sinatra, … "Frank Si… "Son… 306       308       NA                -195
##  2     8 "Crickets"  "Buddy Ho… "The… 421       420       NA                 -80
##  3    19 "Charles, … "Ray Char… "The… 263       265       NA                -238
##  4    24 "Waters, M… "Muddy Wa… "Mud… 348       348       NA                -153
##  5    25 "Davis, Mi… "Miles Da… "Ske… 356       358       NA                -145
##  6    30 "Bland, Bo… "Bobby \"… "Two… 215       217       NA                -286
##  7    37 "Howlin' W… "Howlin' … "How… 223       238       NA                -278
##  8    44 "Beatles"   "The Beat… "Ple… 39        39        NA                -462
##  9    45 "Spector, … "Phil Spe… "A C… 142       142       NA                -359
## 10    47 "Beatles"   "The Beat… "Wit… 420       NA        NA                 -81
## # ℹ 681 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## #   weeks_on_billboard <chr>, peak_billboard_position <dbl>,
## #   spotify_popularity <chr>, spotify_url <chr>, artist_member_count <chr>,
## #   artist_gender <chr>, artist_birth_year_sum <chr>,
## #   debut_album_release_year <chr>, ave_age_at_top_500 <chr>,
## #   years_between <chr>, album_id <chr>

Select columns

# Narrow the table to the album, its 2020 rank, and its genre.
selected_data <- select(data, album, rank_2020, genre)

Add columns

# Append a decade column: release_year rounded down to a multiple of 10.
data_with_decade <- mutate(
  data,
  decade = 10 * floor(release_year / 10)
)