Import data
# Read the album data from the Excel workbook.
# Unranked/unknown cells in this workbook appear to hold the literal text "NA"
# (the rank columns print as <chr>, and those "NA" values sort like ordinary
# strings). Declaring "NA" as the missing-value marker turns them into real
# missing values, and lets read_excel() type a column as numeric when its
# remaining cells are numbers.
# NOTE(review): printed output further down was rendered before this change;
# re-knit the document to refresh it.
data <- read_excel("../00_data/my.Data.xlsx", na = "NA")
data
## # A tibble: 691 × 22
## ...1 sort_name clean_name album rank_2003 rank_2012 rank_2020 differential
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 1 Sinatra, F… Frank Sin… "In … 100 101 282 -182
## 2 2 Diddley, Bo Bo Diddley "Bo … 214 216 455 -241
## 3 3 Presley, E… Elvis Pre… "Elv… 55 56 332 -277
## 4 4 Sinatra, F… Frank Sin… "Son… 306 308 NA -195
## 5 5 Little Ric… Little Ri… "Her… 50 50 227 -177
## 6 6 Beyonce Beyonce "Lem… NA NA 32 469
## 7 7 Winehouse,… Amy Wineh… "Bac… NA 451 33 468
## 8 8 Crickets Buddy Hol… "The… 421 420 NA -80
## 9 9 Bush, Kate Kate Bush "Hou… NA NA 68 433
## 10 10 Davis, Mil… Miles Dav… "Kin… 12 12 31 -19
## # ℹ 681 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## # weeks_on_billboard <chr>, peak_billboard_position <dbl>,
## # spotify_popularity <chr>, spotify_url <chr>, artist_member_count <chr>,
## # artist_gender <chr>, artist_birth_year_sum <chr>,
## # debut_album_release_year <chr>, ave_age_at_top_500 <chr>,
## # years_between <chr>, album_id <chr>
Filter rows
# Keep only the albums by The Beatles.
# The previous condition, album == "beetles", compared a misspelled artist name
# against the album-title column and therefore matched no rows. The artist is
# stored in `sort_name` as "Beatles".
# NOTE(review): the 0-row printout that follows was produced by the old
# condition; re-knit the document to refresh it.
filter(data, sort_name == "Beatles")
## # A tibble: 0 × 22
## # ℹ 22 variables: ...1 <dbl>, sort_name <chr>, clean_name <chr>, album <chr>,
## # rank_2003 <chr>, rank_2012 <chr>, rank_2020 <chr>, differential <dbl>,
## # release_year <dbl>, genre <chr>, type <chr>, weeks_on_billboard <chr>,
## # peak_billboard_position <dbl>, spotify_popularity <chr>, spotify_url <chr>,
## # artist_member_count <chr>, artist_gender <chr>,
## # artist_birth_year_sum <chr>, debut_album_release_year <chr>,
## # ave_age_at_top_500 <chr>, years_between <chr>, album_id <chr>
Arrange rows
# Sort albums by their 2020 rank, largest rank number first.
# `rank_2020` is a character column that uses the literal string "NA" for
# unranked albums, so sorting it directly is alphabetical ("99" lands ahead of
# "100") and pushes the "NA" rows to the top. Blank out the "NA" strings and
# convert to numbers so the order is numeric; arrange() then places the
# unranked (missing) albums at the end. `%in%` never yields NA, so this also
# works unchanged if the column is already numeric.
# NOTE(review): the printout that follows predates this change; re-knit the
# document to refresh it.
arrange(
  data,
  desc(as.numeric(replace(rank_2020, rank_2020 %in% "NA", NA)))
)
## # A tibble: 691 × 22
## ...1 sort_name clean_name album rank_2003 rank_2012 rank_2020 differential
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 4 "Sinatra, … "Frank Si… "Son… 306 308 NA -195
## 2 8 "Crickets" "Buddy Ho… "The… 421 420 NA -80
## 3 19 "Charles, … "Ray Char… "The… 263 265 NA -238
## 4 24 "Waters, M… "Muddy Wa… "Mud… 348 348 NA -153
## 5 25 "Davis, Mi… "Miles Da… "Ske… 356 358 NA -145
## 6 30 "Bland, Bo… "Bobby \"… "Two… 215 217 NA -286
## 7 37 "Howlin' W… "Howlin' … "How… 223 238 NA -278
## 8 44 "Beatles" "The Beat… "Ple… 39 39 NA -462
## 9 45 "Spector, … "Phil Spe… "A C… 142 142 NA -359
## 10 47 "Beatles" "The Beat… "Wit… 420 NA NA -81
## # ℹ 681 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## # weeks_on_billboard <chr>, peak_billboard_position <dbl>,
## # spotify_popularity <chr>, spotify_url <chr>, artist_member_count <chr>,
## # artist_gender <chr>, artist_birth_year_sum <chr>,
## # debut_album_release_year <chr>, ave_age_at_top_500 <chr>,
## # years_between <chr>, album_id <chr>
Select columns
# Narrow the table to three columns: album, rank_2020, and genre.
selected_data <- select(data, album, rank_2020, genre)
Add columns
# Add a `decade` column: release_year rounded down to a multiple of 10
# (for example, 1967 becomes 1960).
data_with_decade <- mutate(
  data,
  decade = 10 * floor(release_year / 10)
)