Import data
# csv file: read the album data, path is relative to the working directory
# (the `...1` column shown below is the name read_csv assigns to an unnamed
# first column in the file)
data <- read_csv(file = "../00_data/myData.csv")
# print the tibble to inspect its columns and first rows
data
## # A tibble: 691 × 22
## ...1 sort_name clean_name album rank_2003 rank_2012 rank_2020 differential
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 Sinatra, F… Frank Sin… "In … 100 101 282 -182
## 2 2 Diddley, Bo Bo Diddley "Bo … 214 216 455 -241
## 3 3 Presley, E… Elvis Pre… "Elv… 55 56 332 -277
## 4 4 Sinatra, F… Frank Sin… "Son… 306 308 NA -195
## 5 5 Little Ric… Little Ri… "Her… 50 50 227 -177
## 6 6 Beyonce Beyonce "Lem… NA NA 32 469
## 7 7 Winehouse,… Amy Wineh… "Bac… NA 451 33 468
## 8 8 Crickets Buddy Hol… "The… 421 420 NA -80
## 9 9 Bush, Kate Kate Bush "Hou… NA NA 68 433
## 10 10 Davis, Mil… Miles Dav… "Kin… 12 12 31 -19
## # ℹ 681 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## # weeks_on_billboard <dbl>, peak_billboard_position <dbl>,
## # spotify_popularity <dbl>, spotify_url <chr>, artist_member_count <dbl>,
## # artist_gender <chr>, artist_birth_year_sum <dbl>,
## # debut_album_release_year <dbl>, ave_age_at_top_500 <dbl>,
## # years_between <dbl>, album_id <chr>
# excel file
Apply the following dplyr verbs to your data
Filter rows
# keep only the rows whose sort_name is exactly "Sinatra, Frank"
data %>%
  filter(sort_name == "Sinatra, Frank")
## # A tibble: 2 × 22
## ...1 sort_name clean_name album rank_2003 rank_2012 rank_2020 differential
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 Sinatra, Fr… Frank Sin… In t… 100 101 282 -182
## 2 4 Sinatra, Fr… Frank Sin… Song… 306 308 NA -195
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## # weeks_on_billboard <dbl>, peak_billboard_position <dbl>,
## # spotify_popularity <dbl>, spotify_url <chr>, artist_member_count <dbl>,
## # artist_gender <chr>, artist_birth_year_sum <dbl>,
## # debut_album_release_year <dbl>, ave_age_at_top_500 <dbl>,
## # years_between <dbl>, album_id <chr>
# keep rows with a 2020 rank below 100; filter() drops rows where the
# condition is NA, so albums without a rank_2020 are excluded
data %>%
  filter(rank_2020 < 100)
## # A tibble: 99 × 22
## ...1 sort_name clean_name album rank_2003 rank_2012 rank_2020 differential
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 6 Beyonce Beyonce Lemo… NA NA 32 469
## 2 7 Winehouse,… Amy Wineh… Back… NA 451 33 468
## 3 9 Bush, Kate Kate Bush Houn… NA NA 68 433
## 4 10 Davis, Mil… Miles Dav… Kind… 12 12 31 -19
## 5 12 Beyonce Beyonce Beyo… NA NA 81 420
## 6 14 Badu, Eryk… Erykah Ba… Badu… NA NA 89 412
## 7 15 Elliott, M… Missy Ell… Supa… NA NA 93 408
## 8 17 Swift, Tay… Taylor Sw… Red NA NA 99 402
## 9 39 Brown, Jam… James Bro… Live… 24 25 65 -41
## 10 81 Dylan, Bob Bob Dylan High… 4 4 18 -14
## # ℹ 89 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## # weeks_on_billboard <dbl>, peak_billboard_position <dbl>,
## # spotify_popularity <dbl>, spotify_url <chr>, artist_member_count <dbl>,
## # artist_gender <chr>, artist_birth_year_sum <dbl>,
## # debut_album_release_year <dbl>, ave_age_at_top_500 <dbl>,
## # years_between <dbl>, album_id <chr>
Arrange rows
# sort rows by sort_name in descending (reverse alphabetical) order
data %>%
  arrange(desc(sort_name))
## # A tibble: 691 × 22
## ...1 sort_name clean_name album rank_2003 rank_2012 rank_2020 differential
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 149 Zombies The Zombi… Odes… 80 100 243 -163
## 2 316 ZZ Top ZZ Top Tres… 498 490 NA -3
## 3 517 ZZ Top ZZ Top Elim… 396 398 NA -105
## 4 177 Young, Neil Neil Young Ever… 208 210 407 -199
## 5 193 Young, Neil Neil Young Afte… 71 74 90 -19
## 6 255 Young, Neil Neil Young Harv… 78 82 72 6
## 7 325 Young, Neil Neil Young On t… NA NA 311 190
## 8 353 Young, Neil Neil Young Toni… 331 330 302 29
## 9 465 Young, Neil Neil Young Rust… 350 351 296 54
## 10 491 Yo La Tengo Yo La Ten… I Ca… NA NA 423 78
## # ℹ 681 more rows
## # ℹ 14 more variables: release_year <dbl>, genre <chr>, type <chr>,
## # weeks_on_billboard <dbl>, peak_billboard_position <dbl>,
## # spotify_popularity <dbl>, spotify_url <chr>, artist_member_count <dbl>,
## # artist_gender <chr>, artist_birth_year_sum <dbl>,
## # debut_album_release_year <dbl>, ave_age_at_top_500 <dbl>,
## # years_between <dbl>, album_id <chr>
Select columns
# keep the contiguous run of columns from rank_2003 through release_year
data %>%
  select(rank_2003:release_year)
## # A tibble: 691 × 5
## rank_2003 rank_2012 rank_2020 differential release_year
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 100 101 282 -182 1955
## 2 214 216 455 -241 1955
## 3 55 56 332 -277 1956
## 4 306 308 NA -195 1956
## 5 50 50 227 -177 1957
## 6 NA NA 32 469 2016
## 7 NA 451 33 468 2006
## 8 421 420 NA -80 1957
## 9 NA NA 68 433 1985
## 10 12 12 31 -19 1959
## # ℹ 681 more rows
Add columns
# gain = rank_2020 minus rank_2003: positive when the 2020 rank number is
# larger than the 2003 one, NA when either rank is missing
data %>%
  mutate(gain = rank_2020 - rank_2003) %>%
  # show the new column next to the two columns it was computed from
  select(gain, rank_2020, rank_2003)
## # A tibble: 691 × 3
## gain rank_2020 rank_2003
## <dbl> <dbl> <dbl>
## 1 182 282 100
## 2 241 455 214
## 3 277 332 55
## 4 NA NA 306
## 5 177 227 50
## 6 NA 32 NA
## 7 NA 33 NA
## 8 NA NA 421
## 9 NA 68 NA
## 10 19 31 12
## # ℹ 681 more rows
Summarize by groups
# one row per sort_name with the mean of its rank_2003 values
# NOTE: mean() is called without na.rm = TRUE, so any sort_name that has at
# least one missing rank_2003 gets NA as its average
summarise(
  group_by(data, sort_name),
  average_rank_2003 = mean(rank_2003)
)
## # A tibble: 391 × 2
## sort_name average_rank_2003
## <chr> <dbl>
## 1 2Pac NA
## 2 50 Cent NA
## 3 A Tribe Called Quest NA
## 4 ABBA 180
## 5 AC/DC 136
## 6 Aaliyah NA
## 7 Adele NA
## 8 Aerosmith 202
## 9 Allman Brothers Band 49
## 10 Amos, Tori NA
## # ℹ 381 more rows