Loading packages

library(dplyr)
library(readr)
# Read the two raw inputs. Both locations are absolute paths on the author's
# machine; the first one hands read_csv() a .zip archive directly.
data_original <- read_csv(
  file = "C:/Users/kiosh/Downloads/archive (1).zip"
)
data_original2 <- read_csv(
  file = "C:/Users/kiosh/Downloads/9912f7a366c62c1f296c-dd94a25492b3062f4ca0dc2bb2cdf23fec0896ea/9912f7a366c62c1f296c-dd94a25492b3062f4ca0dc2bb2cdf23fec0896ea/10000-MTV-Music-Artists-page-1.csv"
)

# Working copies, so the objects returned by read_csv() stay untouched.
data_mani <- data_original
data_mani2 <- data_original2

Glimpse of Data

# Show the first rows of the chart data to check column names and types.
data_mani %>%
  head()
## # A tibble: 6 × 7
##   date        rank song          artist `last-week` `peak-rank` `weeks-on-board`
##   <date>     <dbl> <chr>         <chr>        <dbl>       <dbl>            <dbl>
## 1 2021-11-06     1 Easy On Me    Adele            1           1                3
## 2 2021-11-06     2 Stay          The K…           2           1               16
## 3 2021-11-06     3 Industry Baby Lil N…           3           1               14
## 4 2021-11-06     4 Fancy Like    Walke…           4           3               19
## 5 2021-11-06     5 Bad Habits    Ed Sh…           5           2               18
## 6 2021-11-06     6 Way 2 Sexy    Drake…           6           1                8

Working data

# Keep the columns from `date` through `artist`, and carry `weeks-on-board`
# along under the syntactic name `weeks_popular`.
music_df <- select(
  data_mani,
  date:artist,
  weeks_popular = `weeks-on-board`
)
music_df
## # A tibble: 330,087 × 5
##    date        rank song          artist                           weeks_popular
##    <date>     <dbl> <chr>         <chr>                                    <dbl>
##  1 2021-11-06     1 Easy On Me    Adele                                        3
##  2 2021-11-06     2 Stay          The Kid LAROI & Justin Bieber               16
##  3 2021-11-06     3 Industry Baby Lil Nas X & Jack Harlow                     14
##  4 2021-11-06     4 Fancy Like    Walker Hayes                                19
##  5 2021-11-06     5 Bad Habits    Ed Sheeran                                  18
##  6 2021-11-06     6 Way 2 Sexy    Drake Featuring Future & Young …             8
##  7 2021-11-06     7 Shivers       Ed Sheeran                                   7
##  8 2021-11-06     8 Good 4 U      Olivia Rodrigo                              24
##  9 2021-11-06     9 Need To Know  Doja Cat                                    20
## 10 2021-11-06    10 Levitating    Dua Lipa                                    56
## # ℹ 330,077 more rows

Loading lubridate and stringr

lubridate is a package that provides functions for manipulating date and time data.

stringr is used to extract text values from the data.

library(lubridate)
library(stringr)

# Preview the primary-artist split next to the raw credit (nothing is saved).
# When the credit contains "Featuring", primary_artist is the text captured
# before " Featuring"; otherwise the credit is used unchanged.
# NOTE(review): `(.*)` is greedy, so a credit with more than one "Featuring"
# keeps everything up to the last one - confirm that is intended.
music_df %>%
  select(artist) %>%
  mutate(
    primary_artist = if_else(
      str_detect(artist, "Featuring"),
      str_match(artist, "(.*)\\sFeaturing")[, 2],
      artist
    )
  )
## # A tibble: 330,087 × 2
##    artist                              primary_artist               
##    <chr>                               <chr>                        
##  1 Adele                               Adele                        
##  2 The Kid LAROI & Justin Bieber       The Kid LAROI & Justin Bieber
##  3 Lil Nas X & Jack Harlow             Lil Nas X & Jack Harlow      
##  4 Walker Hayes                        Walker Hayes                 
##  5 Ed Sheeran                          Ed Sheeran                   
##  6 Drake Featuring Future & Young Thug Drake                        
##  7 Ed Sheeran                          Ed Sheeran                   
##  8 Olivia Rodrigo                      Olivia Rodrigo               
##  9 Doja Cat                            Doja Cat                     
## 10 Dua Lipa                            Dua Lipa                     
## # ℹ 330,077 more rows
# Split each artist credit around the literal word "Featuring":
#   primary_artist  - text before " Featuring", or the full credit when the
#                     word is absent
#   featured_artist - text after "Featuring ", or NA when the word is absent
# NOTE(review): `(.*)` is greedy, so a credit with more than one "Featuring"
# keeps everything up to the last one in primary_artist - confirm intended.
music_df_cleaned <- music_df %>%
  mutate(
    primary_artist = if_else(
      str_detect(artist, "Featuring"),
      str_match(artist, "(.*)\\sFeaturing")[, 2],
      artist
    ),
    featured_artist = str_match(artist, "Featuring\\s(.*)")[, 2]
  )

music_df_cleaned
## # A tibble: 330,087 × 7
##    date        rank song     artist weeks_popular primary_artist featured_artist
##    <date>     <dbl> <chr>    <chr>          <dbl> <chr>          <chr>          
##  1 2021-11-06     1 Easy On… Adele              3 Adele          <NA>           
##  2 2021-11-06     2 Stay     The K…            16 The Kid LAROI… <NA>           
##  3 2021-11-06     3 Industr… Lil N…            14 Lil Nas X & J… <NA>           
##  4 2021-11-06     4 Fancy L… Walke…            19 Walker Hayes   <NA>           
##  5 2021-11-06     5 Bad Hab… Ed Sh…            18 Ed Sheeran     <NA>           
##  6 2021-11-06     6 Way 2 S… Drake…             8 Drake          Future & Young…
##  7 2021-11-06     7 Shivers  Ed Sh…             7 Ed Sheeran     <NA>           
##  8 2021-11-06     8 Good 4 U Olivi…            24 Olivia Rodrigo <NA>           
##  9 2021-11-06     9 Need To… Doja …            20 Doja Cat       <NA>           
## 10 2021-11-06    10 Levitat… Dua L…            56 Dua Lipa       <NA>           
## # ℹ 330,077 more rows

Loading tidyr

library(tidyr)

Pivoting the data

# Count how often "Ed Sheeran" appears as primary vs. featured artist.
# Each unique song/primary/featured combination is reshaped so that every
# role becomes its own row, then rows naming Ed Sheeran are tallied per role.
# The role columns are selected by name rather than by position (2:3), so
# the reshape no longer depends on the column order produced by distinct().
# The comparison is an exact match: credits where he shares a role with
# other names (e.g. "A & Ed Sheeran") are not counted.
music_df_cleaned %>%
  distinct(song, primary_artist, featured_artist) %>%
  pivot_longer(
    cols = c(primary_artist, featured_artist),
    names_to = "artist_type",
    values_to = "artist_name"
  ) %>%
  filter(artist_name == "Ed Sheeran") %>%
  count(artist_type)
## # A tibble: 2 × 2
##   artist_type         n
##   <chr>           <int>
## 1 featured_artist     7
## 2 primary_artist     31

Pivot wider

# One row per chart date, with the songs ranked 1 to 3 spread into columns
# named after their rank.
music_df_cleaned %>%
  select(date, rank, song) %>%
  filter(rank <= 3) %>%
  pivot_wider(
    names_from = rank,
    values_from = song
  )
## # A tibble: 3,301 × 4
##    date       `1`           `2`              `3`          
##    <date>     <chr>         <chr>            <chr>        
##  1 2021-11-06 Easy On Me    Stay             Industry Baby
##  2 2021-10-30 Easy On Me    Stay             Industry Baby
##  3 2021-10-23 Industry Baby Stay             Fancy Like   
##  4 2021-10-16 Stay          Industry Baby    Fancy Like   
##  5 2021-10-09 My Universe   Stay             Industry Baby
##  6 2021-10-02 Stay          Industry Baby    Way 2 Sexy   
##  7 2021-09-25 Stay          Way 2 Sexy       Bad Habits   
##  8 2021-09-18 Way 2 Sexy    Girls Want Girls Fair Trade   
##  9 2021-09-11 Butter        Stay             Bad Habits   
## 10 2021-09-04 Stay          Bad Habits       Good 4 U     
## # ℹ 3,291 more rows

Joining the data

# Genres with the most distinct (song, primary artist) pairs on the chart.
# Songs are matched to a genre through the primary artist's name; songs whose
# primary artist has no match in data_original2 are dropped by the inner join.
# slice_max() replaces the superseded top_n(): the ranking column is stated
# explicitly instead of being guessed from the last column, and ties at the
# cut-off are kept as before (with_ties = TRUE is the default). The result is
# ordered by descending count, so use it for membership tests, not position.
# NOTE(review): count() treats a missing genre as its own group - confirm
# data_original2 has no NA genres, or filter them out before counting.
top5_genre <- music_df_cleaned %>%
  inner_join(data_original2, by = c("primary_artist" = "name")) %>%
  distinct(song, primary_artist, genre) %>%
  count(genre) %>%
  slice_max(order_by = n, n = 5) %>%
  pull(genre)

Loading ggplot2

library(ggplot2)

Set theme

# Make theme_bw() the session default, with bold steel-blue title text
# (the `title` element is the parent of the plot, axis and legend titles).
theme_set(
  theme_bw() +
    theme(
      title = element_text(color = "steelblue", face = "bold")
    )
)

Plotting the graph

# Yearly number of chart entries for each of the top genres, one facet per
# genre. Genre is attached through the primary artist's name; entries whose
# primary artist has no match in data_original2 are dropped by the join.
# Chart dates are floored to the first day of their year before counting.
# An optional `filter(n >= 300)` after count() is currently disabled.
# NOTE(review): a numeric legend.position is deprecated as of ggplot2 3.5.0
# (legend.position = "inside" plus legend.position.inside) - update once the
# minimum ggplot2 version allows it.
music_df_cleaned %>%
  inner_join(data_original2, by = c("primary_artist" = "name")) %>%
  filter(genre %in% top5_genre) %>%
  mutate(date = floor_date(date, unit = "year")) %>%
  count(date, genre) %>%
  ggplot(aes(x = date, y = n, color = genre)) +
  geom_line() +
  facet_wrap(~genre) +
  labs(
    title = "Popularity of Genre",
    color = "Genre type",
    x = "Years",
    y = "Popularity"
  ) +
  theme(legend.position = c(0.88, 0.1))