library(dplyr)
library(readr)
# Weekly chart data (date, rank, song, artist, `last-week`, `peak-rank`,
# `weeks-on-board`), read straight from the downloaded archive.
data_original <- read_csv(
  file = "C:/Users/kiosh/Downloads/archive (1).zip"
)
# Copy used by the cleaning steps that follow.
data_mani <- data_original

# Artist lookup table; the genre analysis later joins on its `name` column
# and reads its `genre` column.
data_original2 <- read_csv(
  file = "C:/Users/kiosh/Downloads/9912f7a366c62c1f296c-dd94a25492b3062f4ca0dc2bb2cdf23fec0896ea/9912f7a366c62c1f296c-dd94a25492b3062f4ca0dc2bb2cdf23fec0896ea/10000-MTV-Music-Artists-page-1.csv"
)
data_mani2 <- data_original2

# Show the first rows of the chart data.
head(data_mani)
## # A tibble: 6 × 7
## date rank song artist `last-week` `peak-rank` `weeks-on-board`
## <date> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2021-11-06 1 Easy On Me Adele 1 1 3
## 2 2021-11-06 2 Stay The K… 2 1 16
## 3 2021-11-06 3 Industry Baby Lil N… 3 1 14
## 4 2021-11-06 4 Fancy Like Walke… 4 3 19
## 5 2021-11-06 5 Bad Habits Ed Sh… 5 2 18
## 6 2021-11-06 6 Way 2 Sexy Drake… 6 1 8
# Keep the columns from `date` through `artist`, and carry `weeks-on-board`
# over under the syntactic name `weeks_popular`.
music_df <- data_mani %>%
  select(
    date:artist,
    weeks_popular = `weeks-on-board`
  )

# Print the trimmed table.
music_df
## # A tibble: 330,087 × 5
## date rank song artist weeks_popular
## <date> <dbl> <chr> <chr> <dbl>
## 1 2021-11-06 1 Easy On Me Adele 3
## 2 2021-11-06 2 Stay The Kid LAROI & Justin Bieber 16
## 3 2021-11-06 3 Industry Baby Lil Nas X & Jack Harlow 14
## 4 2021-11-06 4 Fancy Like Walker Hayes 19
## 5 2021-11-06 5 Bad Habits Ed Sheeran 18
## 6 2021-11-06 6 Way 2 Sexy Drake Featuring Future & Young … 8
## 7 2021-11-06 7 Shivers Ed Sheeran 7
## 8 2021-11-06 8 Good 4 U Olivia Rodrigo 24
## 9 2021-11-06 9 Need To Know Doja Cat 20
## 10 2021-11-06 10 Levitating Dua Lipa 56
## # ℹ 330,077 more rows
Lubridate is a package that provides functions for manipulating date and time data.
Stringr is used to extract text values from the data.
library(lubridate)
library(stringr)
# Preview the primary-artist extraction next to the raw `artist` string.
# When the credit contains "Featuring", keep the text captured before
# " Featuring"; otherwise the whole credit is the primary artist.
music_df %>%
  select(artist) %>%
  mutate(
    primary_artist = ifelse(
      str_detect(artist, "Featuring"),
      str_match(artist, "(.*)\\sFeaturing")[, 2],
      artist
    )
  )
## # A tibble: 330,087 × 2
## artist primary_artist
## <chr> <chr>
## 1 Adele Adele
## 2 The Kid LAROI & Justin Bieber The Kid LAROI & Justin Bieber
## 3 Lil Nas X & Jack Harlow Lil Nas X & Jack Harlow
## 4 Walker Hayes Walker Hayes
## 5 Ed Sheeran Ed Sheeran
## 6 Drake Featuring Future & Young Thug Drake
## 7 Ed Sheeran Ed Sheeran
## 8 Olivia Rodrigo Olivia Rodrigo
## 9 Doja Cat Doja Cat
## 10 Dua Lipa Dua Lipa
## # ℹ 330,077 more rows
# Split the `artist` credit into two columns:
#   primary_artist  - text before " Featuring" (the full credit when there is
#                     no "Featuring")
#   featured_artist - text after "Featuring "; NA when nobody is featured
music_df_cleaned <- music_df %>%
  mutate(
    primary_artist = ifelse(
      str_detect(artist, "Featuring"),
      str_match(artist, "(.*)\\sFeaturing")[, 2],
      artist
    )
  ) %>%
  mutate(
    featured_artist = str_match(artist, "Featuring\\s(.*)")[, 2]
  )

# Print the cleaned table.
music_df_cleaned
## # A tibble: 330,087 × 7
## date rank song artist weeks_popular primary_artist featured_artist
## <date> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 2021-11-06 1 Easy On… Adele 3 Adele <NA>
## 2 2021-11-06 2 Stay The K… 16 The Kid LAROI… <NA>
## 3 2021-11-06 3 Industr… Lil N… 14 Lil Nas X & J… <NA>
## 4 2021-11-06 4 Fancy L… Walke… 19 Walker Hayes <NA>
## 5 2021-11-06 5 Bad Hab… Ed Sh… 18 Ed Sheeran <NA>
## 6 2021-11-06 6 Way 2 S… Drake… 8 Drake Future & Young…
## 7 2021-11-06 7 Shivers Ed Sh… 7 Ed Sheeran <NA>
## 8 2021-11-06 8 Good 4 U Olivi… 24 Olivia Rodrigo <NA>
## 9 2021-11-06 9 Need To… Doja … 20 Doja Cat <NA>
## 10 2021-11-06 10 Levitat… Dua L… 56 Dua Lipa <NA>
## # ℹ 330,077 more rows
library(tidyr)
# Count the distinct song credits on which Ed Sheeran appears as the primary
# artist versus as the featured artist.
music_df_cleaned %>%
  distinct(song, primary_artist, featured_artist) %>%
  # Stack the two artist columns so each row is one (song, role, name) triple.
  pivot_longer(
    cols = c(primary_artist, featured_artist),
    names_to = "artist_type",
    values_to = "artist_name"
  ) %>%
  # Exact match only; rows whose artist_name is NA are dropped here as well.
  filter(artist_name == "Ed Sheeran") %>%
  count(artist_type)
## # A tibble: 2 × 2
## artist_type n
## <chr> <int>
## 1 featured_artist 7
## 2 primary_artist 31
# One row per chart date, with the songs ranked 1, 2 and 3 spread into
# columns named after their rank.
music_df_cleaned %>%
  select(date, rank, song) %>%
  filter(rank <= 3) %>%
  pivot_wider(
    names_from = rank,
    values_from = song
  )
## # A tibble: 3,301 × 4
## date `1` `2` `3`
## <date> <chr> <chr> <chr>
## 1 2021-11-06 Easy On Me Stay Industry Baby
## 2 2021-10-30 Easy On Me Stay Industry Baby
## 3 2021-10-23 Industry Baby Stay Fancy Like
## 4 2021-10-16 Stay Industry Baby Fancy Like
## 5 2021-10-09 My Universe Stay Industry Baby
## 6 2021-10-02 Stay Industry Baby Way 2 Sexy
## 7 2021-09-25 Stay Way 2 Sexy Bad Habits
## 8 2021-09-18 Way 2 Sexy Girls Want Girls Fair Trade
## 9 2021-09-11 Butter Stay Bad Habits
## 10 2021-09-04 Stay Bad Habits Good 4 U
## # ℹ 3,291 more rows
# Names of the five genres with the most distinct charting songs.
#
# Each chart row is matched to the artist lookup table through its primary
# artist; artists that are missing from the lookup are dropped by the inner
# join. Songs are de-duplicated before counting so that a song that stayed on
# the chart for many weeks is counted once per (song, artist, genre).
top5_genre <- music_df_cleaned %>%
  inner_join(data_original2, by = c("primary_artist" = "name")) %>%
  select(song, primary_artist, genre) %>%
  distinct() %>%
  count(genre) %>%
  # slice_max() replaces the superseded top_n(): the ranking column is named
  # explicitly instead of being guessed from the last column. Ties at the
  # cut-off are still kept (with_ties = TRUE is the default).
  slice_max(order_by = n, n = 5) %>%
  pull(genre)
library(ggplot2)
# Default theme for every plot drawn after this point: black-and-white base
# with bold steel-blue title text.
theme_set(
  theme_bw() +
    theme(
      title = element_text(color = "steelblue", face = "bold")
    )
)
# Yearly chart presence of the top five genres, one panel per genre.
# "Popularity" is the number of chart rows per calendar year whose primary
# artist maps to that genre in the artist lookup table.
music_df_cleaned %>%
  inner_join(data_original2, by = c("primary_artist" = "name")) %>%
  filter(genre %in% top5_genre) %>%
  # Collapse every chart date to the first day of its year.
  mutate(date = floor_date(date, "year")) %>%
  count(date, genre) %>%
  # filter(n >= 300) %>%
  ggplot(aes(x = date, y = n, color = genre)) +
  geom_line() +
  facet_wrap(~genre) +
  labs(
    title = "Popularity of Genre",
    color = "Genre type",
    x = "Years",
    y = "Popularity"
  ) +
  # NOTE(review): a numeric legend.position is deprecated in recent ggplot2
  # releases in favour of legend.position = "inside" together with
  # legend.position.inside -- confirm the installed version before switching.
  theme(legend.position = c(0.88, 0.1))