Load Libraries

library(mongolite)
library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Scrape Process

# Billboard Hot 100 chart page to scrape
url <- "https://www.billboard.com/charts/hot-100/"

# Download and parse the HTML content from the URL
bill_100 <- read_html(url)

# One node per chart entry; every per-song field below is looked up inside
# its own row so the columns cannot drift out of alignment
rows <- bill_100 %>% html_elements(".chart-results-list .o-chart-results-list-row")

# Rank comes from each row's `data-detail-target` attribute
ranks <- rows %>% html_attr("data-detail-target") %>% as.numeric()

# html_element() (singular) yields exactly one result per row (NA when the
# node is missing), which keeps titles the same length as `rows`
titles <- rows %>%
  html_element(".o-chart-results-list__item h3.c-title") %>%
  html_text(trim = TRUE)

# Raw dump of every `.c-label` in every row (rank, badges, last-week and peak
# figures, artist, ...). Kept for reference only: the number of labels per row
# varies (e.g. a "NEW" badge), so fixed-stride indexing into this vector picks
# the wrong cells.
artists <- rows %>% html_elements(".c-label") %>% html_text(trim = TRUE)

# The artist is the label that directly follows the title heading in a row.
# NOTE(review): selector relies on the current Billboard markup
# (<h3 class="c-title"> immediately followed by <span class="c-label">);
# re-check if the site layout changes.
artists_clean <- rows %>%
  html_element("h3.c-title + span.c-label") %>%
  html_text(trim = TRUE)

# Create a tibble with the top five entries
music_rank <- tibble(
  rank = ranks[1:5],
  title = titles[1:5],
  artist = artists_clean[1:5]
)


# Print the tibble to verify the scrape
print(music_rank)
## # A tibble: 5 × 3
##    rank title                artist                             
##   <dbl> <chr>                <chr>                              
## 1     1 I Had Some Help      Post Malone Featuring Morgan Wallen
## 2     2 Please Please Please NEW                                
## 3     3 Espresso             1                                  
## 4     4 A Bar Song (Tipsy)   9                                  
## 5     5 Million Dollar Baby  9

Insert Scraped Data into MongoDB

# MongoDB: emit a status message marking the start of the upload step
message('Input Data to MongoDB Atlas')
## Input Data to MongoDB Atlas
# Read the Atlas connection string from the environment so the username and
# password are never stored in the script or its rendered output.
# Set MONGODB_URI (for example in ~/.Renviron) before running.
mongo_uri <- Sys.getenv("MONGODB_URI")
if (!nzchar(mongo_uri)) {
  stop("Environment variable MONGODB_URI is not set.", call. = FALSE)
}

# Handle to the "song" collection in the "billboard" database
atlas_conn <- mongo(
  collection = "song",
  db = "billboard",
  url = mongo_uri
)

# Add the scraped rows as new documents; existing documents are left in place,
# so re-running the script stores the same chart again
atlas_conn$insert(music_rank)
## List of 5
##  $ nInserted  : num 5
##  $ nMatched   : num 0
##  $ nRemoved   : num 0
##  $ nUpserted  : num 0
##  $ writeErrors: list()
# Remove the connection object from the workspace now that the insert is done
rm(atlas_conn)

Retrieve the Data from MongoDB and Convert It to a Data Frame

# NOTE(review): the upload step in this script writes to db = "billboard",
# collection = "song", while this read uses db = "song",
# collection = "billboard". The names look swapped; confirm which location is
# the intended source before relying on the result.
collection <- "billboard"
db <- "song"

# Take the connection string from the environment instead of embedding the
# username and password in the script (set MONGODB_URI before running)
url <- Sys.getenv("MONGODB_URI")
if (!nzchar(url)) {
  stop("Environment variable MONGODB_URI is not set.", call. = FALSE)
}

# Open the collection and fetch every stored document
billboard <- mongo(collection = collection, db = db, url = url)
data <- billboard$find()

# Make sure the result is a plain data frame, then print it
billboard_df <- as.data.frame(data)
billboard_df
##    rank                         title                              artist
## 1     1               I Had Some Help Post Malone Featuring Morgan Wallen
## 2     2          Please Please Please                   Sabrina Carpenter
## 3     3                      Espresso                   Sabrina Carpenter
## 4     4            A Bar Song (Tipsy)                           Shaboozey
## 5     5           Million Dollar Baby                       Tommy Richman
## 6     1               I Had Some Help Post Malone Featuring Morgan Wallen
## 7     2          Please Please Please                   Sabrina Carpenter
## 8     3                      Espresso                   Sabrina Carpenter
## 9     4            A Bar Song (Tipsy)                           Shaboozey
## 10    5           Million Dollar Baby                       Tommy Richman
## 11    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 12    2          Please Please Please                   Sabrina Carpenter
## 13    3                      Espresso                   Sabrina Carpenter
## 14    4            A Bar Song (Tipsy)                           Shaboozey
## 15    5           Million Dollar Baby                       Tommy Richman
## 16    1 The Tortured Poets Department                        Taylor Swift
## 17    2          Hit Me Hard And Soft                       Billie Eilish
## 18    3                          Brat                          Charli XCX
## 19    4           One Thing At A Time                       Morgan Wallen
## 20    5                       Forever                            Bon Jovi
## 21    1 The Tortured Poets Department                        Taylor Swift
## 22    2          Hit Me Hard And Soft                       Billie Eilish
## 23    3                          Brat                          Charli XCX
## 24    4           One Thing At A Time                       Morgan Wallen
## 25    5                       Forever                            Bon Jovi
## 26    1 The Tortured Poets Department                        Taylor Swift
## 27    2          Hit Me Hard And Soft                       Billie Eilish
## 28    3                          Brat                          Charli XCX
## 29    4           One Thing At A Time                       Morgan Wallen
## 30    5                       Forever                            Bon Jovi
## 31    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 32    2          Please Please Please                   Sabrina Carpenter
## 33    3                      Espresso                   Sabrina Carpenter
## 34    4            A Bar Song (Tipsy)                           Shaboozey
## 35    5           Million Dollar Baby                       Tommy Richman
## 36    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 37    2          Please Please Please                   Sabrina Carpenter
## 38    3                      Espresso                   Sabrina Carpenter
## 39    4            A Bar Song (Tipsy)                           Shaboozey
## 40    5           Million Dollar Baby                       Tommy Richman
## 41    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 42    2          Please Please Please                   Sabrina Carpenter
## 43    3                      Espresso                   Sabrina Carpenter
## 44    4            A Bar Song (Tipsy)                           Shaboozey
## 45    5                   Not Like Us                      Kendrick Lamar

Convert Character Columns to Factors

# Walk the columns by position and turn each character column into a factor;
# columns of any other type are left untouched
for (col_idx in seq_along(billboard_df)) {
  if (is.character(billboard_df[[col_idx]])) {
    billboard_df[[col_idx]] <- as.factor(billboard_df[[col_idx]])
  }
}

# Show the structure of the converted data frame
str(billboard_df)
## 'data.frame':    45 obs. of  3 variables:
##  $ rank  : num  1 2 3 4 5 1 2 3 4 5 ...
##  $ title : Factor w/ 11 levels "A Bar Song (Tipsy)",..: 6 10 3 1 7 6 10 3 1 7 ...
##  $ artist: Factor w/ 10 levels "Billie Eilish",..: 6 7 7 8 10 6 7 7 8 10 ...
# Print the full data frame after the conversion
billboard_df
##    rank                         title                              artist
## 1     1               I Had Some Help Post Malone Featuring Morgan Wallen
## 2     2          Please Please Please                   Sabrina Carpenter
## 3     3                      Espresso                   Sabrina Carpenter
## 4     4            A Bar Song (Tipsy)                           Shaboozey
## 5     5           Million Dollar Baby                       Tommy Richman
## 6     1               I Had Some Help Post Malone Featuring Morgan Wallen
## 7     2          Please Please Please                   Sabrina Carpenter
## 8     3                      Espresso                   Sabrina Carpenter
## 9     4            A Bar Song (Tipsy)                           Shaboozey
## 10    5           Million Dollar Baby                       Tommy Richman
## 11    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 12    2          Please Please Please                   Sabrina Carpenter
## 13    3                      Espresso                   Sabrina Carpenter
## 14    4            A Bar Song (Tipsy)                           Shaboozey
## 15    5           Million Dollar Baby                       Tommy Richman
## 16    1 The Tortured Poets Department                        Taylor Swift
## 17    2          Hit Me Hard And Soft                       Billie Eilish
## 18    3                          Brat                          Charli XCX
## 19    4           One Thing At A Time                       Morgan Wallen
## 20    5                       Forever                            Bon Jovi
## 21    1 The Tortured Poets Department                        Taylor Swift
## 22    2          Hit Me Hard And Soft                       Billie Eilish
## 23    3                          Brat                          Charli XCX
## 24    4           One Thing At A Time                       Morgan Wallen
## 25    5                       Forever                            Bon Jovi
## 26    1 The Tortured Poets Department                        Taylor Swift
## 27    2          Hit Me Hard And Soft                       Billie Eilish
## 28    3                          Brat                          Charli XCX
## 29    4           One Thing At A Time                       Morgan Wallen
## 30    5                       Forever                            Bon Jovi
## 31    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 32    2          Please Please Please                   Sabrina Carpenter
## 33    3                      Espresso                   Sabrina Carpenter
## 34    4            A Bar Song (Tipsy)                           Shaboozey
## 35    5           Million Dollar Baby                       Tommy Richman
## 36    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 37    2          Please Please Please                   Sabrina Carpenter
## 38    3                      Espresso                   Sabrina Carpenter
## 39    4            A Bar Song (Tipsy)                           Shaboozey
## 40    5           Million Dollar Baby                       Tommy Richman
## 41    1               I Had Some Help Post Malone Featuring Morgan Wallen
## 42    2          Please Please Please                   Sabrina Carpenter
## 43    3                      Espresso                   Sabrina Carpenter
## 44    4            A Bar Song (Tipsy)                           Shaboozey
## 45    5                   Not Like Us                      Kendrick Lamar

Artists with Number 1 Rank Visualization

library(ggplot2)
# Keep only the rows where the song sits at position 1
rank_1_data <- filter(billboard_df, rank == 1)

# Tally the rank-1 rows per artist (the tally lands in column `n`)
artist_counts <- count(rank_1_data, artist)

# Bar chart: one bar per artist, height = number of rank-1 rows
ggplot(data = artist_counts, mapping = aes(x = artist, y = n)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Artists with Number 1 Rank",
    x = "Artist",
    y = "Count of Number 1 Rank"
  )

Number of Times Each Artist Appeared at Each Rank Visualization

# Count how many rows each artist has at each rank.
# `.groups = "drop"` returns an ungrouped result directly, which avoids the
# "summarise() has grouped output" message and the separate ungroup() call.
artist_rank_counts <- billboard_df %>%
  group_by(artist, rank) %>%
  summarise(count = n(), .groups = "drop")
## `summarise()` has grouped output by 'artist'. You can override using the
## `.groups` argument.
# Dodged bar chart: rank on the x axis, one coloured bar per artist,
# bar height = number of rows for that artist/rank pair
ggplot(data = artist_rank_counts, mapping = aes(x = rank, y = count, fill = artist)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Number of Times Each Artist Appeared at Each Rank",
    x = "Rank",
    y = "Count",
    fill = "Artist"
  )

Songs with Number 1 Rank Visualization

# Keep only the rows where the song sits at position 1
rank_1_data <- filter(billboard_df, rank == 1)

# Tally the rank-1 rows per song title (the tally lands in column `n`)
title_counts <- count(rank_1_data, title)

# Bar chart: one bar per title, height = number of rank-1 rows.
# theme() is applied after theme_minimal() so the tilted x labels survive.
ggplot(data = title_counts, mapping = aes(x = title, y = n)) +
  geom_col() +
  labs(
    title = "Songs with Number 1 Rank",
    x = "Song Title",
    y = "Count of Number 1 Rank"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Most Frequent Artists in the Data Visualization

# Tally all rows per artist, regardless of rank (the tally lands in column `n`)
artist_counts <- count(billboard_df, artist)

# Bar chart ordered from most to least frequent artist, one fill colour each.
# theme() is applied after theme_minimal() so the tilted x labels survive.
ggplot(
  data = artist_counts,
  mapping = aes(x = reorder(artist, -n), y = n, fill = artist)
) +
  geom_col() +
  scale_fill_discrete(name = "Artist") +
  labs(
    title = "Most Frequent Artists in the Data",
    x = "Artist",
    y = "Count"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))