Load Libraries
library(mongolite)
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Scrape Process
url <- "https://www.billboard.com/charts/hot-100/"
# Number of chart entries to keep
n_top <- 5

# Read the HTML content from the URL
bill_100 <- read_html(url)

# Extract the rows containing the chart details (one node per chart entry)
rows <- bill_100 %>%
  html_elements(".chart-results-list .o-chart-results-list-row")
if (length(rows) == 0) {
  stop("No chart rows found; the page layout may have changed.", call. = FALSE)
}

# Extract the ranks, titles, and artists.
# html_element() (singular) yields exactly one value per row (NA when the node
# is missing), so the three vectors always stay aligned with `rows`. The
# previous approach collected every `.c-label` on the page and picked every
# 8th one, which drifts as soon as a row carries a different number of labels
# and put values such as "NEW" or "1" into the artist column.
ranks <- rows %>%
  html_attr("data-detail-target") %>%
  as.numeric()
titles <- rows %>%
  html_element(".o-chart-results-list__item h3.c-title") %>%
  html_text(trim = TRUE)
# NOTE(review): assumes the artist label is the <span class="c-label"> placed
# immediately after the title heading in each row -- confirm against the live
# page markup.
artists_clean <- rows %>%
  html_element(".o-chart-results-list__item h3.c-title + span.c-label") %>%
  html_text(trim = TRUE)

# Create a tibble with the extracted data and keep the first `n_top` entries.
# head() returns fewer rows instead of NA-padded rows when the chart is short.
music_rank <- tibble(
  rank = ranks,
  title = titles,
  artist = artists_clean
) %>%
  head(n_top)

# Print the first few rows of the tibble to verify
print(music_rank)
## # A tibble: 5 × 3
## rank title artist
## <dbl> <chr> <chr>
## 1 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 2 2 Please Please Please NEW
## 3 3 Espresso 1
## 4 4 A Bar Song (Tipsy) 9
## 5 5 Million Dollar Baby 9
Load the Data from MongoDB and Convert It to a Data Frame
# Target MongoDB collection and database
collection <- "billboard"
db <- "song"

# Read the connection string from the environment instead of embedding the
# username and password in source. Set MONGODB_URI (e.g. in ~/.Renviron) to the
# full "mongodb+srv://..." URI before running.
# NOTE(review): the credentials that were previously committed on this line
# should be rotated, since they remain in version history.
url <- Sys.getenv("MONGODB_URI")
if (!nzchar(url)) {
  stop(
    "MONGODB_URI is not set; define it with the MongoDB connection string.",
    call. = FALSE
  )
}

# Open the collection and pull every stored document
billboard <- mongo(collection = collection, db = db, url = url)
data <- billboard$find()

# Convert the query result to a plain data frame and display it
billboard_df <- as.data.frame(data)
billboard_df
## rank title artist
## 1 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 2 2 Please Please Please Sabrina Carpenter
## 3 3 Espresso Sabrina Carpenter
## 4 4 A Bar Song (Tipsy) Shaboozey
## 5 5 Million Dollar Baby Tommy Richman
## 6 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 7 2 Please Please Please Sabrina Carpenter
## 8 3 Espresso Sabrina Carpenter
## 9 4 A Bar Song (Tipsy) Shaboozey
## 10 5 Million Dollar Baby Tommy Richman
## 11 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 12 2 Please Please Please Sabrina Carpenter
## 13 3 Espresso Sabrina Carpenter
## 14 4 A Bar Song (Tipsy) Shaboozey
## 15 5 Million Dollar Baby Tommy Richman
## 16 1 The Tortured Poets Department Taylor Swift
## 17 2 Hit Me Hard And Soft Billie Eilish
## 18 3 Brat Charli XCX
## 19 4 One Thing At A Time Morgan Wallen
## 20 5 Forever Bon Jovi
## 21 1 The Tortured Poets Department Taylor Swift
## 22 2 Hit Me Hard And Soft Billie Eilish
## 23 3 Brat Charli XCX
## 24 4 One Thing At A Time Morgan Wallen
## 25 5 Forever Bon Jovi
## 26 1 The Tortured Poets Department Taylor Swift
## 27 2 Hit Me Hard And Soft Billie Eilish
## 28 3 Brat Charli XCX
## 29 4 One Thing At A Time Morgan Wallen
## 30 5 Forever Bon Jovi
## 31 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 32 2 Please Please Please Sabrina Carpenter
## 33 3 Espresso Sabrina Carpenter
## 34 4 A Bar Song (Tipsy) Shaboozey
## 35 5 Million Dollar Baby Tommy Richman
## 36 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 37 2 Please Please Please Sabrina Carpenter
## 38 3 Espresso Sabrina Carpenter
## 39 4 A Bar Song (Tipsy) Shaboozey
## 40 5 Million Dollar Baby Tommy Richman
## 41 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 42 2 Please Please Please Sabrina Carpenter
## 43 3 Espresso Sabrina Carpenter
## 44 4 A Bar Song (Tipsy) Shaboozey
## 45 5 Not Like Us Kendrick Lamar
Convert Character Columns to Factors
# Locate the character columns, then turn each of them into a factor in place;
# every other column is left untouched.
chr_cols <- which(vapply(billboard_df, is.character, logical(1)))
for (j in chr_cols) {
  billboard_df[[j]] <- as.factor(billboard_df[[j]])
}

# Show the structure of the converted data frame
str(billboard_df)
## 'data.frame': 45 obs. of 3 variables:
## $ rank : num 1 2 3 4 5 1 2 3 4 5 ...
## $ title : Factor w/ 11 levels "A Bar Song (Tipsy)",..: 6 10 3 1 7 6 10 3 1 7 ...
## $ artist: Factor w/ 10 levels "Billie Eilish",..: 6 7 7 8 10 6 7 7 8 10 ...
# Print the full data frame after the character-to-factor conversion
billboard_df
## rank title artist
## 1 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 2 2 Please Please Please Sabrina Carpenter
## 3 3 Espresso Sabrina Carpenter
## 4 4 A Bar Song (Tipsy) Shaboozey
## 5 5 Million Dollar Baby Tommy Richman
## 6 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 7 2 Please Please Please Sabrina Carpenter
## 8 3 Espresso Sabrina Carpenter
## 9 4 A Bar Song (Tipsy) Shaboozey
## 10 5 Million Dollar Baby Tommy Richman
## 11 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 12 2 Please Please Please Sabrina Carpenter
## 13 3 Espresso Sabrina Carpenter
## 14 4 A Bar Song (Tipsy) Shaboozey
## 15 5 Million Dollar Baby Tommy Richman
## 16 1 The Tortured Poets Department Taylor Swift
## 17 2 Hit Me Hard And Soft Billie Eilish
## 18 3 Brat Charli XCX
## 19 4 One Thing At A Time Morgan Wallen
## 20 5 Forever Bon Jovi
## 21 1 The Tortured Poets Department Taylor Swift
## 22 2 Hit Me Hard And Soft Billie Eilish
## 23 3 Brat Charli XCX
## 24 4 One Thing At A Time Morgan Wallen
## 25 5 Forever Bon Jovi
## 26 1 The Tortured Poets Department Taylor Swift
## 27 2 Hit Me Hard And Soft Billie Eilish
## 28 3 Brat Charli XCX
## 29 4 One Thing At A Time Morgan Wallen
## 30 5 Forever Bon Jovi
## 31 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 32 2 Please Please Please Sabrina Carpenter
## 33 3 Espresso Sabrina Carpenter
## 34 4 A Bar Song (Tipsy) Shaboozey
## 35 5 Million Dollar Baby Tommy Richman
## 36 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 37 2 Please Please Please Sabrina Carpenter
## 38 3 Espresso Sabrina Carpenter
## 39 4 A Bar Song (Tipsy) Shaboozey
## 40 5 Million Dollar Baby Tommy Richman
## 41 1 I Had Some Help Post Malone Featuring Morgan Wallen
## 42 2 Please Please Please Sabrina Carpenter
## 43 3 Espresso Sabrina Carpenter
## 44 4 A Bar Song (Tipsy) Shaboozey
## 45 5 Not Like Us Kendrick Lamar
Artists with Number 1 Rank Visualization
library(ggplot2)
# Keep only the chart entries that sit at rank 1
rank_1_data <- filter(billboard_df, rank == 1)

# Tally the number-1 entries per artist (count column is `n`)
artist_counts <- count(rank_1_data, artist)

# Bar chart of the tallies; geom_col() takes bar heights directly from `n`
ggplot(artist_counts, aes(x = artist, y = n)) +
  geom_col() +
  labs(
    title = "Artists with Number 1 Rank",
    x = "Artist",
    y = "Count of Number 1 Rank"
  ) +
  theme_minimal()

Number of Times Each Artist Appeared at Each Rank Visualization
# Count the occurrences of each artist at each rank.
# `.groups = "drop"` returns an ungrouped result directly, which silences the
# "`summarise()` has grouped output" message and removes the need for a
# separate ungroup() step.
artist_rank_counts <- billboard_df %>%
  group_by(artist, rank) %>%
  summarise(count = n(), .groups = "drop")
## `summarise()` has grouped output by 'artist'. You can override using the
## `.groups` argument.
# Dodged bar chart: one bar per artist within each rank, with bar heights
# taken directly from the precomputed `count` column
ggplot(artist_rank_counts, aes(x = rank, y = count, fill = artist)) +
  geom_col(position = "dodge") +
  labs(
    title = "Number of Times Each Artist Appeared at Each Rank",
    x = "Rank",
    y = "Count",
    fill = "Artist"
  ) +
  theme_minimal()

Songs with Number 1 Rank Visualization
# Keep only the chart entries that sit at rank 1
rank_1_data <- filter(billboard_df, rank == 1)

# Tally the number-1 entries per song title (count column is `n`)
title_counts <- count(rank_1_data, title)

# Bar chart of the tallies; x labels are tilted 45 degrees so long song
# titles do not overlap
ggplot(title_counts, aes(x = title, y = n)) +
  geom_col() +
  labs(
    title = "Songs with Number 1 Rank",
    x = "Song Title",
    y = "Count of Number 1 Rank"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Most Frequent Artists in the Data Visualization
# Tally how many rows each artist has across the whole data set
artist_counts <- count(billboard_df, artist)

# Bar chart ordered from most to least frequent artist, one fill colour per
# artist; x labels are tilted 45 degrees for readability
ggplot(artist_counts, aes(x = reorder(artist, -n), y = n, fill = artist)) +
  geom_col() +
  labs(
    title = "Most Frequent Artists in the Data",
    x = "Artist",
    y = "Count"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_discrete(name = "Artist")
