# Libraries used by this script
library(mongolite)
library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(jsonlite)
message("Scraping Billboard Data")
## Scraping Billboard Data
# URL of the Billboard Hot 100 chart page
url <- "https://www.billboard.com/charts/hot-100/"

# Download and parse the chart page
bill_100 <- read_html(url)

# One node per chart entry
rows <- bill_100 %>%
  html_elements(".chart-results-list .o-chart-results-list-row")
if (length(rows) == 0) {
  stop(
    "No chart rows found at ", url, "; the page markup may have changed",
    call. = FALSE
  )
}

# Extract every field row by row. html_element() (singular) returns exactly
# one result per input row (NA when the row has no match), so ranks, titles
# and artists always stay aligned. The previous approach took every 8th
# ".c-label" across the whole page, which drifts as soon as a row carries a
# different number of labels (e.g. a "NEW" badge) and stored values such as
# "9" or "NEW" as artist names.
ranks <- rows %>%
  html_attr("data-detail-target") %>%
  as.numeric()
titles <- rows %>%
  html_element(".o-chart-results-list__item h3.c-title") %>%
  html_text(trim = TRUE)
# NOTE(review): assumes the artist <span class="c-label"> is the element
# immediately following the title <h3> inside each row; confirm against the
# live markup if Billboard changes its page layout.
artists_clean <- rows %>%
  html_element(".o-chart-results-list__item h3.c-title + span.c-label") %>%
  html_text(trim = TRUE)

# Fail loudly instead of storing a chart with no artist names
if (all(is.na(artists_clean))) {
  stop(
    "No artist names could be extracted; the page markup may have changed",
    call. = FALSE
  )
}

# Collection date, stored with every row so that snapshots taken on
# different days can be told apart
current_date <- Sys.Date()

# Number of chart entries to keep: at most 100, and never more than the
# shortest scraped vector
n <- min(100, length(ranks), length(titles), length(artists_clean))
if (n == 0) {
  stop("No chart entries were scraped; nothing to store", call. = FALSE)
}

# seq_len() is used instead of 1:n because 1:n counts down (1, 0) when n is 0
keep <- seq_len(n)

# Assemble the chart table; the length-one chart_date is recycled to n rows
music_rank <- tibble(
  rank = ranks[keep],
  title = titles[keep],
  artist = artists_clean[keep],
  chart_date = as.character(current_date)
)

# Print sample data before insertion
print(head(music_rank, 5))
## # A tibble: 5 × 4
##    rank title              artist                 chart_date
##   <dbl> <chr>              <chr>                  <chr>     
## 1     1 Luther             Kendrick Lamar & SZA   2025-05-06
## 2     2 Die With A Smile   Lady Gaga & Bruno Mars 2025-05-06
## 3     3 Ordinary           Alex Warren            2025-05-06
## 4     4 Nokia              Drake                  2025-05-06
## 5     5 A Bar Song (Tipsy) Shaboozey              2025-05-06
# Connect to MongoDB Atlas
message("Connecting to MongoDB Atlas")
## Connecting to MongoDB Atlas
# The connection string (which contains the database user and password) is
# read from the MONGODB_URI environment variable instead of being committed
# in the source file. Set it in ~/.Renviron or in the job's environment.
# NOTE(review): the credentials that were previously hard-coded here should
# be treated as leaked and rotated.
mongo_uri <- Sys.getenv("MONGODB_URI", unset = "")
if (!nzchar(mongo_uri)) {
  stop(
    "Environment variable MONGODB_URI is not set; ",
    "it must hold the MongoDB Atlas connection string",
    call. = FALSE
  )
}

# Opening the collection or the count() round trip raises an error when the
# server cannot be reached; that error is turned into one clear failure
# message. (The former `count() >= 0` test could never take its else branch.)
atlas_conn <- tryCatch(
  {
    conn <- mongo(
      collection = "song",
      db = "billboard",
      url = mongo_uri
    )
    conn$count()
    conn
  },
  error = function(e) {
    stop(
      "Failed to connect to MongoDB Atlas: ", conditionMessage(e),
      call. = FALSE
    )
  }
)
message("Successfully connected to MongoDB Atlas")
## Successfully connected to MongoDB Atlas
# Insert data into MongoDB
message("Inserting data into MongoDB")
## Inserting data into MongoDB
# Never wipe a stored snapshot in exchange for an empty one
if (nrow(music_rank) == 0) {
  stop(
    "music_rank is empty; refusing to replace stored chart data",
    call. = FALSE
  )
}

# Make the load idempotent per collection date: documents already stored for
# this chart_date are removed first, so running the script twice on the same
# day replaces the snapshot instead of duplicating every song.
snapshot_query <- sprintf(
  '{"chart_date": "%s"}',
  as.character(current_date)
)
invisible(atlas_conn$remove(snapshot_query))

insert_result <- atlas_conn$insert(music_rank)
print(insert_result)
## List of 5
##  $ nInserted  : num 100
##  $ nMatched   : num 0
##  $ nRemoved   : num 0
##  $ nUpserted  : num 0
##  $ writeErrors: list()
# Spot check: read a few documents back from the collection
message("Verifying data insertion")
## Verifying data insertion
# The empty query matches every document; at most five are returned
retrieved_data <- atlas_conn$find(query = "{}", limit = 5)
print(retrieved_data)
##   rank               title                              artist
## 1    1     I Had Some Help Post Malone Featuring Morgan Wallen
## 2    5 Million Dollar Baby                                   9
## 3    3            Espresso                                   1
## 4    4  A Bar Song (Tipsy)                                   9
## 5    4  A Bar Song (Tipsy)                                   9
# Report how many documents the collection holds in total
doc_count <- atlas_conn$count()
message("Collection contains ", doc_count, " documents")
## Collection contains 130 documents
# Next, run several aggregation pipelines to query the data stored in MongoDB
message("\n--- MongoDB Aggregation Examples ---")
## 
## --- MongoDB Aggregation Examples ---
# Example 1: artists with more than one distinct song in the collection
message("\n1. Artists with multiple songs in the chart:")
## 
## 1. Artists with multiple songs in the chart:
# Stage 1 collapses the collection to one entry per (artist, title) pair,
# averaging the rank over every stored copy of that song. Without it, a song
# stored several times (repeated runs or several chart dates) was counted as
# several songs, so single-song artists passed the "more than one" filter.
# Stage 2 then counts distinct songs per artist; the remaining stages keep
# artists with more than one song, order them, and return the first ten.
artist_counts <- atlas_conn$aggregate('[
  { "$group": {
      "_id": { "artist": "$artist", "title": "$title" },
      "rank": { "$avg": "$rank" }
    }
  },
  { "$group": {
      "_id": "$_id.artist",
      "song_count": { "$sum": 1 },
      "average_rank": { "$avg": "$rank" },
      "songs": { "$push": "$_id.title" }
    }
  },
  { "$match": { "song_count": { "$gt": 1 } } },
  { "$sort": { "song_count": -1, "average_rank": 1 } },
  { "$limit": 10 }
]')
print(artist_counts)
##                       _id song_count average_rank
## 1                   Drake          6     17.83333
## 2    Kendrick Lamar & SZA          5      1.00000
## 3  Lady Gaga & Bruno Mars          5      2.00000
## 4             Alex Warren          5      3.00000
## 5               Shaboozey          5      5.00000
## 6                       9          5     15.40000
## 7                     NEW          5     39.60000
## 8                       1          5     40.60000
## 9                       5          5     51.60000
## 10                      6          5     61.60000
##                                                                                                  songs
## 1                                                     Nokia, Nokia, Nokia, Nokia, Nokia, Headphones On
## 2                                                               Luther, Luther, Luther, Luther, Luther
## 3             Die With A Smile, Die With A Smile, Die With A Smile, Die With A Smile, Die With A Smile
## 4                                                     Ordinary, Ordinary, Ordinary, Ordinary, Ordinary
## 5   A Bar Song (Tipsy), A Bar Song (Tipsy), A Bar Song (Tipsy), A Bar Song (Tipsy), A Bar Song (Tipsy)
## 6  Million Dollar Baby, A Bar Song (Tipsy), A Bar Song (Tipsy), Million Dollar Baby, Baile Inolvidable
## 7       Please Please Please, Please Please Please, I Ain't Coming Back, End Of The World, House Again
## 8                                         Espresso, Espresso, Lose Control, Whiskey Drink, like JENNIE
## 9                                            I Had Some Help, Azizam, Holy Smokes, Te Queria Ver, Loco
## 10                                     Residuals, The Giver, Peekaboo, Friends Like That, Happen To Me
# Example 2: bucket the stored documents by chart position
message("\n2. Distribution of songs by chart position:")
## 
## 2. Distribution of songs by chart position:
# Buckets are [1, 11), [11, 26), [26, 51), [51, 76) and [76, 101); a rank
# outside those bounds lands in the "other" bucket. Each bucket reports its
# document count and the title/artist pairs it contains.
rank_bucket_pipeline <- '[
  { "$bucket": {
      "groupBy": "$rank",
      "boundaries": [1, 11, 26, 51, 76, 101],
      "default": "other",
      "output": {
        "count": { "$sum": 1 },
        "songs": { "$push": { "title": "$title", "artist": "$artist" } }
      }
    }
  }
]'
rank_distribution <- atlas_conn$aggregate(pipeline = rank_bucket_pipeline)
print(rank_distribution)
##   _id count
## 1   1    40
## 2  11    15
## 3  26    25
## 4  51    25
## 5  76    25
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  songs
## 1 I Had Some Help, Million Dollar Baby, Espresso, A Bar Song (Tipsy), A Bar Song (Tipsy), I Had Some Help, Please Please Please, Million Dollar Baby, Espresso, Please Please Please, Luther, Die With A Smile, Ordinary, Nokia, A Bar Song (Tipsy), Luther, Die With A Smile, Ordinary, Nokia, A Bar Song (Tipsy), Luther, Die With A Smile, Ordinary, Nokia, A Bar Song (Tipsy), Luther, Die With A Smile, Ordinary, Nokia, A Bar Song (Tipsy), Luther, Die With A Smile, Ordinary, Nokia, A Bar Song (Tipsy), Pink Pony Club, I'm The Problem, I Ain't Coming Back, Lose Control, Beautiful Things, Post Malone Featuring Morgan Wallen, 9, 1, 9, 9, Post Malone Featuring Morgan Wallen, NEW, 9, 1, NEW, Kendrick Lamar & SZA, Lady Gaga & Bruno Mars, Alex Warren, Drake, Shaboozey, Kendrick Lamar & SZA, Lady Gaga & Bruno Mars, Alex Warren, Drake, Shaboozey, Kendrick Lamar & SZA, Lady Gaga & Bruno Mars, Alex Warren, Drake, Shaboozey, Kendrick Lamar & SZA, Lady Gaga & Bruno Mars, Alex Warren, Drake, Shaboozey, Kendrick Lamar & SZA, Lady Gaga & Bruno Mars, Alex Warren, Drake, Shaboozey, Chappell Roan, Morgan Wallen, NEW, 1, 88
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             All The Way, Anxiety, Birds Of A Feather, Just In Case, I Had Some Help, APT., Love Somebody, Mutt, Espresso, That's So True, Not Like Us, Messy, TV Off, 30 For 30, Sports Car, 65, 3, 7, 49, 5, 50, 27, 27, 12, 54, 27, 50, 21, 22, 18
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Abracadabra, Squabble Up, Stargazing, No One Noticed, Bad Dreams, I Never Lie, Timeless, Taste, Sorry I'm Here For Someone Else, Blue Strips, Worst Way, Good News, Sailor Song, Weren't For The Wind, Hard Fought Hallelujah, I'm A Little Crazy, Azizam, Wildflower, Cry For Me, Dark Thoughts, Residuals, Denial Is A River, DTMF, Indigo, Am I Okay?, 13, 12, 22, 49, 30, 21, 31, 30, 35, 8, 2, 15, 22, 36, 12, 10, 5, 3, 45, 12, 6, 33, 16, 16, 20
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Back To Friends, Me Jalo, Love Me Not, Smile, Twilight Zone, Somebody Loves Me, Undressed, Revolving Door, Baile Inolvidable, Rather Lie, The Giver, Peekaboo, BMF, Fix What You Didn't Break, Holy Smokes, Somethin' 'Bout A Woman, Te Queria Ver, Friends Like That, Loco, Texas, Happen To Me, Your Way's Better, Backseat Driver, Busy Woman, Are You Even Real, 33, 4, 10, 4, 17, 4, 10, 3, 9, 16, 6, 6, 22, 18, 5, 14, 5, 6, 5, 17, 6, -, 72, 73, 27
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Ojos Tristes, EOO, Steve's Lava Chicken, The Largest, No Pole, Feel It, Die Trying, Like Him, Morena, Haunted, Gimme A Hug, Headphones On, End Of The World, Whiskey Drink, Call Me When You Break Up, Forever To Me, Whim Whamiee, Chest Pain (I Love), Nuevayol, Party 4 U, Actin Up, Tattoo, House Again, like JENNIE, Caramel, 59, 56, 24, Jack Black, BigXthaPlug, Don Toliver, d4vd, PARTYNEXTDOOR, Drake & Yebba, Tyler, The Creator Featuring Lola Young, Neton Vega & Peso Pluma, Kane Brown With Jelly Roll, Drake, NEW, 1, 3, 2, 8, -, 92, 68, 8, Charli xcx, NEW, 1, 8
# Example 3: per-artist history for a fixed list of artists (most useful
# once the collection holds snapshots from several dates)
message("\n3. Historical chart data (if available):")
## 
## 3. Historical chart data (if available):
# Keep only documents whose artist field exactly equals one of the listed
# names, order them newest date first (then by rank), and summarise per
# artist: number of documents, best (lowest) rank, and the list of
# title/rank/date entries.
historical_pipeline <- '[
  { "$match": { "artist": { "$in": ["Taylor Swift", "Drake", "The Weeknd"] } } },
  { "$sort": { "chart_date": -1, "rank": 1 } },
  { "$group": {
      "_id": "$artist",
      "appearances": { "$sum": 1 },
      "best_rank": { "$min": "$rank" },
      "latest_songs": { "$push": { "title": "$title", "rank": "$rank", "date": "$chart_date" } }
    }
  }
]'
historical_data <- atlas_conn$aggregate(pipeline = historical_pipeline)
print(historical_data)
##     _id appearances best_rank
## 1 Drake           6         4
##                                                                                                  latest_songs
## 1 Nokia, Headphones On, Nokia, Nokia, Nokia, Nokia, 4, 87, 4, 4, 4, 4, 2025-05-06, 2025-05-06, NA, NA, NA, NA
# Example 4: one pipeline that produces two independent result sets
message("\n4. Create a custom aggregation pipeline:")
## 
## 4. Create a custom aggregation pipeline:
# The $facet stage runs two sub-pipelines over the same input documents:
#   top10artists - the ten documents with the lowest rank value, projected
#                  to artist, title and rank
#   chartStats   - average rank, total document count and the number of
#                  distinct artist values
# NOTE(review): there is no $match stage, so both facets run over every
# document in the collection; repeated or multi-date snapshots therefore
# show up more than once in "top10artists".
custom_pipeline <- '[
  { "$facet": {
      "top10artists": [
        { "$sort": { "rank": 1 } },
        { "$limit": 10 },
        { "$project": { "_id": 0, "artist": 1, "title": 1, "rank": 1 } }
      ],
      "chartStats": [
        { "$group": {
            "_id": null,
            "avgRank": { "$avg": "$rank" },
            "totalSongs": { "$sum": 1 },
            "uniqueArtists": { "$addToSet": "$artist" }
          }
        },
        { "$project": {
            "_id": 0,
            "avgRank": 1,
            "totalSongs": 1,
            "uniqueArtistCount": { "$size": "$uniqueArtists" }
          }
        }
      ]
    }
  }
]'

# Execute the pipeline and show the single combined result row
custom_results <- atlas_conn$aggregate(pipeline = custom_pipeline)
print(custom_results)
##                                                                                                                                                                                                                                                                                                                                                                                                                   top10artists
## 1 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, Luther, Luther, Luther, I Had Some Help, I Had Some Help, Luther, Luther, Die With A Smile, Die With A Smile, Die With A Smile, Kendrick Lamar & SZA, Kendrick Lamar & SZA, Kendrick Lamar & SZA, Post Malone Featuring Morgan Wallen, Post Malone Featuring Morgan Wallen, Kendrick Lamar & SZA, Kendrick Lamar & SZA, Lady Gaga & Bruno Mars, Lady Gaga & Bruno Mars, Lady Gaga & Bruno Mars
##                      chartStats
## 1 39.53846, 130.00000, 58.00000