# Load the built-in `cars` dataset (kept from the original script; it is not
# referenced anywhere in the visible code).
data(cars)

library(jsonlite)

# CryptoCompare `histoday` endpoint: BTC priced in USD, requested with limit=100.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"

# Download the JSON payload and parse it into nested R lists / data frames.
btc_data <- fromJSON(url)

# The per-period rows are read from the nested `Data$Data` element.
btc_df <- btc_data$Data$Data

# Fail loudly when the payload carries no usable closing prices. Without this
# guard, max() on an empty input would only warn and silently return -Inf.
if (is.null(btc_df) || !"close" %in% names(btc_df) ||
    all(is.na(btc_df$close))) {
  stop("BTC history response contains no usable `close` prices.", call. = FALSE)
}

# Highest closing price within the returned window, ignoring missing values.
max_close <- max(btc_df$close, na.rm = TRUE)

max_close

## [1] 124723

Music and Popularity

1 How do the characteristics of music influence its popularity?

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()  masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(broom)

# Read the Spotify track features from a CSV file in the working directory.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Build the modelling table: keep rows with a known popularity, derive
# `year`/`decade`, and retain only the columns used for the analysis
# (renaming `artist_name` to `artists` along the way).
spotify_clean <- spotify %>%
  filter(!is.na(popularity)) %>%
  mutate(
    # Fall back to an all-NA numeric year when the file has no `year` column.
    year = if ("year" %in% names(.)) year else NA_real_,
    # Round the year down to the start of its decade (NA stays NA).
    decade = 10 * floor(year / 10)
  ) %>%
  select(
    track_name, artists = artist_name, genre, popularity,
    danceability, energy, valence, loudness, tempo, acousticness,
    year, decade
  )

# Ordinary least squares fit of popularity on six audio features.
# NOTE(review): the predictors enter on their raw scales, so each coefficient
# is a per-unit effect and magnitudes are not directly comparable across
# features -- consider standardizing before ranking them.
model <- lm(
  popularity ~ danceability + energy + valence + loudness + tempo + acousticness,
  data = spotify_clean
)

# One row per predictor (intercept removed), with display-friendly term labels.
coeff_summary <- model %>%
  broom::tidy() %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term = recode(
      term,
      danceability = "Danceability",
      energy = "Energy",
      valence = "Valence (Happiness)",
      loudness = "Loudness",
      tempo = "Tempo (BPM)",
      acousticness = "Acousticness"
    )
  )

# Horizontal bar chart of the regression coefficients, ordered by estimate.
# Bars are coloured by sign: green for positive, red for negative estimates.
coeff_summary %>%
  ggplot(aes(x = reorder(term, estimate), y = estimate, fill = estimate > 0)) +
  geom_col() +
  scale_fill_manual(values = c("FALSE" = "tomato", "TRUE" = "seagreen3")) +
  labs(
    title = "Influence of Song Characteristics on Popularity",
    x = "Song Feature",
    y = "Effect Size (Regression Coefficient)"
  ) +
  coord_flip() +
  theme_minimal() +
  # The fill only encodes the sign, so the legend is hidden.
  theme(legend.position = "none")

According to the graph, danceability and loudness positively influence a song's popularity. Tempo (BPM) is neutral, with an effect close to zero. Valence (happiness), energy, and acousticness have negative influences on popularity.

2 Are happier songs more popular across different genres?

library(tidyverse)

# Re-read the Spotify track features so this section is self-contained.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Keep only genre, valence, and popularity; drop incomplete rows and make sure
# both measures are numeric.
spotify_clean <- spotify %>%
  select(genre, valence, popularity) %>%
  drop_na() %>%
  mutate(across(c(valence, popularity), as.numeric))

# Per-genre averages of valence and popularity (one row per genre).
genre_stats <- spotify_clean %>%
  group_by(genre) %>%
  summarise(
    across(
      c(valence, popularity),
      ~ mean(.x, na.rm = TRUE),
      .names = "avg_{.col}"
    )
  )

# Scatterplot of genre-level average valence against average popularity,
# with a straight-line (lm) trend drawn without a confidence band.
genre_stats %>%
  ggplot(aes(x = avg_valence, y = avg_popularity)) +
  geom_point(size = 3, alpha = 0.8, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Relationship Between Genre Happiness and Popularity",
    x = "Average Happiness (Valence)",
    y = "Average Popularity"
  ) +
  theme_minimal(base_size = 13)

## `geom_smooth()` using formula = 'y ~ x'

There seems to be little correlation between a song's happiness and its popularity.

3 Do songs with more lyrics tend to be more or less popular?

library(tidyverse)

# Re-read the Spotify track features so this section is self-contained.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Keep only genre, speechiness, and popularity; drop incomplete rows and make
# sure both measures are numeric.
spotify_clean <- spotify %>%
  select(genre, speechiness, popularity) %>%
  drop_na() %>%
  mutate(across(c(speechiness, popularity), as.numeric))

# Correlation between speechiness and popularity across all tracks
# (cor() uses its default method here).
cor_speech_pop <- with(spotify_clean, cor(speechiness, popularity))
print(paste(
  "Correlation between speechiness and popularity:",
  round(cor_speech_pop, 3)
))

## [1] "Correlation between speechiness and popularity: -0.151"

# Track-level scatterplot of popularity against speechiness, overlaid with a
# straight-line (lm) trend drawn without a confidence band.
spotify_clean %>%
  ggplot(aes(x = speechiness, y = popularity)) +
  geom_point(color = "steelblue", alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Do Songs with More Speech Tend to Be More or Less Popular?",
    x = "Speechiness (0 = Musical, 1 = Spoken Word)",
    y = "Popularity"
  ) +
  theme_minimal(base_size = 13)

## `geom_smooth()` using formula = 'y ~ x'

The results show a slightly negative correlation (r = -0.151) between speechiness, used here as a proxy for lyrics, and popularity. Songs with more spoken content are slightly less popular than those with less.

4 Which genres have produced the most popular songs overall?

library(tidyverse)

# Re-read the Spotify track features so this section is self-contained.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Average popularity and track count per genre, restricted to the 15 genres
# with the highest average popularity (sorted from highest to lowest).
genre_popularity <- spotify %>%
  drop_na(genre, popularity) %>%
  group_by(genre) %>%
  summarise(
    avg_popularity = mean(popularity, na.rm = TRUE),
    song_count = n()
  ) %>%
  arrange(desc(avg_popularity)) %>%
  slice_head(n = 15)

# Horizontal bar chart of average popularity per genre, ordered by popularity.
# Bar colour encodes how many songs each genre contributes (light to dark blue).
genre_popularity %>%
  ggplot(
    aes(
      x = reorder(genre, avg_popularity),
      y = avg_popularity,
      fill = song_count
    )
  ) +
  geom_col() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Which Genres Have Produced the Most Popular Songs?",
    x = "Genre",
    y = "Average Popularity",
    fill = "Number of Songs"
  ) +
  coord_flip() +
  theme_minimal(base_size = 13) +
  # Centre and embolden the plot title.
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

The results, unsurprisingly, show that Pop is the genre with the most popular songs on average. It is followed by Rap and then Rock, with Reggaeton ranking last among the top 15 genres shown.

Mini Project

2025-10-16

Music and Popularity

1 How do the characteristics of music influence its popularity?

According to the graph, danceability and loudness positively influence a song's popularity. Tempo (BPM) is neutral, with an effect close to zero. Valence (happiness), energy, and acousticness have negative influences on popularity.

2 Are happier songs more popular across different genres?

There seems to be little correlation between a song's happiness and its popularity.

3 Do songs with more lyrics tend to be more or less popular?

The results show a slightly negative correlation (r = -0.151) between speechiness, used here as a proxy for lyrics, and popularity. Songs with more spoken content are slightly less popular than those with less.

4 Which genres have produced the most popular songs overall?

The results, unsurprisingly, show that Pop is the genre with the most popular songs on average. It is followed by Rap and then Rock, with Reggaeton ranking last among the top 15 genres shown.