# Load the built-in `cars` data set into the workspace.
data(cars)
library(jsonlite)

# Request BTC/USD history from the CryptoCompare `histoday` endpoint and parse
# the JSON response into R objects.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
btc_data <- fromJSON(url)

# The price records are read from the nested `Data$Data` element.
btc_df <- btc_data$Data$Data

# Fail loudly when the response does not carry the expected table. Without
# this guard `btc_df` would be NULL and max() would quietly return -Inf
# (emitting only a warning), which is easy to miss in a rendered report.
if (!is.data.frame(btc_df) || !("close" %in% names(btc_df))) {
  stop(
    "Unexpected API response: no `close` column found under Data$Data",
    call. = FALSE
  )
}

# Highest closing price among the returned records, ignoring missing values.
max_close <- max(btc_df$close, na.rm = TRUE)
max_close
## [1] 124723
Music and Popularity
1 How do the characteristics of music influence its popularity?
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(broom)
# Read the raw track-level table.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Standardise the artist column name first.
spotify_clean <- rename(spotify, artists = artist_name)

# The file may not contain a `year` column; when it is absent, add an all-NA
# placeholder so that `decade` and the column selection below still work.
if (!("year" %in% colnames(spotify_clean))) {
  spotify_clean <- mutate(spotify_clean, year = NA_real_)
}

# Derive the decade, keep only the columns used for modelling, and drop
# tracks that have no popularity score.
spotify_clean <- spotify_clean %>%
  mutate(decade = floor(year / 10) * 10) %>%
  select(
    track_name, artists, genre, popularity,
    danceability, energy, valence, loudness, tempo, acousticness,
    year, decade
  ) %>%
  filter(!is.na(popularity))
# Ordinary least-squares fit of popularity on six audio features. The
# predictors enter on their raw scales, so the coefficients are unstandardized.
model <- lm(
  popularity ~ danceability + energy + valence + loudness + tempo +
    acousticness,
  data = spotify_clean
)

# Tidy coefficient table: one row per fitted term.
coeff_summary <- broom::tidy(model)

# The intercept is not a song feature, so it is excluded from the summary.
coeff_summary <- coeff_summary[coeff_summary$term != "(Intercept)", ]

# Swap raw column names for the display labels used on the chart axis.
coeff_summary$term <- recode(
  coeff_summary$term,
  danceability = "Danceability",
  energy = "Energy",
  valence = "Valence (Happiness)",
  loudness = "Loudness",
  tempo = "Tempo (BPM)",
  acousticness = "Acousticness"
)
# Horizontal bar chart of the regression coefficients, ordered by estimate.
# Bars with a positive estimate are green; all others are red. The fill
# legend is hidden because the colour only encodes the sign.
ggplot(
  coeff_summary,
  aes(x = reorder(term, estimate), y = estimate, fill = estimate > 0)
) +
  geom_col() +
  scale_fill_manual(values = c("TRUE" = "seagreen3", "FALSE" = "tomato")) +
  coord_flip() +
  ggtitle("Influence of Song Characteristics on Popularity") +
  xlab("Song Feature") +
  ylab("Effect Size (Regression Coefficient)") +
  theme_minimal() +
  theme(legend.position = "none")

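According to the graph, the characteristics of music that positively
influence its popularity are danceability and loudness. Tempo (BPM) appears
neutral, although the coefficients are unstandardized, so its near-zero bar
may partly reflect the larger scale on which tempo is measured. Valence
(happiness), energy, and acousticness have negative influences on the
popularity of music.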
2 Are happier songs more popular across different genres?
library(tidyverse)
# Re-read the raw data and rebuild `spotify_clean` with only the columns
# needed for the genre-level happiness question.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Keep genre, valence and popularity, drop incomplete rows, then make sure
# both measures are stored as numeric.
spotify_clean <- spotify %>%
  select(genre, valence, popularity) %>%
  drop_na() %>%
  mutate(across(c(valence, popularity), as.numeric))
# Per-genre averages of valence and popularity. The `.names` pattern yields
# the columns `avg_valence` and `avg_popularity`.
genre_stats <- spotify_clean %>%
  group_by(genre) %>%
  summarise(
    across(
      c(valence, popularity),
      function(col) mean(col, na.rm = TRUE),
      .names = "avg_{.col}"
    )
  )
# One point per genre (average valence vs. average popularity) with a
# straight-line fit and no confidence band.
ggplot(genre_stats, aes(x = avg_valence, y = avg_popularity)) +
  theme_minimal(base_size = 13) +
  geom_point(color = "steelblue", size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  ggtitle("Relationship Between Genre Happiness and Popularity") +
  xlab("Average Happiness (Valence)") +
  ylab("Average Popularity")
## `geom_smooth()` using formula = 'y ~ x'

There seems to be little correlation between a song's happiness and
its popularity.
3 Do songs with more lyrics tend to be more or less popular?
library(tidyverse)
# Re-read the raw data and rebuild `spotify_clean` with only the columns
# needed for the speechiness question.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Coerce to numeric BEFORE dropping incomplete rows. as.numeric() turns any
# unparseable value into NA; removing NAs afterwards guarantees the table is
# complete, so a later cor() call (which has no NA handling by default) cannot
# return NA because of a value introduced by the coercion.
spotify_clean <- spotify %>%
  select(genre, speechiness, popularity) %>%
  mutate(
    speechiness = as.numeric(speechiness),
    popularity = as.numeric(popularity)
  ) %>%
  drop_na()
# Correlation between speechiness and popularity (cor() default method),
# reported to three decimal places.
cor_speech_pop <- with(spotify_clean, cor(speechiness, popularity))
print(
  paste(
    "Correlation between speechiness and popularity:",
    round(cor_speech_pop, 3)
  )
)
## [1] "Correlation between speechiness and popularity: -0.151"
# Track-level scatter of popularity against speechiness. Points are
# semi-transparent; a straight-line fit is overlaid without a confidence band.
ggplot(spotify_clean, aes(x = speechiness, y = popularity)) +
  theme_minimal(base_size = 13) +
  geom_point(alpha = 0.3, color = "steelblue") +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  ggtitle("Do Songs with More Speech Tend to Be More or Less Popular?") +
  xlab("Speechiness (0 = Musical, 1 = Spoken Word)") +
  ylab("Popularity")
## `geom_smooth()` using formula = 'y ~ x'

The results show a slightly negative correlation (r = -0.151) between
speechiness, used here as a proxy for the amount of lyrics, and
popularity. Songs with more spoken content are slightly less popular than
those with less.
4 Which genres have produced the most popular songs overall?
library(tidyverse)
# Re-read the raw data for the genre ranking.
spotify <- read_csv("SpotifyFeatures.csv", show_col_types = FALSE)

# Average popularity and track count per genre, using only rows where both
# genre and popularity are present.
genre_popularity <- spotify %>%
  drop_na(genre, popularity) %>%
  group_by(genre) %>%
  summarise(
    avg_popularity = mean(popularity, na.rm = TRUE),
    song_count = n()
  )

# Rank genres from highest to lowest average popularity and keep the top 15.
genre_popularity <- genre_popularity %>%
  arrange(desc(avg_popularity)) %>%
  slice_head(n = 15)
# Horizontal bars of average popularity per genre, ordered by that average.
# Bar shade encodes how many songs the genre contributes (light = few,
# dark = many).
ggplot(
  genre_popularity,
  aes(
    x = reorder(genre, avg_popularity),
    y = avg_popularity,
    fill = song_count
  )
) +
  geom_col() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  coord_flip() +
  ggtitle("Which Genres Have Produced the Most Popular Songs?") +
  xlab("Genre") +
  ylab("Average Popularity") +
  labs(fill = "Number of Songs") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

The results, unsurprisingly, show that Pop is the genre with the most
popular songs on average. It is followed by Rap and then Rock, with
Reggaeton last among the 15 genres shown.