Introduction

This document analyzes Spotify’s most-streamed songs, showcasing visual trends in streams, danceability, and other metrics.

Dataset

The data includes details on streams, danceability, release years, and more for popular Spotify tracks.

Findings

Data Processing

# Read the Spotify "most streamed songs" CSV export into a DataFrame
import pandas as pd
spotify_data = pd.read_csv(
    filepath_or_buffer='C:/Users/leoan/OneDrive/Desktop/Spotify Most Streamed Songs.csv',
)
# Preview the first five rows of the loaded table
spotify_data.head(n=5)
##                             track_name  ...                                          cover_url
## 0  Seven (feat. Latto) (Explicit Ver.)  ...                                          Not Found
## 1                                 LALA  ...  https://i.scdn.co/image/ab67616d0000b2730656d5...
## 2                              vampire  ...  https://i.scdn.co/image/ab67616d0000b273e85259...
## 3                         Cruel Summer  ...  https://i.scdn.co/image/ab67616d0000b273e787cf...
## 4                       WHERE SHE GOES  ...  https://i.scdn.co/image/ab67616d0000b273ab5c9c...
## 
## [5 rows x 25 columns]

Visualization 1

import matplotlib
matplotlib.use("Agg") 
import matplotlib.pyplot as plt

# Visualization code from notebook
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Location of the CSV export that feeds every chart in this document
file_path = 'C:/Users/leoan/OneDrive/Desktop/Spotify Most Streamed Songs.csv'
# Parse the CSV into the DataFrame shared by the sections that follow
spotify_data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as a quick check that the import worked
spotify_data.head(n=5)
##                             track_name  ...                                          cover_url
## 0  Seven (feat. Latto) (Explicit Ver.)  ...                                          Not Found
## 1                                 LALA  ...  https://i.scdn.co/image/ab67616d0000b2730656d5...
## 2                              vampire  ...  https://i.scdn.co/image/ab67616d0000b273e85259...
## 3                         Cruel Summer  ...  https://i.scdn.co/image/ab67616d0000b273e787cf...
## 4                       WHERE SHE GOES  ...  https://i.scdn.co/image/ab67616d0000b273ab5c9c...
## 
## [5 rows x 25 columns]


# Write the current matplotlib figure to a PNG file.
# NOTE(review): no plotting call appears earlier in this section, so the
# figure saved here looks like it will be empty — confirm whether the chart
# code for Visualization 1 is missing.
plt.savefig('visualization1.png', bbox_inches='tight', dpi=300)

Visualization 2

import matplotlib
matplotlib.use("Agg") 
import matplotlib.pyplot as plt

# Visualization code from notebook
# 'streams' is loaded from the CSV with dtype object, and nlargest() raises a
# TypeError on that dtype, so convert the column to numbers first. Entries
# that cannot be parsed become NaN (errors='coerce') instead of aborting.
spotify_data['streams'] = pd.to_numeric(spotify_data['streams'], errors='coerce')

# Filter the top 20 songs by streams
top_20_songs = spotify_data.nlargest(20, 'streams')

# Scatterplot: Streams vs Danceability for Top 20 Songs with color by energy
plt.figure(figsize=(10, 6))
plt.scatter(top_20_songs['streams'] / 1e9, top_20_songs['danceability_%'],
            c=top_20_songs['energy_%'], cmap='viridis', alpha=0.5)
plt.colorbar(label='Energy (%)')  # Adding color bar for energy levels
plt.title('Scatterplot: Top 20 Songs - Streams vs Danceability')
plt.xlabel('Streams (Billions)')
plt.ylabel('Danceability (%)')
plt.grid(True)

# Save the visualization as an image. This is done before plt.show() because
# showing a figure can close or clear it (interactive backends and notebook
# rendering hooks do this), which would leave a blank image on disk.
plt.savefig('visualization2.png', dpi=300, bbox_inches='tight')
plt.show()

Visualization 3

import matplotlib
matplotlib.use("Agg")  # Use non-interactive backend
import matplotlib.pyplot as plt

# Visualization code from notebook


# 'streams' is loaded from the CSV with dtype object, and nlargest() raises a
# TypeError on that dtype, so convert the column to numbers first. Entries
# that cannot be parsed become NaN (errors='coerce') instead of aborting.
spotify_data['streams'] = pd.to_numeric(spotify_data['streams'], errors='coerce')

# Filter the top 10 songs by streams
top_10_songs = spotify_data.nlargest(10, 'streams')

# Stream counts in billions, computed once for both the bars and their labels
streams_in_billions = top_10_songs['streams'] / 1e9

# Bar Chart: Top 10 Songs by Streams with lime green color
plt.figure(figsize=(10, 6))
plt.barh(top_10_songs['track_name'], streams_in_billions, color='limegreen')
plt.title('Top 10 Songs by Streams')
plt.xlabel('Streams (Billions)')
plt.ylabel('Track Name')
plt.gca().invert_yaxis()  # To display the highest stream at the top
plt.grid(True, axis='x')

# Adding labels to the end of each bar
for index, value in enumerate(streams_in_billions):
    plt.text(value, index, f"{value:.2f}", va='center')

# Save the visualization as an image. This is done before plt.show() because
# showing a figure can close or clear it (interactive backends and notebook
# rendering hooks do this), which would leave a blank image on disk.
plt.savefig('visualization3.png', dpi=300, bbox_inches='tight')
plt.show()

Visualization 4

import matplotlib
matplotlib.use("Agg")  # Use non-interactive backend
import matplotlib.pyplot as plt

# Visualization code from notebook
import matplotlib.pyplot as plt
import pandas as pd


# Ensure columns are numeric; values that cannot be parsed become NaN
spotify_data['streams'] = pd.to_numeric(spotify_data['streams'], errors='coerce')
spotify_data['danceability_%'] = pd.to_numeric(spotify_data['danceability_%'], errors='coerce')
spotify_data['energy_%'] = pd.to_numeric(spotify_data['energy_%'], errors='coerce')

# Filter the top 10 songs by streams
top_10_songs = spotify_data.nlargest(10, 'streams')

# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 6))

# Set the positions for each group of bars
x = np.arange(len(top_10_songs['track_name']))
width = 0.35

# Plot danceability and energy side-by-side
bars1 = ax.bar(x - width/2, top_10_songs['danceability_%'], width, label='Danceability (%)', color='limegreen')
bars2 = ax.bar(x + width/2, top_10_songs['energy_%'], width, label='Energy (%)', color='skyblue')

# Add labels inside the bars: both series use the same placement (centered
# horizontally, 5 units below the top of the bar), so one loop handles both.
for bar in (*bars1, *bars2):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() - 5, f"{bar.get_height():.1f}%",
            ha='center', va='top', color='white', fontsize=10)

# Customize the plot
ax.set_xlabel('Track Name')
ax.set_ylabel('Percentage')
ax.set_title('Top 10 Songs: Danceability and Energy (%) Side-by-Side')
ax.set_xticks(x)
ax.set_xticklabels(top_10_songs['track_name'], rotation=45, ha='right')

# Add a legend
ax.legend()

plt.tight_layout()

# Save the visualization as an image. This is done before plt.show() because
# showing a figure can close or clear it (interactive backends and notebook
# rendering hooks do this), which would leave a blank image on disk.
plt.savefig('visualization4.png', dpi=300, bbox_inches='tight')

# Show the plot
plt.show()

Visualization 5

import matplotlib
matplotlib.use("Agg")  # Use non-interactive backend
import matplotlib.pyplot as plt

# Visualization code from notebook

# Ensure that 'streams' and 'danceability_%' columns are numeric. 'streams' is
# converted here as well so this section does not depend on an earlier one
# having done it; values that cannot be parsed become NaN.
spotify_data['streams'] = pd.to_numeric(spotify_data['streams'], errors='coerce')
spotify_data['danceability_%'] = pd.to_numeric(spotify_data['danceability_%'], errors='coerce')

# Filter the top 25 songs by streams
top_25_songs = spotify_data.nlargest(25, 'streams')

# One x position per song. Plotting against positions rather than the track
# names keeps every song as its own point even if two tracks share a name,
# and keeps the points aligned with the tick labels set below.
positions = list(range(len(top_25_songs)))

# Create the figure and axis
fig, ax1 = plt.subplots(figsize=(12, 6))

# Line plot for streams, converting to billions
ax1.plot(positions, top_25_songs['streams'] / 1e9, color='gold', marker='o', label='Streams')
ax1.set_xlabel('Track Name')
ax1.set_ylabel('Streams (Billions)', color='gold')
ax1.tick_params(axis='y', labelcolor='gold')

# Set ticks and labels for the x-axis
ax1.set_xticks(positions)
ax1.set_xticklabels(top_25_songs['track_name'], rotation=45, ha='right')

# Create a second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.plot(positions, top_25_songs['danceability_%'], color='blue', marker='o', label='Danceability (%)')
ax2.set_ylabel('Danceability (%)', color='blue')
ax2.tick_params(axis='y', labelcolor='blue')

# Add a title
plt.title('Top 25 Songs: Streams (Billions) vs Danceability (%)')

plt.tight_layout()

# Save the visualization as an image. This is done before plt.show() because
# showing a figure can close or clear it (interactive backends and notebook
# rendering hooks do this), which would leave a blank image on disk.
plt.savefig('visualization5.png', dpi=300, bbox_inches='tight')

# Show the plot
plt.show()

Visualization 6

import matplotlib
matplotlib.use("Agg")  # Use non-interactive backend
import matplotlib.pyplot as plt

# Visualization code from notebook
# Select the ten rows with the highest stream counts
top_10_songs = spotify_data.nlargest(n=10, columns='streams')

# Wedge labels and wedge sizes for the chart
labels = top_10_songs['track_name']
sizes = top_10_songs['streams']

# Combined stream count of the ten selected songs
total_streams = top_10_songs['streams'].sum()

# Square canvas that will hold the donut chart for streams
fig, ax = plt.subplots(figsize=(8, 8))

# Function to display both percentage and stream count
def autopct_with_counts(pct, sizes):
    """Build a wedge label showing the percentage and the matching count.

    Args:
        pct: Share of the whole, expressed as a percentage (0-100).
        sizes: Iterable of the values for all wedges; their sum is the whole.

    Returns:
        A two-line string: the percentage with one decimal place, then the
        corresponding absolute amount in millions, in parentheses.
    """
    share = pct / 100.
    # Convert the share back into an absolute count, rounded to a whole number
    absolute = int(round(share * sum(sizes)))
    return f"{pct:.1f}%\n({absolute/1e6:.1f}M)"

# Draw the wedges; each one is annotated with its percentage and stream count
ax.pie(
    sizes,
    labels=labels,
    autopct=lambda pct: autopct_with_counts(pct, sizes),
    startangle=90,
    wedgeprops={'edgecolor': 'white'},
    colors=plt.cm.Paired.colors,
)
## ([<matplotlib.patches.Wedge object at 0x000001251368B860>, <matplotlib.patches.Wedge object at 0x0000012513616A20>, <matplotlib.patches.Wedge object at 0x000001251369BE00>, <matplotlib.patches.Wedge object at 0x0000012513698EF0>, <matplotlib.patches.Wedge object at 0x000001251369A810>, <matplotlib.patches.Wedge object at 0x000001251369B170>, <matplotlib.patches.Wedge object at 0x000001251369B9E0>, <matplotlib.patches.Wedge object at 0x00000125136D4530>, <matplotlib.patches.Wedge object at 0x00000125136D4B00>, <matplotlib.patches.Wedge object at 0x00000125136D5190>], [Text(-0.43023059961185184, 1.0123742544917007, 'Blinding Lights'), Text(-1.021289608288332, 0.40861661248934233, 'Shape of You'), Text(-1.0444473794024767, -0.34515166472045195, 'Someone You Loved'), Text(-0.6459022115927794, -0.8903989740894565, 'Dance Monkey'), Text(-0.013316800542019355, -1.0999193892387407, 'Sunflower - Spider-Man: Into the Spider-Verse'), Text(0.6092468892691175, -0.9158702025483217, 'One Dance'), Text(1.013086009951762, -0.42855190635443285, 'STAY (with Justin Bieber)'), Text(1.0840654863321506, 0.18655299875219908, 'Believer'), Text(0.8175004285127022, 0.7359979955010397, 'Closer'), Text(0.3022380266807345, 1.0576635453811083, 'Starboy')], [Text(-0.23467123615191915, 0.5522041388136548, '12.8%\n(3703.9M)'), Text(-0.5570670590663629, 0.22288178863055033, '12.3%\n(3562.5M)'), Text(-0.569698570583169, -0.18826454439297377, '10.0%\n(2887.2M)'), Text(-0.3523102972324251, -0.48567216768515803, '9.9%\n(2864.8M)'), Text(-0.007263709386556011, -0.5999560304938585, '9.7%\n(2808.1M)'), Text(0.33231648505588224, -0.49956556502635724, '9.4%\n(2713.9M)'), Text(0.5525923690645974, -0.23375558528423607, '9.2%\n(2665.3M)'), Text(0.5913084470902639, 0.10175618113756311, '9.0%\n(2594.0M)'), Text(0.4459093246432921, 0.40145345209147615, '8.9%\n(2591.2M)'), Text(0.1648571054622188, 0.5769073883896954, '8.9%\n(2565.5M)')])
# Add a circle in the center to make it a donut chart
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax.add_artist(centre_circle)

# Add the total streams text in the center
ax.text(0, 0, f'Total\n{total_streams / 1e9:.2f}B', ha='center', va='center', fontsize=14, fontweight='bold')

# Equal aspect ratio ensures that the pie is drawn as a circle.
ax.axis('equal')
## (-1.099999997255285, 1.0999999443185973, -1.0999999996362797, 1.09999999998268)
# Add a title
plt.title('Top 10 Most-Streamed Songs (Donut Chart)')

plt.tight_layout()

# Save the visualization as an image. This is done before plt.show() because
# showing a figure can close or clear it (interactive backends and notebook
# rendering hooks do this), which would leave a blank image on disk.
plt.savefig('visualization6.png', dpi=300, bbox_inches='tight')

# Show the plot
plt.show()

Conclusion

This analysis highlights the most-streamed songs on Spotify, offering insights into their popularity and characteristics. The visualizations explore how song attributes such as danceability and energy relate to stream counts.