import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'c:/ProgramData/Anaconda3/Library/plugins/platforms'
Here is the setup of my data set to better understand my visualizations. This dataset includes the Top 1,000 Highest Grossing Movies of all time, ranked by the total global revenue the movie made from the box office. Although it specifies 1000 movies, there are 918 unique values included within the dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
path = "//apporto.com/dfs/LOYOLA/Users/cwsciotto_loyola/Documents/Python/"
filename = "HighestGrossingMovies.csv"
df = pd.read_csv(path + filename, nrows=3, skiprows=0)
del df['Unnamed: 0']
pd.set_option('display.max_columns', None)
print(df)
## Title \
## 0 Star Wars: Episode VII - The Force Awakens (2015)
## 1 Avengers: Endgame (2019)
## 2 Avatar (2009)
##
## Movie Info \
## 0 As a new threat to the galaxy rises, Rey, a de...
## 1 After the devastating events of Avengers: Infi...
## 2 A paraplegic Marine dispatched to the moon Pan...
##
## Distributor Release Date \
## 0 Walt Disney Studios Motion Pictures December 16, 2015
## 1 Walt Disney Studios Motion Pictures April 24, 2019
## 2 Twentieth Century Fox December 16, 2009
##
## Domestic Sales (in $) International Sales (in $) World Sales (in $) \
## 0 936662225 1132859475 2069521700
## 1 858373000 1939128328 2797501328
## 2 760507625 2086738578 2847246203
##
## Genre Movie Runtime License
## 0 ['Action', 'Adventure', 'Sci-Fi'] 2 hr 18 min PG-13
## 1 ['Action', 'Adventure', 'Drama', 'Sci-Fi'] 3 hr 1 min PG-13
## 2 ['Action', 'Adventure', 'Fantasy', 'Sci-Fi'] 2 hr 42 min PG-13
When exploring this dataset, I first noticed that there was a lot of columns that would be great for grouping. Some of those including: Release Date (by parsing using datetime), Genre, Distributor, and possibly the Duration of the film. My main goal for this project was to be able to visualize when movies made the most revenue, grouped by several different columns to see the patterns & trends of high revenue in movies.
Plot 1 - Scatterplot
For my first visualization, I wanted to see how much revenue these movies, grouped by the month of the year. To make this happen I used datetime to parse the month out of each of the movie’s release date and create a new ‘Month Released’ column. I also separated the Global Revenue into levels to be able to create counts for the amount of movies that fell into each level.
# Plot 1 Setup - Scatterplot
df = pd.read_csv(path + filename, usecols= ['Release Date', 'World Sales (in $)'])
df = df[df['Release Date'].notna() & df['World Sales (in $)'].notna()]
# Convert to datetime object
df['Release Date'] = pd.to_datetime(df['Release Date'], format='%B %d, %Y')
# New column for Month
df['Month Released'] = df['Release Date'].dt.month
df['World Sales (in $)'] = pd.to_numeric(df['World Sales (in $)'])
df = pd.read_csv(path + filename, usecols= ['Release Date', 'World Sales (in $)'])
df = df[df['Release Date'].notna() & df['World Sales (in $)'].notna()]
# Convert to datetime object
df['Release Date'] = pd.to_datetime(df['Release Date'], format='%B %d, %Y')
# New column for Month
df['Month Released'] = df['Release Date'].dt.month
df['World Sales (in $)'] = pd.to_numeric(df['World Sales (in $)'])
# Create levels for World Sales on y-axis
conditions = [
(df['World Sales (in $)'] <= 250000000),
(df['World Sales (in $)'] <= 500000000),
(df['World Sales (in $)'] <= 750000000),
(df['World Sales (in $)'] <= 1000000000),
(df['World Sales (in $)'] <= 1250000000),
(df['World Sales (in $)'] <= 1500000000),
(df['World Sales (in $)'] <= 1750000000),
(df['World Sales (in $)'] <= 2000000000),
(df['World Sales (in $)'] <= 2250000000),
(df['World Sales (in $)'] <= 2500000000),
(df['World Sales (in $)'] <= 2750000000),
(df['World Sales (in $)'] <= 3000000000),
]
values = [
250000000,
500000000,
750000000,
1000000000,
1250000000,
1500000000,
1750000000,
2000000000,
2250000000,
2500000000,
2750000000,
3000000000
]
df['World Sales (Levels)'] = np.select(conditions, values)
x = df.groupby(['Month Released', 'World Sales (Levels)'])['Month Released'].count().reset_index(name='count')
x = pd.DataFrame(x)
x2 = x.groupby(['World Sales (Levels)', 'Month Released'])['count'].sum().reset_index()
x2 = pd.DataFrame(x2)
plt.figure();
plt.scatter(x2['Month Released'], x2['World Sales (Levels)'], marker='8',
cmap='plasma', c=x2['count'], s=x2['count']*6, edgecolors='black');
plt.title('Highest Grossing Movies by Month Released', pad=20);
plt.xlabel('Months of the Year', fontsize=14);
plt.ylabel('Global Sales ($ in billions)');
cbar = plt.colorbar();
cbar.set_label('Number of Movies', rotation=270, fontsize=14, color='black', labelpad=20);
colorbar_ticks = [*range(0, int(x2['count'].max()), 5)];
cbar.set_ticks(colorbar_ticks);
plt.xticks(range(x2['Month Released'].min(), x2['Month Released'].max()+1), fontsize=14, color='black');
plt.yticks(range(250000000, 3000000000+1, 250000000), fontsize=14, color='black');
plt.show()
It’s clear that based on my scatterplot that there seems to be a high number of movies that are within the Top 1000 Highest Grossing Movies that are released around June and also in December.
Plot 2 - Stacked Bar Horizontal
For my second plot I wanted to group by the Distributor of the movies, to visualize which Distributor has created the largest share of movies within the Top 1000 Highest Grossing. I also wanted to be able to see the count of movies by decade. To do this I created a new column called ‘Decade’ and grouped each movie into a separate decade based on when they were released. I chose a horizontal plot to be able to see all of the long distributor names horizontal rather than trying to fit squeeze them in vertically on the x-axis
df = pd.read_csv(path + filename, usecols=['Title', 'Distributor'])
df['Year Released'] = df['Title'].str.extract("\((.*)\)")
df['Year Released'] = pd.to_numeric(df['Year Released'])
stack_conditions = [
((df['Year Released'] >= 1930) & (df['Year Released'] < 1940)),
((df['Year Released'] >= 1940) & (df['Year Released'] < 1950)),
((df['Year Released'] >= 1950) & (df['Year Released'] < 1960)),
((df['Year Released'] >= 1960) & (df['Year Released'] < 1970)),
((df['Year Released'] >= 1970) & (df['Year Released'] < 1980)),
((df['Year Released'] >= 1980) & (df['Year Released'] < 1990)),
((df['Year Released'] >= 1990) & (df['Year Released'] < 2000)),
((df['Year Released'] >= 2000) & (df['Year Released'] < 2010)),
((df['Year Released'] >= 2010) & (df['Year Released'] < 2020)),
((df['Year Released'] >= 2020) & (df['Year Released'] < 2030))
]
stack_values = [
1930,
1940,
1950,
1960,
1970,
1980,
1990,
2000,
2010,
2020,
]
df['Decade'] = np.select(stack_conditions, stack_values)
dist_count = df.groupby(['Distributor']).agg({'Distributor':['count']}).reset_index()
dist_count.columns = ['Distributor', 'Count']
dist_count = pd.DataFrame(dist_count)
dist_count = dist_count.sort_values('Count', ascending=True)
distributors = dist_count['Distributor'].to_list()
stacked_df = df.groupby(['Distributor', 'Decade'])['Distributor'].count().reset_index(name="Count of Dist")
df_sorted = stacked_df.sort_values('Count of Dist', ascending=False)
df_sorted.reset_index(inplace=True, drop=True)
df_sorted = df_sorted.pivot(index='Distributor', columns='Decade', values='Count of Dist').reindex(distributors, axis='rows')
fig = plt.figure(figsize=(14,6));
ax = fig.add_subplot(1, 1, 1);
df_sorted.plot(kind='barh', stacked=True, ax=ax);
# for row_counter, value_at_row_counter in enumerate(df_sorted['Count of Dist']):
# ax.text(value_at_row_counter+2, row_counter, str(value_at_row_counter), color='black', size=8, fontweight='bold')
# plt.xlim(0, df_sorted['Count of Dist'].max()*1.1)
plt.xlabel('Count of Movies', fontsize=18, labelpad=5);
plt.title('Count of Movies by Distributor and Decade \n Stacked Bar Plot', fontsize=24, pad=10);
plt.yticks(rotation=0, verticalalignment= 'center');
ax.set_ylabel('Distributor Name', fontsize=18);
legend_labels = plt.gca().get_legend_handles_labels()[1];
new_leg_labels = []
for item in legend_labels:
new_item = item + "s"
# print(new_item)
new_leg_labels.append(new_item)
plt.legend(new_leg_labels);
plt.show()
Plot 3 - Donut Plot
For my donut plot I thought it would be best to show global revenue by quarter to visualize which quarters of the calendar year have the largest percentage of high grossing movies released during them. When splitting the inner circle into the separate months of the year, I was not surprised to see that quarters 2 and 4 had the highest percentage, as June and December fell into these, which checks out from my first visualization.
df3 = pd.read_csv(path + filename, usecols=['Title', 'Release Date', 'World Sales (in $)'])
df3['Release Date'] = pd.to_datetime(df3['Release Date'], format='%B %d, %Y')
df3['Quarter'] = 'Quarter ' + df3['Release Date'].dt.quarter.astype('str')
df3['Month'] = df3['Release Date'].dt.month
df3['MonthNameAbbr'] = df3['Release Date'].dt.strftime('%b')
df3['World Sales (in $)'] = pd.to_numeric(df3['World Sales (in $)'])
donut_df = df3.groupby(['Quarter', 'MonthNameAbbr','Month'])['World Sales (in $)'].sum().reset_index(name='Total Global Sales')
donut_df.sort_values(by=['Month'], inplace=True)
donut_df.reset_index(inplace=True, drop=True)
del donut_df['Month']
# Color reference set up
num_out_colors = len(donut_df['Quarter'].unique())
out_color_ref_num = np.arange(num_out_colors)*4
num_in_colors = len(donut_df['MonthNameAbbr'].unique())
all_color_ref_num = np.arange(num_out_colors + num_in_colors)
in_color_ref_num = []
for each in all_color_ref_num:
if each not in out_color_ref_num:
in_color_ref_num.append(each)
# print(in_color_ref_num)
# print(out_color_ref_num)
# Pie chart
fig = plt.figure(figsize=(10,10));
ax = fig.add_subplot(1, 1, 1);
colormap = plt.get_cmap('tab20c');
out_colors = colormap(out_color_ref_num)
donut_df
## Quarter MonthNameAbbr Total Global Sales
## 0 Quarter 1.0 Jan 5287296730
## 1 Quarter 1.0 Feb 11260832653
## 2 Quarter 1.0 Mar 22169992002
## 3 Quarter 2.0 Apr 21252169673
## 4 Quarter 2.0 May 46246645644
## 5 Quarter 2.0 Jun 53318848335
## 6 Quarter 3.0 Jul 40063199122
## 7 Quarter 3.0 Aug 12804437681
## 8 Quarter 3.0 Sep 9526481345
## 9 Quarter 4.0 Oct 18645872414
## 10 Quarter 4.0 Nov 34871622684
## 11 Quarter 4.0 Dec 44607288309
total_revenue = donut_df['Total Global Sales'].sum()
donut_df.groupby(['Quarter'])['Total Global Sales'].sum().plot(
kind='pie', radius=1, colors = out_colors, pctdistance=.85, labeldistance=1.1,
wedgeprops = dict(edgecolor='white'), textprops={'fontsize':16},
autopct= lambda r: '{:.2f}%\n(${:.1f}B)'.format(r,(r/100)*total_revenue/1e+9),
startangle=90);
in_colors = colormap(in_color_ref_num)
donut_df['Total Global Sales'].plot(
kind='pie', radius=0.7, colors = in_colors, pctdistance=.76, labeldistance=.89,
wedgeprops = dict(edgecolor='White'), textprops={'fontsize':12},
labels = donut_df['MonthNameAbbr'],
autopct= '%1.2f%%',
startangle=90);
hole = plt.Circle((0,0), 0.3, fc="white");
fig1 = plt.gcf();
fig1.gca().add_artist(hole);
ax.yaxis.set_visible(False);
plt.title("Total Global Revenue by Quarter and Month");
ax.text(0,0, 'Total Revenue\n' + '$' + str(round(total_revenue/1e9,2)) + "B", ha='center', va='center', fontsize=18);
ax.axis('equal');
plt.tight_layout();
plt.show()
Plot 4 - Bump Chart
My Bump Chart depicts the ranking of the total global revenue for each month by each decade. Months in Decades where little to no movies showed as 0 Billion dollars grossed since there was not enough data for these months. There is clearly a trend of increasing global revenue as decades pass, with a sharp decrease in the ranks of movies in 2020, considering the drop in movie production due to the pandemic, and that this decade has just begun.
df = pd.read_csv(path + filename, usecols=['Title', 'Release Date', 'World Sales (in $)'])
df['Year Released'] = df['Title'].str.extract("\((.*)\)")
df['Year Released'] = pd.to_numeric(df['Year Released'])
df['Release Date'] = pd.to_datetime(df['Release Date'], format='%B %d, %Y')
df['MonthNameAbbr'] = df['Release Date'].dt.strftime('%b')
stack_conditions = [
((df['Year Released'] >= 1930) & (df['Year Released'] < 1940)),
((df['Year Released'] >= 1940) & (df['Year Released'] < 1950)),
((df['Year Released'] >= 1950) & (df['Year Released'] < 1960)),
((df['Year Released'] >= 1960) & (df['Year Released'] < 1970)),
((df['Year Released'] >= 1970) & (df['Year Released'] < 1980)),
((df['Year Released'] >= 1980) & (df['Year Released'] < 1990)),
((df['Year Released'] >= 1990) & (df['Year Released'] < 2000)),
((df['Year Released'] >= 2000) & (df['Year Released'] < 2010)),
((df['Year Released'] >= 2010) & (df['Year Released'] < 2020)),
((df['Year Released'] >= 2020) & (df['Year Released'] < 2030))
]
stack_values = [
1930,
1940,
1950,
1960,
1970,
1980,
1990,
2000,
2010,
2020,
]
df['Decade'] = np.select(stack_conditions, stack_values)
# bump_df = df.groupby(['Decade', 'MonthNameAbbr']).agg({'World Sales (in $)':['sum']}).reset_index()
bump_df = df.groupby(['Decade', 'MonthNameAbbr'])['World Sales (in $)'].sum().reset_index(name='TotalWorldRev')
# print(bump_df)
bump_df = bump_df.pivot(index='Decade', columns='MonthNameAbbr', values = 'TotalWorldRev')
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
bump_df = bump_df.reindex(columns=month_order)
bump_df = bump_df.fillna(0)
bump_df_ranked = bump_df.rank(0, ascending=False, method='min')
bump_df_ranked = bump_df_ranked.T
fig = plt.figure(figsize=(18,10));
ax = fig.add_subplot(1,1,1);
bump_df_ranked.plot(kind='line', ax=ax, marker='o', markeredgewidth=1,
linewidth=6, markersize=45, markerfacecolor='white')
ax.invert_yaxis();
num_rows = bump_df_ranked.shape[0]
num_cols = bump_df_ranked.shape[1]
plt.ylabel('Monthly Ranking', fontsize=18, labelpad=10);
plt.title('Ranking of Total World Revenue by Month and Decade \n Bump Chart', fontsize=18, pad=15);
plt.xticks(np.arange(num_rows), month_order, fontsize=14);
plt.yticks(range(1, num_cols+1, 1), fontsize=14);
ax.set_xlabel('Month', fontsize=18);
handles, labels = ax.get_legend_handles_labels()
handles = [handles[5], handles[4], handles[3], handles[2], handles[1], handles[0]]
labels = [labels[5], labels[4], labels[3], labels[2], labels[1], labels[0]]
ax.legend(handles, labels, fontsize=14,
labelspacing=0.8,
markerscale=0.4,
borderpad=0.5,
handletextpad=0.5);
i = 0
j = 0
for eachcol in bump_df_ranked.columns:
for eachrow in bump_df_ranked.index:
this_rank = bump_df_ranked.iloc[i, j]
ax.text(i, this_rank, '$' + str(round(bump_df.iloc[j, i]/1e9,1)) + 'B', ha='center', va='center', fontsize=11.5);
i += 1
j += 1
i = 0
plt.show()
Plot 5 - HeatMap
My Final Visualization shows the Global Revenue of movies grouped by year and also by Genre. This visualization was my most difficult to create, as the Genre column of my original dataframe contained lists of genres classifying each movie. To maneuver my way around this, I created a large amount of separate columns for each genre represented within the df, and counted the occurrence of each genre within the top movies. I then was able to separate the revenue for all of the movies in each of the genres, and also group this by year for the last 11 years.
Based on this information, it is clear that movies that are included within the genre ‘Adventure’ have proven to produce the most global revenue in the last 11 years, with ‘Action’ in close second.
df = pd.read_csv(path + filename, usecols=['Title', 'Genre', 'Release Date', 'World Sales (in $)'])
df['Release Date'] = pd.to_datetime(df['Release Date'], format='%B %d, %Y')
df['Year'] = df['Title'].str.extract("\((.*)\)")
df['Year'] = pd.to_numeric(df['Year'])
# for i, l in enumerate(df['Genre']):
# print("list",i,"is", type(1))
df['Genre'] = df['Genre'].apply(eval)
genre_dict = {}
for i in df['Genre']:
for j in i:
if j not in genre_dict:
genre_dict[j] = 1
else:
genre_dict[j] += 1
def to_1D(series):
return pd.Series([x for _list in series for x in _list])
adventure = []
for i in df['Genre']:
if "Adventure" in i:
adventure.append(1)
else:
adventure.append(0)
df['Adventure'] = adventure
new_df = df.groupby(['Adventure', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df = new_df[new_df['Adventure'] != 0]
new_df = new_df[new_df['Year'] >= 2010]
new_df['Adventure'] = np.where(new_df['Adventure'], 'Adventure', 1)
new_df.rename(columns={'Adventure':'Genre'}, inplace=True)
comedy = []
for i in df['Genre']:
if "Comedy" in i:
comedy.append(1)
else:
comedy.append(0)
df['Comedy'] = comedy
new_df2 = df.groupby(['Comedy', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df2 = new_df2[new_df2['Comedy'] != 0]
new_df2 = new_df2[new_df2['Year'] >= 2010]
new_df2['Comedy'] = np.where(new_df2['Comedy'], 'Comedy', 1)
new_df2.rename(columns={'Comedy':'Genre'}, inplace=True)
action = []
for i in df['Genre']:
if "Action" in i:
action.append(1)
else:
action.append(0)
df['Action'] = action
new_df3 = df.groupby(['Action', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df3 = new_df3[new_df3['Action'] != 0]
new_df3 = new_df3[new_df3['Year'] >= 2010]
new_df3['Action'] = np.where(new_df3['Action'], 'Action', 1)
new_df3.rename(columns={'Action':'Genre'}, inplace=True)
drama = []
for i in df['Genre']:
if "Drama" in i:
drama.append(1)
else:
drama.append(0)
df['Drama'] = drama
new_df4 = df.groupby(['Drama', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df4 = new_df4[new_df4['Drama'] != 0]
new_df4 = new_df4[new_df4['Year'] >= 2010]
new_df4['Drama'] = np.where(new_df4['Drama'], 'Drama', 1)
new_df4.rename(columns={'Drama':'Genre'}, inplace=True)
thriller = []
for i in df['Genre']:
if "Thriller" in i:
thriller.append(1)
else:
thriller.append(0)
df['Thriller'] = thriller
new_df5 = df.groupby(['Thriller', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df5 = new_df5[new_df5['Thriller'] != 0]
new_df5 = new_df5[new_df5['Year'] >= 2010]
new_df5['Thriller'] = np.where(new_df5['Thriller'], 'Thriller', 1)
new_df5.rename(columns={'Thriller':'Genre'}, inplace=True)
sci_fi = []
for i in df['Genre']:
if "SciFi" in i:
sci_fi.append(1)
else:
sci_fi.append(0)
df['SciFi'] = sci_fi
new_df6 = df.groupby(['SciFi', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df6 = new_df6[new_df6['SciFi'] != 0]
new_df6 = new_df6[new_df6['Year'] >= 2010]
new_df6['SciFi'] = np.where(new_df6['SciFi'], 'SciFi', 1)
new_df6.rename(columns={'SciFi':'Genre'}, inplace=True)
family = []
for i in df['Genre']:
if "Family" in i:
family.append(1)
else:
family.append(0)
df['Family'] = family
new_df7 = df.groupby(['Family', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df7 = new_df7[new_df7['Family'] != 0]
new_df7 = new_df7[new_df7['Year'] >= 2010]
new_df7['Family'] = np.where(new_df7['Family'], 'Family', 1)
new_df7.rename(columns={'Family':'Genre'}, inplace=True)
fantasy = []
for i in df['Genre']:
if "Fantasy" in i:
fantasy.append(1)
else:
fantasy.append(0)
df['Fantasy'] = fantasy
new_df8 = df.groupby(['Fantasy', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df8 = new_df8[new_df8['Fantasy'] != 0]
new_df8 = new_df8[new_df8['Year'] >= 2010]
new_df8['Fantasy'] = np.where(new_df8['Fantasy'], 'Fantasy', 1)
new_df8.rename(columns={'Fantasy':'Genre'}, inplace=True)
romance = []
for i in df['Genre']:
if "Romance" in i:
romance.append(1)
else:
romance.append(0)
df['Romance'] = romance
new_df9 = df.groupby(['Romance', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df9 = new_df9[new_df9['Romance'] != 0]
new_df9 = new_df9[new_df9['Year'] >= 2010]
new_df9['Romance'] = np.where(new_df9['Romance'], 'Romance', 1)
new_df9.rename(columns={'Romance':'Genre'}, inplace=True)
animation = []
for i in df['Genre']:
if "Animation" in i:
animation.append(1)
else:
animation.append(0)
df['Animation'] = animation
new_df10 = df.groupby(['Animation', 'Year'])['World Sales (in $)'].sum().reset_index(name='GlobalRev')
new_df10 = new_df10[new_df10['Animation'] != 0]
new_df10 = new_df10[new_df10['Year'] >= 2010]
new_df10['Animation'] = np.where(new_df10['Animation'], 'Animation', 1)
new_df10.rename(columns={'Animation':'Genre'}, inplace=True)
# new_df10
genre_df = pd.concat([new_df, new_df2, new_df3, new_df4, new_df5, new_df6, new_df7, new_df8, new_df9, new_df10], axis=0)
# genre_df
genre_order = ['Adventure', 'Comedy', 'Action', 'Drama', 'Thriller',
'SciFi', 'Family', 'Fantasy', 'Romance', 'Animation']
heat_df = genre_df.groupby(['Year', 'Genre'])['GlobalRev'].sum().reset_index(name="GlobalRev")
heat_df['RevBillions'] = round(heat_df['GlobalRev']/1000000000, 1)
heat_df['GlobalRev'] = heat_df['GlobalRev'].astype('int')
# heat_df
#create heatmap df
hm_df = pd.pivot_table(heat_df, index='Year', columns='Genre', values='RevBillions')
hm_df = hm_df.fillna(0)
import seaborn as sns
from matplotlib.ticker import FuncFormatter
fig = plt.figure(figsize=(18,10));
ax = fig.add_subplot(1,1,1);
comma_fmt = FuncFormatter(lambda x, p: format(str(int(x)) + " B"));
ax = sns.heatmap(hm_df, linewidth=0.2, annot=True, cmap='YlOrBr',
fmt=',.1f', square=True, annot_kws={'size': 11},
cbar_kws={'format': comma_fmt, 'orientation':'vertical'});
for t in ax.texts:
t.set_text(t.get_text() + "B");
plt.title("HeatMap of Global Revenue by Year Released and by Genre\n (2010-2021)", fontsize=18, pad=15);
plt.xlabel("Movie Genre", fontsize=18, labelpad=10);
plt.ylabel("Year Released", fontsize=18, labelpad=10);
plt.yticks(rotation=0, fontsize=14);
plt.xticks(size=14);
ax.invert_yaxis();
cbar = ax.collections[0].colorbar;
cbar.set_label("Global Revenue (Billions $)", rotation=270, fontsize=14, color='black', labelpad=20);
plt.show()