import os
# Point Qt at a platform-plugin directory inside a local Anaconda install.
# NOTE(review): the path is hard-coded to drive D: on the author's machine and
# is presumably needed only for a Qt-based matplotlib backend - confirm before
# running elsewhere.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'D:/Anaconda3/Library/plugins/platforms'
This dataset was acquired from Kaggle.com and downloaded as a CSV file. The file was then loaded into a Jupyter notebook, where the following code was written in Python. I set my path and file name accordingly, and set up the groundwork to dig into the Earthquakes dataset.
The Earthquake dataset contains 114.57 KB of data across 19 columns. These columns are title, magnitude, date_time, cdi, mmi, alert, tsunami, sig, net, nst, dmin, gap, magType, depth, latitude, longitude, location, continent, and country. The summary function outputs information like the minimum, Q1, median, mean, Q3, and maximum. It also includes descriptions of the columns in the dataset such as their class, length, and mode.
Below are the five unique visualizations that look at different variables within the Earthquake dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import folium
import warnings
# Suppress every warning category for the rest of the session. This also hides
# deprecation notices from pandas/matplotlib, so remove it while debugging.
warnings.filterwarnings("ignore")
# Directory and file name of the earthquake CSV export; change `path` to
# wherever the file lives on your machine.
path = "U:/"
filename = f"{path}earthquake_data.csv"

# Read the whole CSV into the DataFrame that the rest of the notebook uses.
df = pd.read_csv(filename)
This visualization contains two vertical bar charts. The first one shows the top 250 earthquakes in the dataset. The y-axis represents the earthquake magnitude. A horizontal line across the bar chart illustrates the mean magnitude, which in this visualization is 6.92. The other bar chart represents the top 10 earthquakes in the dataset. Similarly, the y-axis shows the earthquake magnitude. This bar chart has the same horizontal line representing the mean magnitude, except in this case it is 6.80. Both graphs have a legend that indicates which earthquakes are above, within 1% of, and below the average.
# Keep only the location/magnitude pair and discard rows missing either value.
# dropna() returns a new frame, so the source DataFrame is left untouched.
columns_of_interest = ['location', 'magnitude']
loc_df = df[columns_of_interest].dropna()

# Mean magnitude per location, flattened back to two ordinary columns.
# NOTE(review): groupby orders the result by location name, not by magnitude.
mean_by_location = loc_df.groupby('location')['magnitude'].mean()
loc_avg_mag = mean_by_location.reset_index()
def pick_colors_according_to_magnitude(this_data):
    """Return one bar color per row, based on distance from the mean magnitude.

    The colors follow the chart legend used in this notebook:

    * ``'coral'``     - more than 1% above the mean ("Above Average")
    * ``'orangered'`` - within 1% of the mean ("Within 1% of the Average")
    * ``'peachpuff'`` - more than 1% below the mean ("Below Average")

    Args:
        this_data: Object exposing a numeric ``magnitude`` attribute that
            supports ``.mean()`` and iteration (e.g. a pandas DataFrame with a
            ``magnitude`` column).

    Returns:
        list[str]: Color names in the same order as ``this_data.magnitude``.
    """
    avg = this_data.magnitude.mean()
    upper_bound = avg * 1.01
    lower_bound = avg * 0.99

    colors = []
    for each in this_data.magnitude:
        if each > upper_bound:
            colors.append('coral')
        elif each < lower_bound:
            # The legend labels peachpuff as "Below Average"; the previous
            # code returned orangered here and swapped the two meanings.
            colors.append('peachpuff')
        else:
            colors.append('orangered')
    return colors
# Two stacked bar charts of mean magnitude per location. Each panel carries a
# dashed line at the mean of the rows it displays.
# NOTE(review): both panels are positional slices of loc_avg_mag; nothing here
# sorts by magnitude, so "Top N" in the titles refers to row position. The
# second panel shows rows 10-19 although its title says "Top 10" - confirm.
bottom1, top1 = 0, 250
d1 = loc_avg_mag[bottom1:top1]
my_colors1 = pick_colors_according_to_magnitude(d1)
mean1 = d1.magnitude.mean()

bottom2, top2 = 10, 20
d2 = loc_avg_mag[bottom2:top2]
my_colors2 = pick_colors_according_to_magnitude(d2)
mean2 = d2.magnitude.mean()

# Legend swatches shared by both panels; these must stay in sync with the
# colors returned by pick_colors_according_to_magnitude.
Above = mpatches.Patch(color='coral', label='Above Average')
At = mpatches.Patch(color='orangered', label='Within 1% of the Average')
Below = mpatches.Patch(color='peachpuff', label='Below Average')
legend_handles = [Above, At, Below]

fig = plt.figure(figsize=(18, 16))

# --- Upper panel: x axis hidden because 250 location labels are unreadable.
ax1 = fig.add_subplot(2, 1, 1)
ax1.bar(d1.location, d1.magnitude, label='Earthquake Magnitude', color=my_colors1)
ax1.legend(handles=legend_handles, fontsize=14)
ax1.axhline(mean1, color='black', linestyle='dashed')
for side in ('right', 'top'):
    ax1.spines[side].set_visible(False)
ax1.xaxis.set_visible(False)
ax1.set_title(f"Top {top1} Earthquakes", size=20)
ax1.text(top1 - 10, mean1 + 1.5, f"Mean = {round(mean1, 2)!s}",
         rotation=0, fontsize=14)

# --- Lower panel: few enough bars to show rotated location labels.
ax2 = fig.add_subplot(2, 1, 2)
ax2.bar(d2.location, d2.magnitude, label='Earthquake Magnitude', color=my_colors2)
ax2.legend(handles=legend_handles, fontsize=14)
ax2.axhline(mean2, color='black', linestyle='dashed')
for side in ('right', 'top'):
    ax2.spines[side].set_visible(False)
ax2.xaxis.set_visible(True)
ax2.set_title("Top 10 Earthquakes", size=20)
ax2.tick_params(axis='x', rotation=45)
ax2.text(top2 - 10, mean2 + 0.2, f"Mean = {round(mean2, 2)!s}",
         rotation=0, fontsize=14)

fig.subplots_adjust(hspace=0.35)
fig.tight_layout()
plt.show()
This donut chart represents earthquake alerts by percentage. There are four alerts - green, red, orange, and yellow. Each alert is indicated with its respective color. The green alert accounts for 78.3% of earthquake alerts. The red alert accounts for 2.9%, the orange 5.3%, and the yellow 13.5%.
# Donut chart of the share of earthquakes at each alert level.
# NOTE: this rebinds the notebook-wide `df`, so every later cell only sees rows
# that have an alert level. The filtered frame also keeps its original row
# labels, so its index is no longer contiguous.
df = df.dropna(subset=['alert'])

number_outside_colors = df['alert'].nunique()
# Pastel1 palette positions chosen to line up with `labels` below
# (green, yellow, orange, red).
outside_color_ref_number = [2, 5, 4, 0]

# One row per alert level with its record count in column 'n'.
pie_df = df.groupby(['alert'])['alert'].count().reset_index(name='n')
# Total number of alert records. The previous code summed the 'alert' column,
# which concatenated the level names instead of adding the counts.
total_alerts = pie_df['n'].sum()

# Wedge sizes in a fixed label order; a level absent from the data gets 0.
alert_counts = df['alert'].value_counts().to_dict()
labels = ['green', 'yellow', 'orange', 'red']
counts = [alert_counts.get(label, 0) for label in labels]

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)
colormap = plt.get_cmap("Pastel1")
outer_colors = colormap(outside_color_ref_number)

ax.pie(counts, labels=labels, autopct='%1.1f%%', colors=outer_colors,
       pctdistance=0.85, textprops={'fontsize': 18},
       wedgeprops=dict(edgecolor='white'))
ax.set_title('Earthquake Alerts by Percentage', fontsize=18)

# NOTE(review): the "4" is hard-coded; it appears to be the number of alert
# levels rather than the number of alert records - confirm the intended value.
ax.text(0, 0, 'Total Alerts: 4', ha='center', va='center', fontsize=16)

# A white disc over the centre turns the pie into a donut.
hole = plt.Circle((0, 0), 0.3, fc='white')
fig1 = plt.gcf()
fig1.gca().add_artist(hole)

ax.axis('equal')
plt.tight_layout()
plt.show()
The world map shows each continent and country where an earthquake occurred. These are indicated by the colorful dots on the map. A red dot indicates an earthquake with a magnitude of 8 or above. An orange dot indicates a magnitude between 7 and 8. A green dot shows any earthquake that had a magnitude of 6.5 or less. The map is interactive and can be zoomed in and out. You can also choose from 4 map styles in the upper right-hand corner. Each dot is interactive and will show the exact magnitude when hovered over.
# Interactive folium map with one colored circle per earthquake.
map_df = df[['latitude', 'longitude', 'country', 'continent', 'magnitude']]

center_of_map = [0.0, 0.0]  # Gulf of Guinea is center
my_map = folium.Map(location=center_of_map,
                    zoom_start=2,
                    width='90%',
                    height='100%',
                    left='5%',
                    right='5%',
                    top='0%')

# Extra base layers, selectable through the layer control.
# NOTE(review): newer folium releases may reject the Stamen tile names without
# an explicit attribution - confirm against the installed version.
tiles = ['cartodbpositron', 'openstreetmap', 'stamenterrain', 'stamentoner']
for tile in tiles:
    folium.TileLayer(tile).add_to(my_map)
folium.LayerControl().add_to(my_map)

# Iterate over the rows themselves. The previous loop used range(len(map_df))
# with label-based .loc, which raises KeyError for labels missing from a
# filtered frame and never reaches labels >= len(map_df), so earthquakes were
# silently left off the map.
for row in map_df.itertuples():
    mag = row.magnitude
    if mag >= 8:
        color = 'red'
    elif mag >= 7:
        color = 'orange'
    elif mag <= 6.5:
        color = 'green'
    else:
        # Magnitudes strictly between 6.5 and 7 (and missing values) are not
        # drawn. NOTE(review): confirm this gap is intentional.
        continue

    # Best effort: one bad row (e.g. missing coordinates) is reported and
    # skipped so the rest of the map still renders.
    try:
        folium.Circle(location=[row.latitude, row.longitude],
                      tooltip=row.magnitude,
                      popup='Continent: {}: \n Country: {}:'.format(row.continent, row.country),
                      radius=50,
                      color=color,
                      fill=True,
                      fill_color=color,
                      fill_opacity=0.5).add_to(my_map)
    except Exception as e:
        print(f"An error occurred while processing index {row.Index}: {e}")

# Last expression of the cell: lets the notebook render the map.
my_map
This visualization illustrates earthquake significance by alert. As you’ve seen in the donut chart, there are 4 alerts. Each alert is plotted in its respective color. The y-axis shows the significance of each earthquake. The red alert, which indicates a higher earthquake magnitude, shows a higher significance. The significance value represents how significant the event is: larger numbers indicate a more significant event. The value is determined by magnitude, maximum MMI, felt reports, and estimated impact.
# Significance ('sig') of each earthquake, drawn as one series per alert level
# in that level's own color.
line_plot_df = df[['sig', 'alert', 'magnitude']]

fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(1, 1, 1)

# Alert level -> line color; every level present in the data must have an entry.
my_colors = {'green': 'green',
             'red': 'red',
             'yellow': 'yellow',
             'orange': 'orange'}

# Group on the scalar column name. Grouping on the list ['alert'] makes
# pandas >= 2.0 yield 1-tuples such as ('green',) as keys, which breaks the
# my_colors lookup and the legend label.
for key, grp in line_plot_df.groupby('alert'):
    grp.plot(ax=ax, kind='line', x='alert', y='sig',
             color=my_colors[key], label=key, marker='8')

plt.title('Earthquake Significance by Alert', fontsize=18)
ax.set_xlabel('')
ax.set_ylabel('Significance', fontsize=14, labelpad=30)
# The x position only separates the alert levels, so the ticks are hidden.
ax.set_xticks([])
plt.show()
The stacked bar plot visually shows the different countries in the dataset. They are organized according to the continent. There are six continents, and 32 countries that have experienced earthquakes. The y-axis represents how many earthquakes the continent has experienced. We can see that Africa had one recorded earthquake in this dataset, while Asia had over 40.
# Stacked bar chart: one bar per continent, one colored segment per country,
# segment height = number of earthquake rows for that country.
stacked_df = df.loc[:, ['continent', 'country']].dropna()

# Count rows per (continent, country), then spread countries across columns.
# Combinations that never occur become 0 so they add nothing to a stack.
grouped_df = (
    stacked_df
    .groupby(['continent', 'country'])
    .size()
    .reset_index(name='count')
)
pivot_df = grouped_df.pivot(index='continent', columns='country', values='count').fillna(0)

fig = plt.figure(figsize=(18, 10))
ax = fig.add_subplot(1, 1, 1)

# Running top of each continent's stack; every country is drawn on top of it.
bottom = np.zeros(pivot_df.shape[0])
for country, heights in pivot_df.items():
    ax.bar(pivot_df.index, heights, bottom=bottom, label=country)
    bottom += heights

ax.set_xlabel('Continent', fontsize=18, horizontalalignment='center', labelpad=20)
ax.set_ylabel('Earthquakes Per Continent', fontsize=18, labelpad=20, horizontalalignment='center')
ax.set_title('Number of Countries in Continent that Experienced an Earthquake \n Stacked Bar Plot', fontsize=18)
plt.xticks(rotation=0, horizontalalignment='center', fontsize=14)
plt.yticks(fontsize=14)
ax.legend(loc='upper right', ncol=4)
plt.show()
Thank you so much for taking the time to view my visualizations. I hope you enjoyed looking through the different graphs / charts as much as I enjoyed making them!