import os

# Set QT_QPA_PLATFORM_PLUGIN_PATH (the directory Qt searches for its platform
# plugins) to the Anaconda installation on the U: drive.
os.environ.update(
    QT_QPA_PLATFORM_PLUGIN_PATH='U:/ProgramData/Anaconda3/Library/plugins/platforms'
)
This dataset contains results from six La Liga One soccer seasons spanning 2014-2020. It includes home- and away-team information along with match statistics. The data is centered on in-game statistics and covers game scores as well as offensive, defensive, penalty, and team statistics. There is little information about the location or time of the games; in fact, the only date information included is the year of the match. However, there is a wealth of in-game data, which allows for some interesting insights into what makes certain teams so special. Some data is also missing for teams that were relegated to or promoted from the La Liga Two league over the span of these six years.
Let’s now look at the results.
This figure actually contains two bar charts. The first bar chart shows the top 30 home teams within this six-year span and the frequency count of games for each team. It is color coded according to how each team's count compares with the mean: teams more than 1% above the mean are shown in light coral, teams more than 1% below the mean are shown in green, and teams within 1% of the mean are shown in black. The bar chart below it shows the top 10 home teams with the highest number of games. It is color coded in the same way relative to its own mean, which is clearly much higher than the mean in the top 30 chart.
import os

# Set before matplotlib is imported -- presumably so a Qt plotting backend can
# locate its platform plugins in the Anaconda install (TODO confirm).
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/ProgramData/Anaconda3/Library/plugins/platforms'

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Location of the combined La Liga CSV export.
path = "U:/"
filename = path + 'combined_data_laliga.csv'

# Read only the three columns the home-team charts use. (The former 5-row
# read was overwritten on the very next line, so it only cost an extra
# file access and has been dropped.)
df = pd.read_csv(filename, usecols=['Home Team', 'Home Team Goals Scored', 'year'])

# Build one row per home team with:
#   Count    - number of rows (home games) for that team
#   TotGoals - sum of 'Home Team Goals Scored'
#   AvgGoals - mean of 'Home Team Goals Scored'
# Rows with a missing 'Home Team' are excluded by groupby, so the group size
# equals the non-null count of the key column that was computed before.
goals_by_team = df.groupby('Home Team')['Home Team Goals Scored']
x = goals_by_team.agg(TotGoals='sum', AvgGoals='mean')
x.insert(0, 'Count', goals_by_team.size())

# Expose the team name as the 'HomeTeam' column, rank teams by number of games
# (most first), and renumber the rows 0..n-1 in that order.
x = (
    x.rename_axis('HomeTeam')
    .reset_index()
    .sort_values('Count', ascending=False)
    .reset_index(drop=True)
)
def pick_colors_according_to_mean_count(this_data):
    """Return one colour name per ``Count`` value, relative to the mean count.

    Each value of ``this_data.Count`` is compared with the mean of that column:

    * more than 1% above the mean  -> ``'lightcoral'``
    * more than 1% below the mean  -> ``'green'``
    * anything in between          -> ``'black'``

    Args:
        this_data: Object with a ``Count`` attribute that can be iterated and
            provides ``.mean()`` (a DataFrame with a ``Count`` column works).

    Returns:
        list[str]: Colour names in the same order as ``this_data.Count``.
    """
    mean_count = this_data.Count.mean()
    upper_limit = mean_count * 1.01
    lower_limit = mean_count * 0.99

    def color_for(count):
        # Same precedence as an if/elif/else chain: "above" is tested first.
        if count > upper_limit:
            return 'lightcoral'
        if count < lower_limit:
            return 'green'
        return 'black'

    return [color_for(count) for count in this_data.Count]
import matplotlib.patches as mpatches
# Two stacked bar charts of home-game counts per team, taken from the
# Count-sorted table `x`: the top `top1` teams above, the top `top2` below.

# Window for the upper panel. iloc slices by position and excludes the end,
# so this yields exactly `top1` rows. (A label slice via .loc includes the end
# label and would return top1 + 1 teams, contradicting the "Top N" titles.)
bottom1 = 0
top1 = 30
d1 = x.iloc[bottom1:top1]
my_colors1 = pick_colors_according_to_mean_count(d1)

# Window for the lower panel, selected the same way.
bottom2 = 0
top2 = 10
d2 = x.iloc[bottom2:top2]
my_colors2 = pick_colors_according_to_mean_count(d2)

# Proxy patches for the legend; the colours match those returned by
# pick_colors_according_to_mean_count.
Above = mpatches.Patch(color='lightcoral', label='Above Average')
At = mpatches.Patch(color='black', label='Within 1% of the Average')
Below = mpatches.Patch(color='green', label='Below Average')

fig = plt.figure(figsize=(18, 16))
fig.suptitle('Frequency of La Liga Game Analysis By Team:\n Top ' + str(top1) + ' and Top ' + str(top2),
             fontsize=18, fontweight='bold')

# --- Upper panel: top `top1` teams ---
ax1 = fig.add_subplot(2, 1, 1)
ax1.bar(d1.HomeTeam, d1.Count, label='Count', color=my_colors1)
ax1.legend(handles=[Above, At, Below], fontsize=14)
# Dashed reference line at the mean count of the plotted teams.
ax1.axhline(d1.Count.mean(), color='black', linestyle='dashed')
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.set_title('Top ' + str(top1) + ' Teams ', size=20)
# Annotate the mean just above the reference line, towards the right side.
ax1.text(top1 - 10, d1.Count.mean() + 2, 'Mean =' + str(d1.Count.mean()),
         rotation=0, fontsize=14)
# Called while ax1 is still the current axes, so only the upper panel's
# team names are rotated.
plt.xticks(rotation=90, fontsize=9)

# --- Lower panel: top `top2` teams ---
ax2 = fig.add_subplot(2, 1, 2)
ax2.bar(d2.HomeTeam, d2.Count, label='Count', color=my_colors2)
# Solid reference line at the mean count of the plotted teams.
ax2.axhline(d2.Count.mean(), color='black', linestyle='solid')
ax2.spines['right'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax2.set_title('Top ' + str(top2) + ' Teams', size=20)
ax2.text(top2 - 1, d2.Count.mean() + 2, 'Mean =' + str(d2.Count.mean()), rotation=0, fontsize=14)

fig.subplots_adjust(hspace=0.3)
plt.show()
This stacked bar chart shows the number of goals scored by each home team in each year. The years are stacked on top of each other and shown in different colors according to the key. Some teams have much more data than others, depending on how much time they spent in La Liga One; the colors show in which years a team was competing and, where a color is absent, in which years it was in a lower division.
import os

# Directory Qt searches for its platform plugins.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/ProgramData/Anaconda3/Library/plugins/platforms'

# Total home goals for every (team, year) pair, in long form.
goals_by_team_year = (
    df.groupby(['Home Team', 'year'])['Home Team Goals Scored']
    .sum()
    .reset_index(name='TotGoals')
)
# Wide form: one row per team, one column per year.
stacked_df = goals_by_team_year.pivot(index='Home Team', columns='year', values='TotGoals')
stacked_df

fig = plt.figure(figsize=(18, 10))
ax = fig.add_subplot(1, 1, 1)
# Each year's column becomes one coloured segment of the team's bar.
stacked_df.plot(kind='bar', stacked=True, ax=ax)

plt.title('Total Goals Scored by Home Team and by Year \n Stacked Bar Plot', fontsize=18)
plt.xlabel('La Liga Home Team', fontsize=14, labelpad=3)
plt.ylabel('Total Goals Scored', fontsize=18, labelpad=10)
plt.xticks(rotation=90, horizontalalignment='center', fontsize=6)
plt.yticks(fontsize=14)
plt.show()
This pie chart shows a different way of representing the goals data. It looks at the league as a whole instead of each individual team, though the data is still focused on home team games. The pie chart shows the total number of goals scored by all teams in each year, as well as the percentage of the six-year total that each individual year makes up. In the center of the pie chart is the total number of home game goals that occurred in this six-year data set.
import os

# Directory Qt searches for its platform plugins.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/ProgramData/Anaconda3/Library/plugins/platforms'

# Home goals summed per year, one row per year.
pie_df = (
    df.groupby(['year'])['Home Team Goals Scored']
    .sum()
    .reset_index(name='HomeTeamTotalGoals')
)
pie_df

# One colour index per distinct year, taking every fourth entry of tab20.
number_outside_colors = len(pie_df.year.unique())
outside_color_ref_number = 4 * np.arange(number_outside_colors)

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)
colormap = plt.get_cmap("tab20")
outer_colors = colormap(outside_color_ref_number)

# Grand total of home goals; shown in the centre and used to turn wedge
# percentages back into goal counts.
all_goals = pie_df.HomeTeamTotalGoals.sum()


def _wedge_label(pct):
    """Format a wedge as its percentage plus the goal count it represents."""
    return '{:.2f}%\n({:.1f})'.format(pct, (pct / 100) * all_goals)


goals_per_year = pie_df.groupby(['year'])['HomeTeamTotalGoals'].sum()
goals_per_year.plot(
    kind='pie',
    radius=1,
    colors=outer_colors,
    pctdistance=0.85,
    labeldistance=1.1,
    wedgeprops=dict(edgecolor='white'),
    textprops={'fontsize': 18},
    autopct=_wedge_label,
    startangle=90,
)

# A white disc over the centre turns the pie into a ring.
hole = plt.Circle((0, 0), 0.3, fc='white')
fig1 = plt.gcf()
fig1.gca().add_artist(hole)

ax.yaxis.set_visible(False)
plt.title('Total La Liga Home Game Goals by Year', fontsize=18)
ax.text(0, 0, f'Total Home Game Goals\n{round(all_goals)}', size=14, ha='center', va='center')
ax.axis('equal')
plt.tight_layout()
plt.show()
This scatterplot shows the number of home team shots on target each team had per season. Depending on their league status, some teams do not have points for every year. The years are color coded according to the key; the home team names run along the x axis and the years along the y axis. The size of each dot is based on the number of on-target shots that home team had in that year.
import os

# Directory Qt searches for its platform plugins.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/ProgramData/Anaconda3/Library/plugins/platforms'

# Reload the CSV with the shots-on-target column and total it per
# (home team, year) pair.
shot_columns = ['Home Team', 'year', 'Home Team On Target Shots']
df = pd.read_csv(filename, usecols=shot_columns)
x = (
    df.groupby(['Home Team', 'year'])['Home Team On Target Shots']
    .sum()
    .reset_index(name="Total Shots")
)

plt.figure(figsize=(12, 8))
# One marker per (team, year): colour encodes the year, marker area encodes
# the total number of on-target shots.
plt.scatter(
    x['Home Team'],
    x['year'],
    s=x['Total Shots'],
    c=x['year'],
    cmap='tab10',
    marker='8',
    edgecolors='black',
)
plt.title('Shots On Target By Home Team and By Year', fontsize=18)
plt.ylabel('Year', fontsize=14)
plt.xlabel('La Liga Teams', fontsize=10, labelpad=1)
plt.colorbar()
plt.xticks(rotation=90, fontsize=5)
plt.yticks(rotation=0)
plt.show()
Like the scatterplot, this heatmap shows the number of shots on target each home team had in a given year. This plot also shows the team name and the year, though here the key is a cool-to-warm color palette. The higher the number of shots, the more red the cell appears; the fewer shots on target, the more blue it will be. This gives a more detailed view of the overview the scatterplot above provides. Again, some data is missing for teams, depending on their status in La Liga One in the given year.
import os

# Directory Qt searches for its platform plugins.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/ProgramData/Anaconda3/Library/plugins/platforms'

import seaborn as sns
from matplotlib.ticker import FuncFormatter

# Total on-target shots per (home team, year), then reshape so that years
# are rows and teams are columns.
df = pd.read_csv(filename, usecols=['Home Team', 'year', 'Home Team On Target Shots'])
x = (
    df.groupby(['Home Team', 'year'])['Home Team On Target Shots']
    .sum()
    .reset_index(name="Total Shots")
)
hm_df = x.pivot_table(index='year', columns='Home Team', values='Total Shots')


def _thousands(value, _pos):
    """Colour-bar tick label: integer part with a thousands separator."""
    return format(int(value), ',')


comma_fmt = FuncFormatter(_thousands)

fig = plt.figure(figsize=(18, 10))
ax = fig.add_subplot(1, 1, 1)
ax = sns.heatmap(
    hm_df,
    cmap='coolwarm',
    annot=True,
    fmt=',.0f',
    annot_kws={'size': 11},
    linewidth=0.2,
    square=True,
    cbar_kws={'format': comma_fmt, 'orientation': 'vertical'},
)

plt.title('Heatmap of Number of On Target Shots On Goal by La Liga Home Team and Year (2014-2020)', fontsize=18, pad=15)
plt.xlabel('La Liga Home Team', fontsize=16, labelpad=10)
plt.ylabel('Year', fontsize=16, labelpad=10)
plt.yticks(rotation=360, size=14)
plt.xticks(size=12)
# Flip the y axis so the row order is reversed relative to seaborn's default.
ax.invert_yaxis()

# The colour bar created by seaborn is attached to the heatmap's mesh.
cbar = ax.collections[0].colorbar
# NOTE(review): max_count is not used afterwards in this cell, and ndarray.max()
# returns NaN when hm_df has missing cells -- confirm whether it is still needed.
max_count = hm_df.to_numpy().max()
cbar.set_label('Number of On Target Shots', rotation=270, fontsize=14, color='black', labelpad=20)
plt.show()
This concludes the analysis of the La Liga One (2014-2020) data set. Thank you for your viewership.