import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'D:/Anaconda3/Library/plugins/platforms'
import numpy as np
import pandas as pd
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
from IPython.display import display
import warnings
# Silence warnings for the rest of the script.
# The filter is installed at module level on purpose: a filter added inside
# ``warnings.catch_warnings()`` is discarded as soon as the ``with`` block
# exits, so it would never apply to any of the code that follows.
warnings.filterwarnings("ignore")
## Data Source
Data provided by baseballsavant.mlb.com, captured via pybaseball.
Washington Nationals vs. Detroit Tigers, May 11th, 2016.
Max Scherzer strikes out 20 batters in a historic performance.
# Load the pitch-by-pitch CSV export into a DataFrame.
mydf = pd.read_csv("max data v2.csv")

# Column names of the export, kept for quick inspection.
column_headers = mydf.columns.values.tolist()
# Bare expressions: they only display something when run as notebook cells.
column_headers
list(mydf)

# Keep only the rows whose ``pitcher`` id is 453286.
is_target_pitcher = mydf['pitcher'] == 453286
max_scherzer_pitches = mydf[is_target_pitcher]
## Visualizations
Pitch Location - Right-Handed Batters vs. Left-Handed Batters
Scherzer dominated both righties and lefties, but he used different weapons in his arsenal for righties vs. lefties. Max punished right-handed batters with his slider low and away, and his 95 MPH fastball high in the zone. Scherzer relied on his sinker, throwing it away to lefties. The recipe worked phenomenally well as he struck out the Tigers' 1-2-3 batters three times each, including legendary hitter and former teammate, Miguel Cabrera.
# Plot constants. Every *InFeet value is a length in feet, derived from a
# measurement given in inches where applicable.
inchesPerFoot = 12
szBottomInFeet = 1.5  # lower edge of the drawn strike zone
szTopInFeet = 3.6     # upper edge of the drawn strike zone

platewidthInFeet = 17 / inchesPerFoot
expandedPlateInFeet = 20 / inchesPerFoot
szHeightInFeet = szTopInFeet - szBottomInFeet
ballInFeet = 2.9 / inchesPerFoot
halfBallInFeet = ballInFeet / 2
# Pitch-location scatter plot for right-handed batters.

# Define the pitch groups: one group per (pitch name, batter stance) pair.
grouped_pitches = max_scherzer_pitches.groupby(['pitch_name', 'stand'])

# Give every pitch name one fixed color. Keying the color on the pitch name
# (instead of on the position of the group in the groupby) keeps a pitch the
# same color regardless of which stances it was thrown to.
colors = ['red', 'blue', 'green', 'orange', 'purple']
pitch_names = sorted(max_scherzer_pitches['pitch_name'].dropna().unique())
pitch_colors = {
    pitch_name: colors[idx % len(colors)]
    for idx, pitch_name in enumerate(pitch_names)
}

fig, ax = plt.subplots(figsize=(20, 10), dpi=100, subplot_kw=dict(aspect='equal'))

# Background artists are drawn exactly once, before any pitch is plotted.
# Re-adding the opaque rectangles for every pitch group would paint over the
# pitches of the groups plotted earlier.
# Right-handed batter image, placed on the left of the plot (x from -3 to -1).
rhb = mpimg.imread('rhb.png')
ax.imshow(rhb, extent=[-3, -1, 0.0, 6])
# Outer rectangle: the strike zone grown by half a ball on the top and bottom
# and widened to the expanded plate width.
ax.add_patch(patches.Rectangle(
    (expandedPlateInFeet / -2, 1.5 - halfBallInFeet),
    expandedPlateInFeet,
    szHeightInFeet + ballInFeet,
    color='lightblue',
))
# Inner rectangle: the strike zone itself, plate-width wide.
ax.add_patch(patches.Rectangle(
    (platewidthInFeet / -2, 1.5),
    platewidthInFeet,
    szHeightInFeet,
    color='#D3D3D3',
))

# One scatter series per pitch name thrown to right-handed batters.
for (pitch_name, stand), pitches in grouped_pitches:
    if stand != 'R':
        continue
    ax.scatter(
        x=pitches['plate_x'],
        y=pitches['plate_z'],
        s=8,
        color=pitch_colors[pitch_name],
        label=pitch_name,
        zorder=3,  # keep the points above the strike-zone rectangles
    )

ax.set_ylim([0, 6.5])
ax.set_xlim([-3.5, 3.5])
ax.set_title("Max Scherzer K's 20 Detroit Tigers - May, 11th 2016\nPitch Locations for Right-Handed Batters", fontsize=16)
ax.set_xlabel('Horizontal Location (feet)', fontsize=12)
ax.set_ylabel('Vertical Location (feet)', fontsize=12)
ax.legend()
plt.show()
# Pitch-location scatter plot for left-handed batters.

# Give every pitch name one fixed color. Keying the color on the pitch name
# (instead of on the position of the group in a groupby) keeps a pitch the
# same color regardless of which stances it was thrown to.
colors = ['red', 'blue', 'green', 'orange', 'purple']
pitch_names = sorted(max_scherzer_pitches['pitch_name'].dropna().unique())
pitch_colors = {
    pitch_name: colors[idx % len(colors)]
    for idx, pitch_name in enumerate(pitch_names)
}

fig, ax = plt.subplots(figsize=(20, 10), dpi=100, subplot_kw=dict(aspect='equal'))

# Background artists are drawn exactly once, before any pitch is plotted.
# Re-adding the opaque rectangles for every pitch group would paint over the
# pitches of the groups plotted earlier.
# Left-handed batter image, placed on the right of the plot (x from 1 to 3).
lhb = mpimg.imread('lhb.png')
ax.imshow(lhb, extent=[1, 3, 0.0, 6])
# Outer rectangle: the strike zone grown by half a ball on the top and bottom
# and widened to the expanded plate width.
ax.add_patch(patches.Rectangle(
    (expandedPlateInFeet / -2, 1.5 - halfBallInFeet),
    expandedPlateInFeet,
    szHeightInFeet + ballInFeet,
    color='lightblue',
))
# Inner rectangle: the strike zone itself, plate-width wide.
ax.add_patch(patches.Rectangle(
    (platewidthInFeet / -2, 1.5),
    platewidthInFeet,
    szHeightInFeet,
    color='#D3D3D3',
))

# One scatter series per pitch name thrown to left-handed batters.
lefty_pitches = max_scherzer_pitches[max_scherzer_pitches['stand'] == 'L']
for pitch_name, pitches in lefty_pitches.groupby('pitch_name'):
    ax.scatter(
        x=pitches['plate_x'],
        y=pitches['plate_z'],
        s=8,
        color=pitch_colors[pitch_name],
        label=pitch_name,
        zorder=3,  # keep the points above the strike-zone rectangles
    )

ax.set_ylim([0, 6.5])
ax.set_xlim([-3.5, 3.5])
ax.set_title("Max Scherzer K's 20 Detroit Tigers - May, 11th 2016\nPitch Locations for Left-Handed Batters", fontsize=16)
ax.set_xlabel('Horizontal Location (feet)', fontsize=12)
ax.set_ylabel('Vertical Location (feet)', fontsize=12)
ax.legend()
plt.show()
Zone Maps
Scherzer lived in the strike zone all night. Staying ahead in
the count allowed Scherzer to be pitch efficient. For example, in the
2nd inning, Max struck out three batters on nine pitches.
# Heat map of how many pitches landed in each of the nine in-zone cells.

# Bucket every pitch by the value of its ``zone`` column.
grouped_by_zone = max_scherzer_pitches.groupby('zone')

# 3x3 grid of pitch counts, all cells starting at zero.
strike_zone = np.zeros((3, 3))

# Zones 1-9 map row-major onto the grid: 1 -> (0, 0), 2 -> (0, 1), ...,
# 9 -> (2, 2).
zone_indices = {zone: divmod(zone - 1, 3) for zone in range(1, 10)}

# Add each group's size to its cell; zone values outside 1-9 are skipped.
for zone_key, zone_pitches in grouped_by_zone:
    cell = zone_indices.get(int(zone_key))
    if cell is None:
        continue
    strike_zone[cell] += len(zone_pitches)

# Draw the grid as an image with an accompanying color bar.
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(strike_zone, cmap='coolwarm')
cbar = ax.figure.colorbar(im, ax=ax)

# Label the three columns and three rows of the grid.
grid_ticks = [0, 1, 2]
ax.set_xticks(grid_ticks)
ax.set_yticks(grid_ticks)
ax.set_xticklabels(['L', 'M', 'R'])
ax.set_yticklabels(['U', 'M', 'L'])
ax.set_xlabel('Horizontal Zone')
ax.set_ylabel('Vertical Zone')

# Print the count in the middle of every cell (x is the column, y the row).
for (row, col), count in np.ndenumerate(strike_zone):
    text = ax.text(col, row, int(count),
                   ha="center", va="center", color="black")

ax.set_title("Pitch Counts by Zone")
plt.show()
Approach per Batter
Scherzer threw 96 strikes on 119 pitches. He used
his fastball to establish early count leverage, and struck out many batters
with his wipeout slider.
# One zone heat map per batter faced.

# Batter ids mapped to display names.
batters = {
    578428: "Jose Iglesias",
    408234: "Miguel Cabrera",
    519455: "Jordan Zimmermann",
    400121: "Victor Martinez",
    543510: "James McCann",
    543238: "Anthony Gose",
    502110: "J.D. Martinez",
    457708: "Justin Upton",
    435079: "Ian Kinsler",
    457454: "Jarrod Saltalamacchia",
}

# Zones 1-9 mapped row-major onto a 3x3 grid. Built once, outside the loop,
# because it is the same for every batter.
zone_indices = {
    1: (0, 0),
    2: (0, 1),
    3: (0, 2),
    4: (1, 0),
    5: (1, 1),
    6: (1, 2),
    7: (2, 0),
    8: (2, 1),
    9: (2, 2),
}

# Group the pitches by batter
pitches_by_batter_id = max_scherzer_pitches.groupby('batter')

# Loop through each batter and create a heatmap for their pitches
for batter_id, batter_pitches in pitches_by_batter_id:
    # A batter id missing from ``batters`` falls back to a label carrying the
    # raw id instead of aborting the whole loop with a KeyError.
    batter_name = batters.get(batter_id, f"Batter {batter_id}")

    # Count this batter's pitches per zone; zone values outside 1-9 are
    # skipped. The inner loop uses its own variable names so the per-batter
    # frame is not rebound while it is being iterated.
    strike_zone = np.zeros((3, 3))
    for zone_key, zone_pitches in batter_pitches.groupby('zone'):
        cell = zone_indices.get(int(zone_key))
        if cell is not None:
            strike_zone[cell] += len(zone_pitches)

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(strike_zone, cmap='coolwarm')
    ax.figure.colorbar(im, ax=ax)
    ax.set_xticks([0, 1, 2])
    ax.set_yticks([0, 1, 2])
    ax.set_xticklabels(['L', 'M', 'R'])
    ax.set_yticklabels(['U', 'M', 'L'])
    ax.set_xlabel('Horizontal Zone')
    ax.set_ylabel('Vertical Zone')

    # Print the count in the middle of every cell (x is the column, y the row).
    for (row, col), count in np.ndenumerate(strike_zone):
        ax.text(col, row, int(count),
                ha="center", va="center", color="black")

    ax.set_title(f"Pitch Counts by Zone for {batter_name}")
    plt.show()
Vertical Movement
Max has one of the most consistent release points in all of
MLB, which makes it more difficult for hitters to pick up and discern the
pitch type before the ball arrives at the plate. The vertical movement
chart shows the change in elevation from the release point to the front
of home plate. Scherzer devastated batters by throwing pitches across a
wide vertical plane. There’s almost a foot of separation between his
fastball and his changeup.
# Side view: one line per pitch type from its mean release height to its mean
# height at the plate.
fig, ax = plt.subplots(figsize=(20, 7))

# Mean release z and mean plate z for each pitch type.
by_pitch_type = max_scherzer_pitches.groupby('pitch_type')
mean_release_z = by_pitch_type['release_pos_z'].mean()
mean_plate_z = by_pitch_type['plate_z'].mean()

# Change from mean release z to mean plate z, per pitch type.
vertical_movement = mean_plate_z - mean_release_z

# Both line endpoints on the x axis: 0 for the release end, 60.5 for the
# plate end.
x_coords = np.array([0, 60.5])

pitch_types = vertical_movement.index.tolist()

# Each legend entry carries the pitch type and its vertical change.
for pitch_type in pitch_types:
    y_coords = np.array([mean_release_z[pitch_type], mean_plate_z[pitch_type]])
    release_z, plate_z = y_coords
    vertical_change = plate_z - release_z
    ax.plot(x_coords, y_coords, label=f"{pitch_type} ({vertical_change:.2f} feet)")

ax.set_xlabel('Distance from Mound to Plate (feet)')
ax.set_ylabel('Vertical Movement (feet)')
ax.set_title('Change in Vertical Movement by Pitch Type')
ax.legend()
plt.show()
Horizontal Movement
In addition to vertical drop, Scherzer had
tremendous horizontal movement on his pitches. Again, Scherzer’s
extremely consistent release point made each pitch more deceptive. There
is nearly a one-foot gap between the average placement of his curveball and the
average location of his slider.
# Top-down view: one line per pitch type between its mean horizontal release
# position and its mean horizontal position at the plate.
fig, ax = plt.subplots(figsize=(6, 9))

# Calculate the mean release and plate x for each pitch type
mean_release_x = max_scherzer_pitches.groupby('pitch_type')['release_pos_x'].mean()
mean_plate_x = max_scherzer_pitches.groupby('pitch_type')['plate_x'].mean()

# Calculate the horizontal distance from release point to plate for each pitch type
horizontal_distance = mean_plate_x - mean_release_x

# Both line endpoints on the y axis
y_coords = np.array([0, 60.5])

pitch_types = list(horizontal_distance.index)

# Create the line plot for each pitch type
for pitch_type in pitch_types:
    x_coords = np.array([mean_release_x[pitch_type], mean_plate_x[pitch_type]])
    horizontal_change = x_coords[1] - x_coords[0]
    # x is reversed so the plate position is drawn at y=0 and the release
    # position at y=60.5.
    plt.plot(x_coords[::-1], y_coords, label=f"{pitch_type} ({horizontal_change:.2f} feet)")

# Set the axis labels and title
plt.xlabel('Horizontal Distance from Release to Where the Ball Crosses the Plate')
plt.ylabel('Distance between Pitching Mound and Home Plate (feet)')
plt.title('Top/Down View - Change in Horizontal Distance by Pitch Type')

# Set the axis limits and ticks. The x ticks span exactly the -3..3 limits:
# a tick outside the limits would make matplotlib widen the view to include
# it, leaving the x axis asymmetric.
plt.xlim(-3, 3)
plt.xticks(np.arange(-3, 3.5, 0.5), fontsize=9)
plt.ylim(-2, 65)
plt.yticks(np.arange(0, 65, 10), fontsize=9)

# Add a legend to the plot
plt.legend()
# Show the plot
plt.show()
Pitch Probabilities
This chart shows Max’s pitch preference based on the pre-pitch count and the batter. When Scherzer got into favorable counts vs. righties he elected to blow them away with his slider, while favoring the sinker vs. lefties in similar counts.
# Stacked bar chart of pitch-type share for every (balls, strikes, stance)
# combination.

# Within each (balls, strikes, stand) group, compute each pitch type's share
# of the pitches. ``value_counts(normalize=True)`` yields fractions in 0-1,
# so they are scaled by 100 to match the "Percentage" axis label.
pitch_counts = (
    max_scherzer_pitches
    .groupby(['balls', 'strikes', 'stand'])['pitch_type']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percentage')
    .reset_index()
)

# One row per (balls, strikes, stand), one column per pitch type.
pivot_table = pitch_counts.pivot_table(
    index=['balls', 'strikes', 'stand'],
    columns='pitch_type',
    values='percentage',
)

# Create the stacked bar chart
fig, ax = plt.subplots(figsize=(10, 8))
pivot_table.plot(kind='barh', stacked=True, ax=ax)

# Set the axis labels and title
ax.set_xlabel('Percentage of Pitches')
ax.set_ylabel('Balls-Strikes-Batter Position')
ax.set_title('Probability of Next Pitch by Count and Batter Position (R=RHB L=LHB)')

# Show the plot
plt.show()
Mean Pitch Release Speed (MPH)
This distribution further illustrates why
hitters struggled so mightily vs. Scherzer. His pitch repertoire
includes a 95 mph fastball and a mid-nineties sinker. Both pitches look
identical coming out of his hand but have different movement. Scherzer's
incredible consistency in delivery and velocity leaves hitters
guessing.
# Release-speed distribution, one panel per pitch name.

# Global seaborn look: white background, "paper" context, fonts scaled by 1.5.
sns.set_style('white')
sns.set_context('paper', font_scale=1.5)

# Grid with one column facet per pitch name, wrapping after three panels.
g = sns.FacetGrid(
    data=max_scherzer_pitches,
    col='pitch_name',
    col_wrap=3,
    height=4,
    aspect=1.2,
)

# Histogram of ``release_speed`` with a KDE curve in every panel.
hist_options = {'kde': True, 'edgecolor': 'white'}
g.map(sns.histplot, 'release_speed', **hist_options)

# Panel titles show the pitch name; the axis labels are shared by all panels.
g.set_titles('{col_name}')
g.set_xlabels('Release Speed (mph)')
g.set_ylabels('Count')
plt.show()