import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'D:/Anaconda3/Library/plugins/platforms'
import numpy as np
import pandas as pd
import matplotlib.patches as patches 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
from IPython.display import display
import warnings

# Silence library warnings for the rest of the script. The filter is installed
# at module level on purpose: when it is added inside
# `warnings.catch_warnings()`, the previous filter list is restored as soon as
# the `with` block exits, so nothing would actually be suppressed.
warnings.filterwarnings("ignore")

##Data Source Data provided by baseballsavant.mlb.com, captured via pybaseball

Washington Nationals vs. Detroit Tigers, May 11th, 2016

Max Scherzer strikes out 20 batters in historic performance.

# Read the game's CSV export into a dataframe
mydf = pd.read_csv("max data v2.csv")

# Column names, kept for quick inspection of what the export contains
column_headers = mydf.columns.values.tolist()
column_headers  # bare expression: only echoes a value in an interactive session
list(mydf)  # same idea: lists the column names when run interactively

# Keep only the rows whose 'pitcher' id is 453286
max_scherzer_pitches = mydf.loc[mydf['pitcher'] == 453286]

##Visualizations

Pitch Location - Right Handed Batters vs. Left Handed Batters

Scherzer dominated both righties and lefties, but he used different weapons in his arsenal for righties vs. lefties. Max punished right-handed batters with a slider low and away, and his 95 MPH fastball high in the zone. Scherzer relied on his sinker, throwing it away to lefties. The recipe worked phenomenally well, as he struck out the Tigers' 1-2-3 batters three times each, including legendary hitter and former teammate Miguel Cabrera.

# Plot geometry, all expressed in feet (inch measurements are divided by 12)
INCHES_PER_FOOT = 12

# Plate width, and a wider band drawn around it
platewidthInFeet = 17 / INCHES_PER_FOOT
expandedPlateInFeet = 20 / INCHES_PER_FOOT

# Height of the drawn strike zone: top edge (3.6) minus bottom edge (1.5)
szHeightInFeet = 3.6 - 1.5

# Ball diameter and half of it; used to pad the zone rectangle
ballInFeet = 2.9 / INCHES_PER_FOOT
halfBallInFeet = ballInFeet / 2

# Group the pitches by pitch name and batter handedness
grouped_pitches = max_scherzer_pitches.groupby(['pitch_name', 'stand'])

# One fixed colour per pitch name, so a given pitch looks the same in both
# handedness plots (the palette wraps around if there are more pitch names
# than colours).
colors = ['red', 'blue', 'green', 'orange', 'purple']
pitch_names = sorted({pitch_name for pitch_name, _stand in grouped_pitches.groups})
color_by_pitch = {
    pitch_name: colors[k % len(colors)]
    for k, pitch_name in enumerate(pitch_names)
}


def _plot_pitch_locations(stand, batter_image_path, batter_image_extent, handedness_label):
    """Scatter-plot the pitch locations thrown to batters of one handedness.

    Args:
        stand: value of the 'stand' group key to plot ('R' or 'L').
        batter_image_path: image file drawn beside the strike zone.
        batter_image_extent: [left, right, bottom, top] handed to ``imshow``.
        handedness_label: text inserted into the second line of the title.
    """
    selected = [
        (pitch_name, pitches)
        for (pitch_name, batter_stand), pitches in grouped_pitches
        if batter_stand == stand
    ]

    fig, ax = plt.subplots(figsize=(20, 10), dpi=100, subplot_kw=dict(aspect='equal'))

    if selected:
        # Draw the backdrop exactly once. Re-adding the zone rectangles for
        # every pitch group re-reads the image each time and can paint the
        # rectangles over pitches that were plotted earlier.
        ax.imshow(mpimg.imread(batter_image_path), extent=batter_image_extent)
        ax.add_patch(patches.Rectangle(
            (expandedPlateInFeet / -2, 1.5 - halfBallInFeet),
            expandedPlateInFeet,
            szHeightInFeet + ballInFeet,
            color='lightblue',
            zorder=1,
        ))
        ax.add_patch(patches.Rectangle(
            (platewidthInFeet / -2, 1.5),
            platewidthInFeet,
            szHeightInFeet,
            color='#D3D3D3',
            zorder=1,
        ))

    for pitch_name, pitches in selected:
        # Explicit zorder keeps the pitches above the zone rectangles
        # regardless of the order in which artists were added.
        ax.scatter(
            x=pitches['plate_x'],
            y=pitches['plate_z'],
            s=8,
            color=color_by_pitch[pitch_name],
            label=pitch_name,
            zorder=2,
        )

    ax.set_ylim([0, 6.5])
    ax.set_xlim([-3.5, 3.5])
    ax.set_title(
        "Max Scherzer K's 20 Detroit Tigers - May, 11th 2016\n"
        f"Pitch Locations for {handedness_label} Batters",
        fontsize=16,
    )
    ax.set_xlabel('Horizontal Location (feet)', fontsize=12)
    ax.set_ylabel('Vertical Location (feet)', fontsize=12)
    ax.legend()

    plt.show()


# Right-handed batters: batter image sits on the left side of the plot
_plot_pitch_locations('R', 'rhb.png', [-3, -1, 0.0, 6], 'Right-Handed')

# Left-handed batters: batter image sits on the right side of the plot
_plot_pitch_locations('L', 'lhb.png', [1, 3, 0.0, 6], 'Left-Handed')

Zone Maps Scherzer lived in the strike zone all night. Staying ahead in the count allowed Scherzer to be pitch-efficient. For example, in the 2nd inning, Max struck out three batters on nine pitches.



# Bucket every pitch by its 'zone' value
grouped_by_zone = max_scherzer_pitches.groupby('zone')

# Zones 1-9 laid out row-major on a 3x3 grid: zone -> (row, column)
zone_indices = {zone: divmod(zone - 1, 3) for zone in range(1, 10)}

# Tally the pitches per grid cell; zones outside 1-9 are not counted
strike_zone = np.zeros((3, 3))
for zone_key, zone_pitches in grouped_by_zone:
    cell = zone_indices.get(int(zone_key))
    if cell is None:
        continue
    strike_zone[cell] += len(zone_pitches)

# Heat map of the tallies, with a colour bar beside it
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(strike_zone, cmap='coolwarm')
cbar = fig.colorbar(im, ax=ax)

# Label the three columns and the three rows of the grid
grid_ticks = [0, 1, 2]
ax.set_xticks(grid_ticks)
ax.set_yticks(grid_ticks)
ax.set_xticklabels(['L', 'M', 'R'])
ax.set_yticklabels(['U', 'M', 'L'])

# Write each cell's count at its centre (x is the column, y is the row)
for (row, col), count in np.ndenumerate(strike_zone):
    ax.text(col, row, int(count), ha="center", va="center", color="black")

# Axis captions and title
ax.set(
    xlabel='Horizontal Zone',
    ylabel='Vertical Zone',
    title="Pitch Counts by Zone",
)

# Show the plot
plt.show()

Approach per Batter Scherzer threw 96 strikes on 119 pitches. He used his fastball to establish early count leverage, and struck out many batters with his wipeout slider.

# Batter names keyed by the id that appears in the 'batter' column
batters = {
    578428: "Jose Iglesias",
    408234: "Miguel Cabrera",
    519455: "Jordan Zimmermann",
    400121: "Victor Martinez",
    543510: "James McCann",
    543238: "Anthony Gose",
    502110: "J.D. Martinez",
    457708: "Justin Upton",
    435079: "Ian Kinsler",
    457454: "Jarrod Saltalamacchia",
}

# Zones 1-9 mapped onto a 3x3 grid: zone -> (row, column).
# Built once here instead of once per batter inside the loop.
zone_indices = {
    1: (0, 0),
    2: (0, 1),
    3: (0, 2),
    4: (1, 0),
    5: (1, 1),
    6: (1, 2),
    7: (2, 0),
    8: (2, 1),
    9: (2, 2),
}

# Group the pitches by batter
pitches_by_batter_id = max_scherzer_pitches.groupby('batter')

# One heat map per batter
for batter_id, batter_pitches in pitches_by_batter_id:
    # Fall back to the raw id so a batter missing from the table above still
    # gets a chart instead of aborting the loop with a KeyError.
    batter_name = batters.get(batter_id, f"Batter {batter_id}")

    # Tally this batter's pitches per grid cell; zones outside 1-9 are skipped.
    # The inner loop uses its own names so it no longer rebinds the outer
    # loop's dataframe variable.
    strike_zone = np.zeros((3, 3))
    for zone, zone_pitches in batter_pitches.groupby('zone'):
        cell = zone_indices.get(int(zone))
        if cell is not None:
            strike_zone[cell] += len(zone_pitches)

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(strike_zone, cmap='coolwarm')
    cbar = ax.figure.colorbar(im, ax=ax)
    ax.set_xticks([0, 1, 2])
    ax.set_yticks([0, 1, 2])
    ax.set_xticklabels(['L', 'M', 'R'])
    ax.set_yticklabels(['U', 'M', 'L'])
    ax.set_xlabel('Horizontal Zone')
    ax.set_ylabel('Vertical Zone')

    # Write each cell's count at its centre (x is the column, y is the row)
    for (row, col), count in np.ndenumerate(strike_zone):
        ax.text(col, row, int(count), ha="center", va="center", color="black")

    ax.set_title(f"Pitch Counts by Zone for {batter_name}")
    plt.show()

Vertical Movement Max has one of the most consistent release points in all of MLB, which makes it more difficult for hitters to pick up and discern the pitch type before the ball arrives at the plate. The vertical movement chart shows the change in elevation from the release point to the front of home plate. Scherzer devastated batters by throwing pitches across a wide vertical plane. There’s almost a foot of separation between his fastball and his changeup.

fig, ax = plt.subplots(figsize=(20, 7))

# Per-pitch-type averages of the release height and the height at the plate
by_pitch_type = max_scherzer_pitches.groupby('pitch_type')
mean_release_z = by_pitch_type['release_pos_z'].mean()
mean_plate_z = by_pitch_type['plate_z'].mean()

# Average change in height between release and plate for each pitch type
vertical_movement = mean_plate_z - mean_release_z

# Pitch types to draw, in index order
pitch_types = list(vertical_movement.index)

# Each line runs from x=0 (release) to x=60.5 (plate)
x_coords = np.array([0, 60.5])

# One line per pitch type, labelled with its change in height
for pitch_type in pitch_types:
    release_height = mean_release_z[pitch_type]
    plate_height = mean_plate_z[pitch_type]
    ax.plot(
        x_coords,
        np.array([release_height, plate_height]),
        label=f"{pitch_type} ({plate_height - release_height:.2f} feet)",
    )

# Axis captions, title and legend
ax.set_xlabel('Distance from Mound to Plate (feet)')
ax.set_ylabel('Vertical Movement (feet)')
ax.set_title('Change in Vertical Movement by Pitch Type')
ax.legend()

# Show the plot
plt.show()

Horizontal Movement In addition to vertical drop, Scherzer had tremendous horizontal movement on his pitches. Again, Scherzer’s extremely consistent release point made each pitch more deceptive. There is nearly a one-foot gap between the average location of his curveball and the average location of his slider.

fig, ax = plt.subplots(figsize=(6, 9))
# Calculate the mean release and plate x for each pitch type
mean_release_x = max_scherzer_pitches.groupby('pitch_type')['release_pos_x'].mean()
mean_plate_x = max_scherzer_pitches.groupby('pitch_type')['plate_x'].mean()

# Calculate the horizontal distance from release point to plate for each pitch type
horizontal_distance = mean_plate_x - mean_release_x

# Define the y coordinates for the line plot
y_coords = np.array([0, 60.5])

# Pitch types to draw, in index order
pitch_types = list(horizontal_distance.index)

# Create the line plot for each pitch type
for pitch_type in pitch_types:
    x_coords = np.array([mean_release_x[pitch_type], mean_plate_x[pitch_type]])
    horizontal_change = horizontal_distance[pitch_type]
    # x is reversed so the plate location is drawn at y=0 and the release
    # point at y=60.5
    plt.plot(x_coords[::-1], y_coords, label=f"{pitch_type} ({horizontal_change:.2f} feet)")

# Set the axis labels and title (label spelling corrected: "Release", "Mound")
plt.xlabel('Horizontal Distance from Release to Where the Ball Crosses the Plate')
plt.ylabel('Distance between Pitching Mound and Home Plate (feet)')
plt.title('Top/Down View - Change in Horizontal Distance by Pitch Type')

# Set the axis limits and ticks
# NOTE(review): the first x tick (-3.5) lies outside the limit set just above;
# matplotlib may widen the view to include it -- confirm which range is wanted.
plt.xlim(-3, 3)
plt.xticks(np.arange(-3.5, 3.5, 0.5), fontsize=9)
plt.ylim(-2, 65)
plt.yticks(np.arange(0, 65, 10), fontsize=9)

# Add a legend to the plot
plt.legend()


# Show the plot
plt.show()

Pitch Probabilities This chart shows Max’s pitch preference based on the pre-pitch count and the batter. When Scherzer got into favorable counts vs. righties he elected to blow them away with his slider, while favoring the sinker vs. lefties in similar counts.

# Share of each pitch type within every (balls, strikes, stand) combination.
# normalize=True makes the shares within one combination sum to 1.
count_and_stand = ['balls', 'strikes', 'stand']
pitch_mix = max_scherzer_pitches.groupby(count_and_stand)['pitch_type'].value_counts(normalize=True)
pitch_counts = pitch_mix.rename('percentage').reset_index()

# Reshape so each pitch type is a column; that is the layout a stacked bar needs
pivot_table = pitch_counts.pivot_table(
    values='percentage',
    index=count_and_stand,
    columns='pitch_type',
)

# Horizontal stacked bars, one per count/handedness combination
fig, ax = plt.subplots(figsize=(10, 8))
pivot_table.plot(ax=ax, kind='barh', stacked=True)

# Axis captions and title
ax.set(
    xlabel='Percentage of Pitches',
    ylabel='Balls-Strikes-Batter Position',
    title='Probability of Next Pitch by Count and Batter Position (R=RHB L=LHB)',
)

# Show the plot
plt.show()

Mean Pitch Release Speed (MPH) This distribution further illustrates why hitters struggled so mightily vs. Scherzer. His pitch repertoire includes a 95 mph fastball and a mid-nineties sinker. Both pitches look identical coming out of his hand but have different movement. Scherzer’s incredible consistency in delivery and velocity leaves hitters guessing.

# Global seaborn look: plain white background, 'paper' context with larger fonts
sns.set_style('white')
sns.set_context('paper', font_scale=1.5)

# One histogram (with a KDE overlay) of release speed per pitch name,
# wrapped three panels to a row
g = sns.FacetGrid(
    max_scherzer_pitches,
    col='pitch_name',
    col_wrap=3,
    height=4,
    aspect=1.2,
)
g.map(sns.histplot, 'release_speed', kde=True, edgecolor='white')

# Panel titles show just the pitch name; then label both axes
(
    g.set_titles('{col_name}')
    .set_xlabels('Release Speed (mph)')
    .set_ylabels('Count')
)

plt.show()