DS736 Module 1

Data and Findings

While the total number of free throws taken each season fluctuates and overall saw a decrease over this decade, the percent made stays about the same, near 75%. This number also remained consistent when broken across the different quarters of a game.

Important details to note re: this data:

2011-2012 included a lockout that forced the cancellation of several regulation season games
Seattle (SEA) moved to Oklahoma City (OKC) beginning with the 2008-2009 season
New Jersey (NJ) moved to Brooklyn (BKN) beginning with the 2012-2013 season

Tab 1

A Look at Free Throw Totals

In this dashboard, I focused on the overall number of free throws. The first plot shows the total number of free throws made and free throws missed per season. The second plot shows the percentage of free throws made and missed per quarter

The first plot shows that in the first half of the decade, more free throws were taken each season. Following that lockout, there was a decrease overall. Perhaps there was a rule change that came out of the lockout negotiations? But while the total number was down, the percentage mae remained about the same each season.

The second plot shows that the percentage made per quarter appears to be close to the same, suggesting increased pressure of the final quarter or overtime do not affect the overall free throw numbers.

# a single file
xfun::embed_file("C:/Users/phill/OneDrive/Documents/Loyola/Data_Visualization/R/R_datafiles/FT-Dashboard.Rmd")

Download FT-Dashboard.Rmd

#-----------Total Free Throws by Season--------------

ft_shots <- data.frame(count(df1, season))

makes <- c(1)
misses <- c(0)

ft_taken <- data.frame(df1$season, df1$shot_made)
made <- ft_taken[ft_taken$df1.shot_made %in% makes,]

missed <- ft_taken[ft_taken$df1.shot_made %in% misses,]

madeFT_season <- data.frame(count(made, df1.season))
colnames(madeFT_season)[1] = "season"
colnames(madeFT_season)[2] = "count"
madeFT_season$type <- "Made"

missedFT_season <- data.frame(count(missed, df1.season))
colnames(missedFT_season)[1] = "season"
colnames(missedFT_season)[2] = "count"
missedFT_season$type <- "Missed"

ft_df <- rbind(madeFT_season, missedFT_season)

made_miss_totals <- ggplot(ft_df, aes(x=season, y=count, fill = type, label = count)) +
  geom_bar(stat="identity", position = position_stack(reverse = TRUE)) +
  coord_flip() + 
  labs(title="Total Free Throws by Season (2006-2016)", x = "Season", y = "Free Throws Taken") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(labels=comma) +
  geom_text(size = 4.5, position = position_stack(reverse = TRUE, 0.5), label = scales::comma(ft_df$count))
made_miss_totals

#------------------- Makes/Misses by Quarter-------------------------------


ft_shots__period <- data.frame(count(df1, period))

makes <- c(1)
misses <- c(0)

ft_taken_period <- data.frame(df1$period, df1$shot_made)
made_period <- ft_taken_period[ft_taken_period$df1.shot_made %in% makes,]
missed_period <- ft_taken_period[ft_taken_period$df1.shot_made %in% misses,]

madeFT_period <- data.frame(count(made_period, df1.period))
colnames(madeFT_period)[1] = "Period"
colnames(madeFT_period)[2] = "count"
madeFT_period$type <- "Made"

regulation_periods <- madeFT_period$Period[1:4]

madeFT_regulation <- madeFT_period %>%
  filter(Period %in% regulation_periods) %>%
  select(Period, count, type) %>%
  data.frame()

overtime_periods <- madeFT_period$Period[5:8]

made_OT <- sum(madeFT_period[!madeFT_period$Period %in% regulation_periods, "count"]) 
total_madeFT_period <- rbind(madeFT_regulation, c("OT", made_OT, "Made"))

missedFT_period <- data.frame(count(missed_period, df1.period))
colnames(missedFT_period)[1] = "Period"
colnames(missedFT_period)[2] = "count"
missedFT_period$type <- "Missed"

missedFT_regulation <- missedFT_period %>%
  filter(Period %in% regulation_periods) %>%
  select(Period, count, type) %>%
  data.frame()

missed_OT <- sum(missedFT_period[!missedFT_period$Period %in% regulation_periods, "count"]) 
total_missedFT_period <- rbind(missedFT_regulation, c("OT", missed_OT, "Missed"))

ft_period_df <- rbind(total_madeFT_period, total_missedFT_period)

period_pie <- plot_ly(hole=0.86) %>%
  layout(title="Free Thows by Quarter (2006-2016)") %>%
  add_trace(data = ft_period_df[ft_period_df$Period == "1",],
            labels = ~type,
            values = ~ft_period_df[ft_period_df$Period == "1","count"],
            type = "pie",
            textposition = "inside",
            hovertemplate = "Quarter: 1<br>Type:%{label}<br>Percent:%{percent}<br>Shot Count: %{value}<extra></extra>") %>%
  add_trace(data = ft_period_df[ft_period_df$Period == "2",],
            labels = ~type,
            values = ~ft_period_df[ft_period_df$Period == "2","count"],
            type = "pie",
            textposition = "inside",
            hovertemplate = "Quarter: 2<br>Type:%{label}<br>Percent:%{percent}<br>Shot Count: %{value}<extra></extra>",
            domain = list(
              x = c(0.08, 0.92), 
              y = c(0.08, 0.92))) %>%
  add_trace(data = ft_period_df[ft_period_df$Period == "3",],
            labels = ~type,
            values = ~ft_period_df[ft_period_df$Period == "3","count"],
            type = "pie",
            textposition = "inside",
            hovertemplate = "Quarter: 3<br>Type:%{label}<br>Percent:%{percent}<br>Shot Count: %{value}<extra></extra>",
            domain = list(
              x = c(0.15, 0.85), 
              y = c(0.15, 0.85))) %>%
  add_trace(data = ft_period_df[ft_period_df$Period == "4",],
            labels = ~type,
            values = ~ft_period_df[ft_period_df$Period == "4","count"],
            type = "pie",
            textposition = "inside",
            hovertemplate = "Quarter: 4<br>Type:%{label}<br>Percent:%{percent}<br>Shot Count: %{value}<extra></extra>",
            domain = list(
              x = c(0.21, 0.79), 
              y = c(0.21, 0.79))) %>%
  add_trace(data = ft_period_df[ft_period_df$Period == "OT",],
            labels = ~type,
            values = ~ft_period_df[ft_period_df$Period == "OT","count"],
            type = "pie",
            textposition = "inside",
            hovertemplate = "Quarter: OT<br>Type:%{label}<br>Percent:%{percent}<br>Shot Count: %{value}<extra></extra>",
            domain = list(
              x = c(0.26, 0.74), 
              y = c(0.26, 0.74)))
period_pie

Tab 2

Home Court - Free Throw Heatmap on Home Court by Year

In this plot, I considered the number of free throws made on each home court per season. When college basketball teams play in those larger arenas, commentators always discuss how the depth perception can affect shots, so I wanted to see if there was any noticeable difference between free throws made and the court. There are three cities which seems to consistly have the largest number of free throws made between 2006-2007 and 2009-2010: Utah Jazz, Los Angeles Lakers, and the Boston Celtics. Interestingly, after the lockout in 2011-2012, there was a decrease in the number of free throws made as no team broke 2000 free throws made after that point, which mirrors the same story told by the chart on the previous tab.

df1_city <- separate(df1, game, c("Home", "Away"), sep=" - ", remove = TRUE)

city_ft <- df1_city %>% 
  select(season, Home, shot_made) %>%
  group_by(Home, season) %>%
  summarize(sum(shot_made), .groups = 'keep') %>%
  data.frame()

city_ft2 <- city_ft[-c(85, 86, 87, 88, 89, 296),] 
colnames(city_ft2)[3] = "FT_made"

breaks <- c(seq(0, max(city_ft2$FT_made), by=100))

city_heatmap <- ggplot(city_ft2, aes(x = Home, y = season, fill=FT_made)) +
  geom_tile(color="black") +
  geom_text(aes(label=comma(FT_made))) +
  coord_equal(ratio=1) +
  labs(title="Heatmap: Free Throws Made on Home Court by Season",
       y = "Season",
       x = "Home Court",
       fill = "Free Throws Made") +
  theme_minimal() +
  theme(plot.title = element_text(hjust=0.5)) +
  scale_y_discrete(limits = rev(levels(city_ft2$season))) +
  scale_fill_continuous(low="white", high="orange", breaks=breaks) +
  guides(fill = guide_legend(reverse = TRUE, override.aes = list(colour="black")))
city_heatmap

Tab 3

Top Five Percentage Free Throw Shooters

For this plot, I considered each NBA player in this data set, subset the data to look at players with at least 1000 free throws taken (assumption that they would have average at least 100 free throws each season). From there I sorted the players by their free throw percentage and found those with the five highest. Comparing these 5, there are a couple of seasons where there is a more dramatic dip or increase in their percentage, but for the most part across the decade, these players are between 88%-93%.

player_shots <- data.frame(df1$player, df1$shot_made)
colnames(player_shots)[1] = "player"
colnames(player_shots)[2] = "shots_made"

player_shots_decade <- df1 %>% 
  select(player, shot_made) %>%
  group_by(player) %>%
  summarise(shots_taken = length(player), .groups = 'keep') %>%
  data.frame()

player_made_decade <- df1 %>% 
  select(season, player, shot_made) %>%
  group_by(player) %>%
  summarise(shots_made = sum(shot_made), .groups = 'keep') %>%
  data.frame()

player_decade_totals <- merge(x=player_shots_decade, y=player_made_decade, by="player",all=TRUE)
player_decade_totals$percentage <- round(player_decade_totals$shots_made/player_decade_totals$shots_taken*100, 2)

#players over 1000 free throws taken
player_over1000 <- player_decade_totals[which(player_decade_totals$shots_taken > 1000),]

#top 5 percentage shoots
player_top5_percent <- player_over1000[order(player_over1000$percentage, decreasing = TRUE),]
player_top5_percent <- player_top5_percent[1:5,]

#top 5 names
top5 <- player_top5_percent$player

player_df <- df1 %>% 
  filter(player %in% top5) %>%
  select(season, player, shot_made) %>%
  group_by(player, season) %>%
  summarise(shots_taken = length(player), .groups = 'keep') %>%
  data.frame()

player_df_made <- df1 %>% 
  filter(player %in% top5) %>%
  select(season, player, shot_made) %>%
  group_by(player, season) %>%
  summarise(shots_made = sum(shot_made), .groups = 'keep') %>%
  data.frame()

top_5_decade <- player_df %>% inner_join(player_df_made, by=c("player","season"))

top_5_decade$percentage <- round(top_5_decade$shots_made/top_5_decade$shots_taken, 2)


top_5_ft_decade <- ggplot(top_5_decade, aes(x = season, y = percentage, group=player)) +
  geom_line(aes(color=player), size=2) +
  labs(title = "Top 5 Free Throw Shooters by % at Least 1000 Attempts (2006-2016)",
       x = "Season", y = "Percentage") +
  theme_light() +
  theme(plot.title = element_text(hjust=0.5)) +
  geom_point(shape=21, size=3, color="black", fill="white") +
  scale_color_brewer(palette = "Dark2", name = "Year") +
  scale_y_continuous(labels = scales::percent)
top_5_ft_decade

Tab 4

Dirk Nowitzki’s Percentages of Makes and Misses

From the data in the previous data set, I saw that Dirk Nowitzki took the most free throws out of the group and was the only one to play each of the 10 seasons. From these pie charts showing his free throw percentage by season, he was normally around 89%, but in his best FT-shooting season he almost made 92% and in a down year, he was closer to 86%.

df1$shot_type <- ifelse(df1$shot_made==1, "Made", "Missed")

player_df_DN <- df1 %>% 
  filter(player=="Dirk Nowitzki") %>%
  select(season, player, shot_made, shot_type) %>%
  group_by(shot_type, season) %>%
  summarise(shots=length(shot_made), .groups='keep') %>%
  group_by(season) %>%
  mutate(percent_of_total = round(100*shots/sum(shots),1)) %>%
  ungroup() %>%
  data.frame()

player_df_DN$season = factor(player_df_DN$season, levels=c("2006 - 2007", 
                                                           "2007 - 2008", 
                                                           "2008 - 2009",
                                                           "2009 - 2010",
                                                           "2010 - 2011",
                                                           "2011 - 2012",
                                                           "2012 - 2013",
                                                           "2013 - 2014",
                                                           "2014 - 2015",
                                                           "2015 - 2016"))


DN_pie <- ggplot(data=player_df_DN, aes(x="", y=shots, fill=shot_type)) +
  geom_bar(stat="identity", position="fill") +
  coord_polar(theta = "y", start=0) +
  labs(fill = "Made/Miss", x = NULL, y= NULL, 
       title = "Dirk Nowitzki's Free Thows by Season") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid = element_blank()) +
  facet_wrap(~season, ncol=4, nrow=3) +
  scale_fill_brewer(palette = "PuBu") +
  geom_text(aes(x=1.7, label=paste0(percent_of_total,"%")),
          size=4,
          position=position_fill(vjust=0.5))
DN_pie

DS736 Module 1 - CBrown

Catherine Brown

2023-02-27

NBA Free Throws (2006-2016)

Introduction