Sports Analytics Demo of RMarkdown

#here is my code
library(data.table)
library(dplyr)
library(ggplot2)

Introduction

Here is an intro section. Here you can explain at a high level what you did to determine who is the best kicker.

Description of Project

Talk about your data. Go into detail on what you did to subset your data, what type of results you considered, etc.

Data Visualization

Talk about your visualizations, what they show, and what you conclude. If you need to print stuff out on the screen to show the top kicker(s), talk about that also.

# NOTE(review): absolute, machine-specific working directory. The relative
# "Data/..." path passed to fread() further down resolves against it, so this
# line has to be edited before the document can run on another machine.
setwd("C:/Users/jason/Desktop/F24 Classes/IS470")

library(ggplot2)
library(data.table)
library(dplyr)
library(scales)
library(tidytext)
library(RColorBrewer)
library(kableExtra)

# Load the game-level results (path is relative to the working directory).
df <- fread("Data/NFLBDB2022/GamesGithub.csv")

# Build one row per decided game with its winner, loser and winning margin.
df1 <- df %>%
  select(season, away_team, home_team, away_score, home_score) %>%
  # Season 2024 is excluded from the analysis. Games without a recorded score
  # and tied games are dropped too: a tie has no winner or loser, and keeping
  # it would label the home team as both, adding a phantom win AND loss.
  filter(
    season != 2024,
    !is.na(away_score),
    !is.na(home_score),
    away_score != home_score
  ) %>%
  mutate(
    # Map the older team codes OAK, SD and STL onto LV, LAC and LA so each of
    # those teams is counted under a single code in both columns.
    across(
      c(away_team, home_team),
      ~ recode(.x, "OAK" = "LV", "SD" = "LAC", "STL" = "LA")
    ),
    winner = if_else(away_score > home_score, away_team, home_team),
    loser = if_else(away_score > home_score, home_team, away_team),
    # Ties are already removed, so the margin is always strictly positive.
    won_by = abs(home_score - away_score)
  ) %>%
  arrange(winner) %>%
  data.frame()

# Per season and team: number of games won and the summed winning margin,
# sorted by team and then season.
winners <- group_by(df1, season, winner) %>%
  summarise(
    games_won = n(),
    tot_won_by = sum(won_by),
    .groups = "drop"
  ) %>%
  arrange(winner, season) %>%
  as.data.frame()

# Per season and team: number of games lost and the summed losing margin,
# sorted by team and then season.
losers <- group_by(df1, season, loser) %>%
  summarise(
    games_lost = n(),
    tot_lost_by = sum(won_by),
    .groups = "drop"
  ) %>%
  arrange(loser, season) %>%
  as.data.frame()

# Overall record per team across every season present in `winners`/`losers`.
#
# A full join is used instead of an inner join so that a team-season found on
# only one side is kept: a winless season has no row in `winners`, and an
# unbeaten one has no row in `losers`. The counts missing after the join are
# filled with 0 so those games still contribute to the team's totals.
wins_df <- winners %>%
  full_join(losers, by = c("season" = "season", "winner" = "loser")) %>%
  mutate(across(
    c(games_won, tot_won_by, games_lost, tot_lost_by),
    ~ replace(.x, is.na(.x), 0)
  )) %>%
  group_by(winner) %>%
  summarise(
    n_wins = sum(games_won),
    n_losses = sum(games_lost),
    # Points won by minus points lost by, summed over all seasons.
    point_diff = sum(tot_won_by) - sum(tot_lost_by),
    winrate = n_wins / (n_wins + n_losses)
  ) %>%
  arrange(desc(winrate)) %>%
  data.frame()

# Show the ten teams with the highest win rate as a styled table.
knitr::kable(head(wins_df, 10), caption = "Top 10 NFL Teams by Win Rate") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Top 10 NFL Teams by Win Rate
winner	n_wins	n_losses	point_diff	winrate
NE	304	141	3043	0.6831461
PIT	268	164	1462	0.6203704
GB	265	171	1554	0.6077982
IND	256	174	920	0.5953488
BAL	257	176	1892	0.5935335
PHI	250	184	1287	0.5760369
KC	242	187	1000	0.5641026
SEA	242	189	917	0.5614849
NO	233	189	920	0.5521327
DAL	222	196	635	0.5311005

Visualization 1: A graph that shows each team's total number of wins, with bars colored and labeled by win rate

# Vertical bar chart: one bar per team (most wins first); bar height is the
# team's total wins, and the fill colour and the label above each bar show its
# overall win rate.
ggplot(data = wins_df, aes(x = reorder(winner, -n_wins), y = n_wins, fill = winrate)) +
  geom_col() +
  labs(
    x = "Teams",
    y = "Number of Wins",
    # The season span is read from df1 so the title always matches the data
    # actually plotted (df1 excludes season 2024).
    title = paste0("Win Rates by Team (", min(df1$season), " to ", max(df1$season), ")"),
    fill = "Win %"
  ) +
  geom_text(aes(label = label_percent(accuracy = 1L)(winrate)), vjust = -0.5) +
  # Fixed 0-100% colour scale running from red (low) to green (high).
  scale_fill_continuous(
    breaks = seq(.1, 1, 0.2),
    limits = c(0, 1),
    labels = paste0(100 * seq(.1, 1, 0.2), "%"),
    low = "red",
    high = "forestgreen"
  ) +
  theme(
    plot.title = element_text(hjust = 0.50, face = "bold", size = 18),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 15),
    axis.title = element_text(size = 18, face = "bold")
  )

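Visualization 2: A graph that shows the same team win totals and win rates as a horizontal bar chart, with teams sorted from most to fewest wins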

# Horizontal bar chart: same data as the vertical version, flipped so each
# team gets its own row; bar length is total wins, and the fill colour and the
# label at the end of each bar show the overall win rate.
ggplot(data = wins_df, aes(x = reorder(winner, n_wins), y = n_wins, fill = winrate)) +
  geom_col() +
  labs(
    x = "Teams",
    y = "Number of Wins",
    # The season span is read from df1 so the title always matches the data
    # actually plotted (df1 excludes season 2024).
    title = paste0("Win Rates by Team (", min(df1$season), " to ", max(df1$season), ")"),
    fill = "Win %"
  ) +
  coord_flip() +
  geom_text(aes(label = label_percent(accuracy = 1L)(winrate)), hjust = -0.2) +
  # Fixed 0-100% colour scale running from red (low) to green (high).
  scale_fill_continuous(
    breaks = seq(.1, 1, 0.2),
    limits = c(0, 1),
    labels = paste0(100 * seq(.1, 1, 0.2), "%"),
    low = "red",
    high = "forestgreen"
  ) +
  theme(
    plot.title = element_text(hjust = 0.50, face = "bold", size = 18),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 15),
    axis.title = element_text(size = 18, face = "bold")
  )

Visualization 3: A graph that shows each team's wins and win rate for every season from 2021 onward, with one panel per season

# Per-team, per-season record for seasons 2021 and later.
#
# A full join keeps team-seasons that appear only in `winners` or only in
# `losers` (a season with no losses or no wins); the counts missing after the
# join are filled with 0 instead of the row being dropped.
wins_yr_df <- winners %>%
  full_join(losers, by = c("season" = "season", "winner" = "loser")) %>%
  mutate(across(
    c(games_won, tot_won_by, games_lost, tot_lost_by),
    ~ replace(.x, is.na(.x), 0)
  )) %>%
  filter(season >= 2021) %>%
  group_by(winner, season) %>%
  summarise(
    n_wins = sum(games_won),
    n_losses = sum(games_lost),
    point_diff = sum(tot_won_by) - sum(tot_lost_by),
    winrate = n_wins / (n_wins + n_losses),
    # Drop the grouping explicitly so no partially grouped result is returned.
    .groups = "drop"
  ) %>%
  arrange(season, desc(winrate)) %>%
  data.frame()

# Faceted bar chart: one panel per season, teams ordered by wins within each
# panel, bars filled and labelled by that season's win rate.
ggplot(wins_yr_df, aes(reorder_within(winner, -n_wins, season), n_wins, fill = winrate)) +
  geom_col() +
  geom_text(aes(label = label_percent(accuracy = 1L)(winrate)), vjust = -0.5) +
  facet_wrap(vars(season), scales = "free", ncol = 1) +
  # reorder_within() decorates the x values; scale_x_reordered() restores the
  # plain team codes on the axis.
  scale_x_reordered() +
  scale_y_continuous(limits = c(0, 20)) +
  scale_fill_continuous(
    low = "red",
    high = "forestgreen",
    limits = c(0, 1),
    breaks = seq(.1, 1, 0.2),
    labels = paste0(seq(10, 90, 20), "%")
  ) +
  labs(
    title = "Win Rates by Team by Year",
    x = "Teams",
    y = "Number of Wins",
    fill = "Win %"
  ) +
  theme(
    plot.title = element_text(hjust = 0.50, face = "bold", size = 18),
    axis.title = element_text(size = 18, face = "bold"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 15),
    strip.text = element_text(size = 18)
  )

Visualization 4: A graph that shows how each team's win-rate ranking changes from season to season (a bump chart)

# create a bump chart
# Rank teams within each season by win rate (1 = highest). Ties are broken by
# point differential and then by team code rather than at random, so every
# run of the document produces the same ranking and the same chart.
ranking_df <- wins_yr_df %>%
  arrange(season, desc(winrate), desc(point_diff), winner) %>%
  group_by(season) %>%
  mutate(rank = row_number()) %>%
  ungroup() %>%
  data.frame()

# Interpolate the 8-colour "Set2" Brewer palette into 32 line colours.
cols <- brewer.pal(8, "Set2") %>% colorRampPalette()
myPal <- cols(32)

# Bump chart: one line per team tracing its rank across seasons, with the team
# code printed beside the first and the last season.
ggplot(ranking_df, aes(season, rank, group = winner)) +
  geom_line(aes(colour = winner), linewidth = 2) +
  geom_point(shape = 21, size = 4, fill = "white") +
  # Team labels left of the earliest season ...
  geom_text(
    data = filter(ranking_df, season == min(season)),
    aes(label = winner),
    size = 3,
    hjust = 1.5
  ) +
  # ... and right of the latest one.
  geom_text(
    data = filter(ranking_df, season == max(season)),
    aes(label = winner),
    size = 3,
    hjust = -0.5
  ) +
  # Reverse the y axis so rank 1 sits at the top.
  scale_y_reverse(breaks = 32:1) +
  scale_x_continuous(
    breaks = seq(min(ranking_df$season), max(ranking_df$season)),
    labels = as.character(seq(min(ranking_df$season), max(ranking_df$season)))
  ) +
  scale_colour_manual(values = myPal) +
  labs(
    title = "Team Ranking by Win Rates over Time",
    x = "Season",
    y = "Ranking",
    colour = "Teams"
  ) +
  theme(
    plot.title = element_text(hjust = 0.50, face = "bold", size = 18),
    axis.title = element_text(size = 18, face = "bold"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 15)
  )

Conclusion

Here are some general takeaways from your project.

Note

You can add captions at the bottom of images. To add a caption, include fig.cap="blah blah" inside the {r ...} header at the top of the RMarkdown code chunk that produces the image. You will notice here that I can also add lines of HTML code directly into the text. Here’s something bold, something in italics, something underlined, and something blue.