For this assignment, I will use the dplyr, stringr, and ggplot2 packages from the tidyverse. The dataset, obtained from Kaggle, details how much money American sports owners contributed to political campaigns and organizations during the 2016, 2018, and 2020 election cycles.

# Load the donations table straight from GitHub. check.names = FALSE keeps
# the header text as-is, so "Election Year" retains its space.
df <- read.csv(
  file = "https://raw.githubusercontent.com/moham6839/SPRING2023TIDYVERSE/main/sports-political-donations.csv",
  check.names = FALSE
)
head(df, n = 6L)
##         Owner         Team League                    Recipient  Amount
## 1 Adam Silver Commissioner    NBA                  WRIGHT 2016 $4,000 
## 2 Adam Silver Commissioner    NBA          BIDEN FOR PRESIDENT $2,800 
## 3 Adam Silver Commissioner    NBA                    CORY 2020 $2,700 
## 4 Adam Silver Commissioner    NBA Kamala Harris for the People $2,700 
## 5 Adam Silver Commissioner    NBA              Win The Era PAC $2,700 
## 6 Adam Silver Commissioner    NBA            KOHL FOR CONGRESS $2,000 
##   Election Year    Party
## 1          2016 Democrat
## 2          2020 Democrat
## 3          2020 Democrat
## 4          2020 Democrat
## 5          2020 Democrat
## 6          2018 Democrat
# Inspect column types. Amount comes in as character because the raw values
# carry "$" and "," formatting, so it must be cleaned before any arithmetic.
glimpse(df)
## Rows: 2,798
## Columns: 7
## $ Owner           <chr> "Adam Silver", "Adam Silver", "Adam Silver", "Adam Sil…
## $ Team            <chr> "Commissioner", "Commissioner", "Commissioner", "Commi…
## $ League          <chr> "NBA", "NBA", "NBA", "NBA", "NBA", "NBA", "NBA", "NBA"…
## $ Recipient       <chr> "WRIGHT 2016", "BIDEN FOR PRESIDENT", "CORY 2020", "Ka…
## $ Amount          <chr> "$4,000 ", "$2,800 ", "$2,700 ", "$2,700 ", "$2,700 ",…
## $ `Election Year` <int> 2016, 2020, 2020, 2020, 2020, 2018, 2018, 2018, 2018, …
## $ Party           <chr> "Democrat", "Democrat", "Democrat", "Democrat", "Democ…
# Strip the currency formatting ("$" and thousands separators) in a single
# pass, then convert the remaining digits to integer dollars.
df$Amount <- df$Amount %>%
  str_replace_all("[\\$,]", "") %>%
  as.integer()

Which political party received the most contributions?

# Number of donation records per party, restricted to the two major parties
# and listed from most to fewest.
new_df <- df %>%
  filter(Party %in% c("Republican", "Democrat")) %>%
  dplyr::count(Party, name = "n") %>%
  group_by(Party) %>%
  arrange(desc(n))
new_df
## # A tibble: 2 × 2
## # Groups:   Party [2]
##   Party          n
##   <chr>      <int>
## 1 Republican  1625
## 2 Democrat     921
# Horizontal bar chart of the per-party counts in new_df.
# `n` is a count of donation records (rows), not of distinct donors, so the
# title and axis are labelled as contributions rather than contributors.
ggplot(new_df, aes(x = n, y = Party)) +
  geom_col(width = 0.7) +
  labs(
    title = "Number of Contributions to Each Political Party",
    x = "Number of Contributions",
    y = "Political Party"
  )

Which sports owner donated the most?

# Total amount donated per owner within each league. Rows whose Team is
# "Commissioner" are excluded.
owner_df <- df %>%
  filter(Team != "Commissioner") %>%
  group_by(Owner, League) %>%
  # .groups = "drop" returns an ungrouped table, so later steps are not
  # silently applied per Owner.
  dplyr::summarise(Amount = sum(Amount), .groups = "drop") %>%
  arrange(desc(Amount))
DT::datatable(owner_df)

# Plot the ten largest totals. owner_df is already sorted in descending
# order (arrange() places NA last), so the first ten rows are the top ten.
# Amount is stored in dollars; divide by 1e6 so the axis really is in
# millions, and label each axis with the variable it actually shows.
ggplot(head(owner_df, 10), aes(x = Amount / 1e6, y = Owner, fill = League)) +
  geom_col(position = "dodge", width = 0.7) +
  labs(
    title = "American Sports Owners That Politically Contributed the Most Money",
    x = "Amount Contributed (in millions $)",
    y = "Name of Sports Owners"
  )

Which Leagues provided the most political donations?

# Total amount donated per league, largest first.
league_df <- df %>%
  group_by(League) %>%
  dplyr::summarise(Amount = sum(Amount)) %>%
  arrange(desc(Amount))
DT::datatable(league_df)

# Horizontal bar chart of the league totals. Amount is stored in dollars;
# divide by 1e6 so the axis matches its "in millions" label.
ggplot(league_df, aes(x = Amount / 1e6, y = League)) +
  geom_col(width = 0.7) +
  labs(
    title = "Total Political Donations by Sports League",
    x = "Amount Contributed (in millions $)",
    y = "Professional American Sports Leagues"
  )

Which recipient received the most political donations?

# Total amount received per recipient, largest first.
# Group by Recipient only: also grouping by Amount would split each
# recipient by donation size, so no row would hold a recipient's full total.
recipient_df <- df %>%
  group_by(Recipient) %>%
  dplyr::summarise(Amount = sum(Amount)) %>%
  arrange(desc(Amount))
DT::datatable(recipient_df)

# Plot the ten largest totals. recipient_df is already sorted in descending
# order, so the first ten rows are the top ten. Amount is stored in dollars;
# divide by 1e6 so the axis matches its "in millions" label.
ggplot(head(recipient_df, 10), aes(x = Amount / 1e6, y = Recipient)) +
  geom_col(width = 0.7) +
  labs(
    title = "Top 10 Recipients That Received the Most Money",
    x = "Amount Contributed (in millions $)",
    y = "Recipients"
  )

Which political party received the most money in each election year?

# Dollars donated to each of the two major parties in every election year,
# ordered from the largest total to the smallest.
year_df <- df %>%
  filter(Party %in% c("Republican", "Democrat")) %>%
  group_by(`Election Year`, Party) %>%
  dplyr::summarise(Amount = sum(Amount)) %>%
  arrange(desc(Amount))
year_df
## # A tibble: 6 × 3
## # Groups:   Election Year [3]
##   `Election Year` Party        Amount
##             <int> <chr>         <int>
## 1            2016 Republican 12940514
## 2            2018 Republican 11282570
## 3            2020 Republican 10022932
## 4            2018 Democrat    4174212
## 5            2016 Democrat    4065094
## 6            2020 Democrat    1874333
# Side-by-side bars of party totals for each election year.
# `Election Year` is an integer column; factor() makes the x axis discrete
# so only the election years present in the data appear as ticks.
# Amount is stored in dollars; divide by 1e6 so the axis matches its
# "in millions" label. Each axis is labelled with the variable it shows.
ggplot(year_df, aes(x = factor(`Election Year`), y = Amount / 1e6, fill = Party)) +
  geom_col(position = "dodge", width = 0.7) +
  labs(
    title = "Total Amount Contributed to Each Political Party",
    x = "Election Year",
    y = "Amount Contributed (in millions $)"
  )