# Load necessary libraries
pacman::p_load(pacman, readr, ggplot2, dplyr, scales, knitr, RColorBrewer)

# Import the four source datasets. The `##` lines under each call are the
# column specifications readr printed when the file was read.

# State-by-state legal status (both columns are text).
Marijuana_Legal_Status <- readr::read_csv(file = "Marijuana_Legal_Status.csv")
## Rows: 51 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): STATE, MARIJUANA LEGALIZATION STATUS
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# California revenue by year; the revenue column is read as text (chr) and is
# converted to numeric before plotting further down.
California_Marijuana_Tax_Revenue <- readr::read_csv(
  file = "California_Marijuana_Tax_Revenue.csv"
)
## Rows: 6 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): CALIFORNIA STATE MARIJUANA TAX REVENUE
## dbl (1): YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Total revenue collected per state plus the first year of tax collection;
# the revenue column is read as text (chr) here as well.
Total_Revenue <- readr::read_csv(file = "Total_Revenue.csv")
## Rows: 20 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): STATE, TOTAL REVENUE COLLECTED
## dbl (1): FIRST YEAR OF TAX COLLECTION
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# 2023 revenue per state; the revenue column is read as text (chr).
X2023_Marijuana_Tax_Revenue <- readr::read_csv(
  file = "2023_Marijuana_Tax_Revenue.csv"
)
## Rows: 19 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): STATE, 2023 MARIJUANA TAX REVENUE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Bar chart: 2023 marijuana tax revenue per state, ordered by revenue
# The revenue column is read as text, so strip "$" and "," before converting
# it to numeric.
X2023_Marijuana_Tax_Revenue <- mutate(
  X2023_Marijuana_Tax_Revenue,
  `2023 MARIJUANA TAX REVENUE` = as.numeric(
    gsub("[\\$,]", "", `2023 MARIJUANA TAX REVENUE`)
  )
)

ggplot(
  X2023_Marijuana_Tax_Revenue,
  aes(
    x = reorder(STATE, `2023 MARIJUANA TAX REVENUE`),
    y = `2023 MARIJUANA TAX REVENUE`
  )
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(fill = "#33cc33") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "USA: 2023 Marijuana Tax Revenue by State",
    x = "State",
    y = "Revenue ($)"
  ) +
  theme(
    # Slant the state names so they do not overlap
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

# Line Plot for California Marijuana Tax Revenue Over the Years
# Give the revenue column a short name, then strip "$" and "," from its text
# values so they can be converted to numeric (the column is read as character).
California_Marijuana_Tax_Revenue <- California_Marijuana_Tax_Revenue %>%
  rename(REVENUE = `CALIFORNIA STATE MARIJUANA TAX REVENUE`) %>%
  mutate(REVENUE = as.numeric(gsub("[\\$,]", "", REVENUE)))

ggplot(California_Marijuana_Tax_Revenue, aes(x = YEAR, y = REVENUE)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a lifecycle warning.
  geom_line(color = "#33cc33", linewidth = 1.5) +
  # Mark each yearly observation on top of the line
  geom_point(color = "#000000") +
  scale_y_continuous(labels = scales::comma) + # Scale y-axis with commas
  labs(
    title = "California Marijuana Tax Revenue Over the Years",
    x = "Year",
    y = "Revenue ($)"
  )

# Bar chart: total marijuana tax revenue collected per state, coloured by the
# first year in which the state collected the tax
# Shorten the column names and turn the "$"/","-formatted revenue text into
# numbers.
Total_Revenue <- mutate(
  rename(
    Total_Revenue,
    TOTAL_REVENUE = `TOTAL REVENUE COLLECTED`,
    FIRST_YEAR = `FIRST YEAR OF TAX COLLECTION`
  ),
  TOTAL_REVENUE = as.numeric(gsub("[\\$,]", "", TOTAL_REVENUE))
)

# Fill scale for the first-year groups, taken from the RColorBrewer "Set3"
# palette
year_colours <- scale_fill_brewer(palette = "Set3")

ggplot(
  Total_Revenue,
  aes(
    x = reorder(STATE, TOTAL_REVENUE),
    y = TOTAL_REVENUE,
    fill = factor(FIRST_YEAR)
  )
) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  year_colours +
  labs(
    title = "USA: Total Marijuana Tax Revenue Collected by State",
    x = "State",
    y = "Total Revenue ($)",
    fill = "First Year of Tax Collection"
  ) +
  theme(
    # Slant the state names so they do not overlap
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
    legend.position = "right"
  )

# Pie Chart for 2023 Marijuana Tax Revenue Distribution (Top 10 only)
# The revenue column was already converted to numeric for the bar plot above,
# so it only needs renaming here. PERCENTAGE is each state's share of the
# revenue of ALL states in the file and is therefore computed before the
# top-10 cut.
# Note: this overwrites X2023_Marijuana_Tax_Revenue with the top-10 subset.
X2023_Marijuana_Tax_Revenue <- X2023_Marijuana_Tax_Revenue %>%
  rename(REVENUE_2023 = `2023 MARIJUANA TAX REVENUE`) %>%
  arrange(desc(REVENUE_2023)) %>%
  mutate(PERCENTAGE = REVENUE_2023 / sum(REVENUE_2023) * 100) %>%
  # slice_max() is the current replacement for the superseded top_n();
  # like top_n() it keeps ties by default.
  slice_max(REVENUE_2023, n = 10)

# Labels in millions of dollars. big.mark = "," is set explicitly because
# label_number() separates thousands with a space by default, which would
# render amounts of $1 billion or more as e.g. "$1 082M".
label_millions <- scales::label_number(
  scale = 1e-6,
  prefix = "$",
  suffix = "M",
  big.mark = ","
)

ggplot(X2023_Marijuana_Tax_Revenue, aes(x = "", y = PERCENTAGE, fill = STATE)) +
  geom_col(width = 1) +
  # Polar coordinates turn the single stacked bar into a pie
  coord_polar(theta = "y") +
  theme_void() +
  labs(title = "USA: 2023 Marijuana Tax Revenue Distribution (Top 10 States)") +
  theme(legend.position = "right") +
  # Centre each label inside its slice
  geom_text(
    aes(label = label_millions(REVENUE_2023)),
    position = position_stack(vjust = 0.5),
    size = 4
  )

# Pie Chart for Total Tax Revenue Collected by State (Top 7 only)
# PERCENTAGE is each state's share of the revenue of ALL states in
# Total_Revenue, so it is computed before the top-7 cut.
# NOTE: the variable name says "Top10" but it holds the top 7 states; the
# name is kept so that any other code referring to it keeps working.
Total_Revenue_Top10 <- Total_Revenue %>%
  arrange(desc(TOTAL_REVENUE)) %>%
  mutate(PERCENTAGE = TOTAL_REVENUE / sum(TOTAL_REVENUE) * 100) %>%
  # slice_max() is the current replacement for the superseded top_n();
  # like top_n() it keeps ties by default.
  slice_max(TOTAL_REVENUE, n = 7)

# Labels in millions of dollars. big.mark = "," is set explicitly because
# label_number() separates thousands with a space by default, which would
# render amounts of $1 billion or more as e.g. "$5 600M".
label_total_millions <- scales::label_number(
  scale = 1e-6,
  prefix = "$",
  suffix = "M",
  big.mark = ","
)

ggplot(Total_Revenue_Top10, aes(x = "", y = PERCENTAGE, fill = STATE)) +
  geom_col(width = 1) +
  # Polar coordinates turn the single stacked bar into a pie
  coord_polar(theta = "y") +
  theme_void() +
  labs(title = "USA: Total Marijuana Tax Revenue Distribution (Top 7 States)") +
  theme(legend.position = "right") +
  # Centre each label inside its slice
  geom_text(
    aes(label = label_total_millions(TOTAL_REVENUE)),
    position = position_stack(vjust = 0.5),
    size = 4
  )

# Render the state-by-state legalisation status as a captioned table
Marijuana_Legal_Status %>%
  knitr::kable(caption = "Marijuana Legalisation Status by State")
Marijuana Legalisation Status by State
STATE MARIJUANA LEGALIZATION STATUS
Alabama Medical only
Alaska Recreational legalized
Arizona Recreational legalized
Arkansas Medical only
California Recreational legalized
Colorado Recreational legalized
Connecticut Recreational legalized
Delaware Recreational legalized
District of Columbia Recreational legalized
Florida Medical only
Georgia Medical CBD oil only
Hawaii Medical only
Idaho Illegal
Illinois Recreational legalized
Indiana Medical CBD oil only
Iowa Medical CBD oil only
Kansas Illegal
Kentucky Medical only effective Jan. 1, 2025
Louisiana Medical only
Maine Recreational legalized
Maryland Recreational legalized
Massachusetts Recreational legalized
Michigan Recreational legalized
Minnesota Recreational legalized
Mississippi Medical only
Missouri Recreational legalized
Montana Recreational legalized
Nebraska Illegal
Nevada Recreational legalized
New Hampshire Medical only
New Jersey Recreational legalized
New Mexico Recreational legalized
New York Recreational legalized
North Carolina Illegal
North Dakota Medical only
Ohio Medical only
Oklahoma Medical only
Oregon Recreational legalized
Pennsylvania Medical only
Rhode Island Recreational legalized
South Carolina Illegal
South Dakota Medical only
Tennessee Medical CBD oil only
Texas Medical CBD oil only
Utah Medical only
Vermont Recreational legalized
Virginia Recreational legalized
Washington Recreational legalized
West Virginia Medical only
Wisconsin Medical CBD oil only
Wyoming Illegal