# Recession window drawn as a shaded band behind the series.
# ymin/ymax of -Inf/Inf make the rectangle span the full panel height.
recession_df <- data.frame(
  start = as.Date("2020-01-01"),
  end   = as.Date("2020-06-30"),
  ymin  = -Inf,
  ymax  = Inf
)

# Static spending chart: one coloured line per income group in df_k_plot.
k_plot <- df_k_plot %>%
  ggplot(aes(x = date, y = monthly_spending, color = income_group)) +

  # Recession band. inherit.aes = FALSE because recession_df carries none of
  # the columns referenced by the plot-level aesthetics.
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Recession label, placed at the highest observed spending value
  annotate("text",
    x = as.Date("2020-03-15"),
    y = max(df_k_plot$monthly_spending, na.rm = TRUE),
    label = "  Recession",
    color = "black",
    fontface = "bold",
    size = 4
  ) +

  # Lines and actual data points.
  # Line thickness is set with `linewidth`; `size` for lines was deprecated
  # in ggplot2 3.4.0 (it still controls the point size below).
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 2) +

  # Labels
  labs(
    title = "K-Shaped Economic Recovery",
    x = "Date",
    y = "Daily Spending Index",
    color = "Income Group",
    caption  = "Source: Affinity, Economic COVID Tracker"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: Economist base first, then local overrides
  theme_economist() + scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, size = 10),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.key.size = unit(0.7, "line"),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

print(k_plot)

# Persist the plot object so it can be reloaded with load("k_plot.RData")
save(k_plot, file = "k_plot.RData")
# Recession window for the shaded band (full panel height via -Inf/Inf)
recession_df <- data.frame(
  start = as.Date("2020-01-01"),
  end   = as.Date("2020-06-30"),
  ymin  = -Inf,
  ymax  = Inf
)

# ggplot version of the spending chart that is handed to ggplotly() below
interactive_daily_spending <- df_k_plot %>%
  ggplot(aes(date, monthly_spending, color = income_group)) +

  # Recession band; uses its own data, so plot-level aesthetics are not inherited
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Recession label, slightly below the highest observed value
  annotate("text",
    x = as.Date("2020-03-15"),
    y = max(df_k_plot$monthly_spending, na.rm = TRUE) * 0.95,
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +

  # Lines and actual data points.
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 2) +

  # Labels
  labs(
    title = "K-Shaped Economic Recovery",
    subtitle = "Monthly Spending",
    x = "Date",
    y = "Daily Spending Index",
    color = "Income Group",
    caption  = "Source: Affinity, Economic COVID Tracker"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: Economist base first, then local overrides
  theme_economist() + scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "top"
  )

# Interactive widget with plotly's default legend placement
ggplotly(interactive_daily_spending)
# TODO: fix formatting?

# Same widget with a horizontal legend centred below the plot area
ggplotly(interactive_daily_spending) %>%
  layout(legend = list(orientation = "h",
                       x = 0.5,
                       xanchor = "center",
                       y = -0.3))
# All-spending chart: one coloured line per income group in plot_spend_all
interactive_spend_all <- plot_spend_all %>%
  ggplot(aes(x = date, y = monthly_spending, color = income_group)) +

  # Recession band; uses its own data, so plot-level aesthetics are not inherited
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Lines and actual data points.
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1.2, alpha = 0.8) +
  geom_point(size = 2) +

  # Labels
  labs(
    title = "K-Shaped Economy: All Spending",
    x = "Date",
    y = "All Spending Index",
    color = "Income Group",
    caption = "Source: Affinity, Economic COVID Tracker"
  ) +

  # x-axis formatting: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Pin the visible y window to the observed data range (min, max)
  coord_cartesian(
    ylim = range(plot_spend_all$monthly_spending, na.rm = TRUE)
  ) +

  # Theme: Economist base first, then local overrides
  theme_economist() + scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.text = element_text()
  )

# Interactive widget with a horizontal legend centred below the plot area
ggplotly(interactive_spend_all) %>%
  layout(legend = list(orientation = "h",
                       x = 0.5,
                       xanchor = "center",
                       y = -0.3))
## Employment download
# Locate the national weekly employment file inside data_path.
# `pattern` is a regular expression, so the dot is escaped to match a
# literal "." rather than any character.
employment_data_file <- list.files(
  data_path,
  pattern = "employment_nat_weekly\\.csv",
  full.names = TRUE
)

# Fail early with a clear message instead of an opaque reader error
if (length(employment_data_file) == 0) {
  stop(
    "No employment_nat_weekly.csv file found in: ",
    paste(data_path, collapse = ", "),
    call. = FALSE
  )
}

# Load the data; "*" and "." are treated as missing in addition to NA / ""
employment_data <- read_csv(
  employment_data_file,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)

# Monthly average employment index per wage quartile, in long format
date_emp <- employment_data %>%
  # 1. Keep the date parts and rename the four quartile columns
  select(
    year,
    month,
    Q1_Low_Emp_Wage = emp_incq1,
    Q2_Low_Emp_Middle_Wage = emp_incq2,
    Q3_High_Emp_Middle_Wage = emp_incq3,
    Q4_High_Emp_Wage = emp_incq4
  ) %>%

  # 2. Long format: one row per (year, month, wage_group) observation.
  #    starts_with("Q") picks up exactly the four renamed columns above.
  pivot_longer(
    cols = starts_with("Q"),
    names_to = "wage_group",
    values_to = "employment_index"
  ) %>%

  # 3. Average within each year / month / wage group
  group_by(year, month, wage_group) %>%
  summarise(
    avg_employment_index = mean(employment_index, na.rm = TRUE),
    .groups = "drop"
  ) %>%

  # 4. Build a Date on the first of each month and rescale the index
  mutate(
    date_monthly = ymd(paste(year, month, 1, sep = "-")),
    # NOTE(review): the factor of 1000 is kept as-is; confirm it matches the
    # intended unit of the plotted index.
    avg_employment_index = avg_employment_index * 1000
  ) %>%

  # Groups whose values were all missing average to NaN; drop them
  filter(!is.na(avg_employment_index))


# Employment chart: monthly average index per wage group, with the recession
# window shaded and a zero reference line.
employment_full_plot <- date_emp %>%
  ggplot(
    mapping = aes(
      x = date_monthly,
      y = avg_employment_index,
      color = wage_group
    )
  ) +
  # Shaded band comes from recession_df, so plot-level aesthetics are skipped
  geom_rect(
    mapping = aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    data = recession_df,
    fill = "grey",
    alpha = 0.3,
    inherit.aes = FALSE
  ) +
  geom_line(alpha = 0.5, linewidth = 1) +
  geom_point(size = 1) +
  # Zero baseline
  geom_hline(
    yintercept = 0,
    color = "black",
    linetype = "solid",
    linewidth = 0.5
  ) +
  # Quarterly ticks formatted like "Jan 2020"
  scale_x_date(date_breaks = "3 month", date_labels = "%b %Y") +
  labs(
    title = "Monthly Employment Recovery By Wage Quartile",
    x = "Date",
    y = "Employment Level Loss",
    color = "Wage Group",
    caption = "Source: Affinity Income Shares and Daily Total Spending"
  ) +
  # Economist look first, then the local overrides
  theme_economist() +
  scale_colour_economist() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "bottom",
    legend.text = element_text(size = 6)
  )

# Static rendering
print(employment_full_plot)

# Interactive rendering; legend laid out horizontally, centred under the plot
layout(
  ggplotly(employment_full_plot),
  legend = list(
    orientation = "h",
    x = 0.5,
    xanchor = "center",
    y = -0.3
  )
)
# Recession window used for the shaded band; -Inf/Inf spans the full panel height
recession_df <- data.frame(
  start = as.Date("2020-01-01"),
  end = as.Date("2020-06-30"),
  ymin = -Inf,
  ymax = Inf
)

# Long-format monthly means of every merchants_* and revenue_* column in
# small_business_raw, labelled by metric type and industry.
df_plot_bus_vs_rev <- small_business_raw %>%
  # Stamp each row with the first day of its month
  mutate(monthly_date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # Keep the date keys plus the two families of indicator columns
  select(
    monthly_date,
    year,
    month,
    starts_with("merchants_"),
    starts_with("revenue_")
  ) %>%
  # Collapse to one row per calendar month
  group_by(year, month) %>%
  summarise(
    # Mean of each indicator column; output columns keep their input names
    across(
      c(starts_with("merchants_"), starts_with("revenue_")),
      function(values) mean(values, na.rm = TRUE)
    ),
    # Every row in a group carries the same monthly_date, so take the first
    date_key = first(monthly_date),
    .groups = "drop"
  ) %>%
  # One row per (month, indicator column)
  pivot_longer(
    cols = c(starts_with("merchants_"), starts_with("revenue_")),
    names_to = "indicator_category",
    values_to = "monthly_index_value"
  ) %>%
  mutate(
    # Which family the source column belonged to
    Metric_Type = if_else(
      str_detect(indicator_category, "^merchants"),
      "A_Merchants Open",
      "B_Revenue Change"
    ),
    # Strip the family prefix, swap underscores for spaces, then title-case
    Industry_Group = str_to_title(
      str_replace_all(
        str_remove(indicator_category, "merchants_|revenue_"),
        "_",
        " "
      )
    )
  ) %>%
  # Only the columns the plots need, with date_key exposed as `date`
  select(date = date_key, monthly_index_value, Metric_Type, Industry_Group) %>%
  filter(!is.na(monthly_index_value))
# Revenue rows only
df_revenue <- df_plot_bus_vs_rev %>%
  filter(Metric_Type == "B_Revenue Change")

# Revenue chart: the individual industries are dashed, the "All" aggregate is
# solid so it stands out. Values are multiplied by 100 for the y axis.
df_revenue_interactive <- df_revenue %>%
  ggplot(aes(x = date, y = monthly_index_value * 100, color = Industry_Group)) +

    # 1. Recession shade; uses its own data, so plot aesthetics are not inherited
    geom_rect(
      data = recession_df,
      inherit.aes = FALSE,
      aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
      fill = "grey10",
      alpha = 0.2
    ) +

    # 2. Baseline (0% change)
    geom_hline(yintercept = 0, linetype = "solid", color = "black", linewidth = 0.5) +

    # 3. Trend lines. "All" is excluded from the dashed layer so it is drawn
    #    once, as a solid line, rather than dashed and solid on top of each
    #    other. `linewidth` replaces the `size` argument deprecated for lines
    #    in ggplot2 3.4.0.
    geom_line(
      data = filter(df_revenue, Industry_Group != "All"),
      linewidth = 1,
      alpha = 0.8,
      linetype = "dashed"
    ) +
    geom_line(
      data = filter(df_revenue, Industry_Group == "All"),
      linewidth = 1,
      alpha = 0.8,
      linetype = "solid"
    ) +

    # 4. Labels and titles
    labs(
      title = "Small Business Revenue Change",
      subtitle = "Percentage Change in Net Revenue vs. Baseline (Jan 2020)",
      x = "Date",
      y = "Revenue % Change",
      color = "Industry",
      caption = "Source: Womply – Small Business Revenue and Data (2020–2023)"
    ) +

    # 5. Formatting: one tick every three months, e.g. "Jan 2020"
    scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +

    # Economist base first, then local overrides
    theme_economist() + scale_colour_economist() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1),
      axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
      plot.title = element_text(face = "bold", size = 16),
      legend.position = "bottom",
      legend.text = element_text(size = 10),
      legend.title = element_text(size = 10)
    )

## Interactive
# Horizontal legend centred below the plot area
ggplotly(df_revenue_interactive) %>%
  layout(legend = list(orientation = "h",
                       x = 0.5,
                       xanchor = "center",
                       y = -0.35))
# Merchants-open rows only
df_business <- df_plot_bus_vs_rev %>%
  filter(Metric_Type == "A_Merchants Open")

# Businesses-open chart: the individual industries are dashed, the "All"
# aggregate is solid. Values are multiplied by 100 for the y axis.
df_business_interactive <- df_business %>%
  ggplot(aes(x = date, y = monthly_index_value * 100, color = Industry_Group)) +

    # 1. Recession shade; uses its own data, so plot aesthetics are not inherited
    geom_rect(
      data = recession_df,
      inherit.aes = FALSE,
      aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
      fill = "grey10",
      alpha = 0.2
    ) +

    # 2. Baseline (0% change)
    geom_hline(yintercept = 0, linetype = "solid", color = "black", linewidth = 0.5) +

    # 3. Trend lines, split so "All" can use a different line type.
    #    `linewidth` replaces the `size` argument deprecated for lines in
    #    ggplot2 3.4.0; linetype is a fixed setting, not an aesthetic.
    geom_line(
      data = filter(df_business, Industry_Group != "All"),
      linewidth = 1,
      alpha = 0.8,
      linetype = "dashed"
    ) +
    geom_line(
      data = filter(df_business, Industry_Group == "All"),
      linewidth = 1,
      alpha = 0.8,
      linetype = "solid"
    ) +
    # Points for every series, including "All"
    geom_point(size = 1.5) +

    # 4. Labels and titles
    labs(
      title = "Businesses Opened",
      subtitle = "% Change v.s. Baseline (Jan 2020)",
      x = "Date",
      y = "% Change in Businesses Opened",
      color = "Industry",
      caption = "Source: Womply, Economic COVID Tracker"
    ) +

    # 5. Formatting: one tick every three months, e.g. "Jan 2020"
    scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +

    # Economist base first, then local overrides
    theme_economist() + scale_colour_economist() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1),
      axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
      legend.position = "bottom",
      legend.text = element_text(size = 10),
      legend.title = element_text(size = 10)
    )

# Interactive widget with a horizontal legend centred below the plot area
ggplotly(df_business_interactive) %>%
  layout(legend = list(orientation = "h", x = 0.5, xanchor = "center", y = -0.4))
# Make the Economist theme the session default for plots created afterwards
theme_set(theme_economist())

# Reusable set of theme overrides; add it to a plot with `+ theme_oi_kshape`
theme_oi_kshape <- local({
  # Both axes share the same tick-label styling
  axis_tick_text <- element_text(size = 11, color = "black")

  theme(
    # Title block: bold left-aligned title, left-aligned subtitle,
    # right-aligned grey caption
    plot.title = element_text(face = "bold", size = 18, hjust = 0),
    plot.subtitle = element_text(size = 13, hjust = 0),
    plot.caption = element_text(size = 10, color = "gray50", hjust = 1),

    # Grid and axes: dotted horizontal major grid lines only, solid x axis line
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"),
    axis.line.x = element_line(color = "black", linewidth = 0.5),
    axis.text.x = axis_tick_text,
    axis.text.y = axis_tick_text,

    # Facet strips (only relevant when the plot is faceted)
    strip.text = element_text(face = "bold", size = 12, hjust = 0),
    panel.spacing = unit(1, "lines"),

    # Legend below the plot with a bold title
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10)
  )
})
# Recession window for the shaded band (full panel height via -Inf/Inf)
recession_df <- data.frame(
  start = as.Date("2020-01-01"),
  end   = as.Date("2020-06-30"),
  ymin  = -Inf,
  ymax  = Inf
)

# Job-postings chart: one coloured line per industry in df_industry_jobs.
# The plot is not assigned, so it is printed when this statement runs.
df_industry_jobs %>%
  ggplot(aes(x = date, y = job_posting_change, color = industry)) +

  # Recession band; uses its own data, so plot-level aesthetics are not inherited
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Recession label, placed at the highest observed value
  annotate("text",
    x = as.Date("2020-03-15"),
    y = max(df_industry_jobs$job_posting_change, na.rm = TRUE),
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +

  # Zero baseline drawn first so the series sit on top of it
  geom_hline(yintercept = 0, linetype = "solid", color = "black", linewidth = 0.5) +

  # Lines and actual data points.
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 1) +

  # Labels (axis and legend titles spelled like the other charts in this file)
  labs(
    title = "Job Postings After Covid by Industry",
    subtitle = "Baseline = January 2020",
    x = "Date",
    y = "Job Posting % Change",
    color = "Industry",
    caption = "Source: LightCast, Economic COVID Tracker 2020"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: Economist base with a discrete viridis palette (option "C")
  theme_economist() + scale_color_viridis(discrete = TRUE, option = "C") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

# Job-postings chart: one coloured line per value of `Skills` in df_skill_jobs.
# The plot is not assigned, so it is printed when this statement runs.
df_skill_jobs %>%
  ggplot(aes(x = date, y = posting, color = Skills)) +

  # Recession band; uses its own data, so plot-level aesthetics are not inherited
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Recession label, placed at the highest observed value
  annotate("text",
    x = as.Date("2020-03-15"),
    y = max(df_skill_jobs$posting, na.rm = TRUE),
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +

  # Lines and actual data points.
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +

  # Zero baseline, added after the series and therefore drawn over them
  geom_hline(yintercept = 0, linetype = "solid", color = "black", linewidth = 1) +

  # Labels. The legend title names the mapped variable (Skills), in line with
  # the chart title.
  labs(
    title = "Job Postings After Covid By Skill Level",
    subtitle = "Baseline = January 2020",
    x = "Month",
    y = "Job Posting % Change",
    color = "Skill Level",
    caption = "Source: LightCast, Economic COVID Tracker 2020"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: Economist base first, then local overrides
  theme_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom"
  )