# 1. Folder that holds the raw CSV extracts read by the rest of this script.
# NOTE(review): the earlier note recorded "~/Desktop/projects/kshape" as the
# confirmed working folder, while the value built here ends in "kshape data" --
# confirm which of the two is intended.
data_path <- file.path("~", "Desktop", "projects", "kshape data")


# 2. ## Affinity Daily Total Spending - National.csv

# Locate the national daily-spending extract inside `data_path`.
# The pattern is anchored at both ends so only the exact file name matches
# (not backups or files that merely contain the name).
nat_spending_total <- list.files(
  data_path,
  pattern = "^Affinity Daily Total Spending - National\\.csv$",
  full.names = TRUE
)

# Fail early with a readable message instead of an obscure reader error later.
if (length(nat_spending_total) != 1L) {
  stop(
    "Expected exactly one 'Affinity Daily Total Spending - National.csv' in '",
    data_path, "' but found ", length(nat_spending_total), ".",
    call. = FALSE
  )
}

print(paste("Using data file:", basename(nat_spending_total)))
## [1] "Using data file: Affinity Daily Total Spending - National.csv"

# 3. Load target file
## This file uses different column names than the weekly version.
# "*" and "." are read as missing values in addition to "NA" and "".
spending_data_raw <- read_csv(
  nat_spending_total,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)
# Monthly average of the daily spending columns, reshaped to one row per
# month and income group so it can be passed straight to ggplot.
df_k_plot <- spending_data_raw %>%
  # Keep only the four quartile daily-spend columns (not every column in the
  # file) and rename them to the income-group labels used in the legend.
  select(
    year,
    month,
    Low_Income = daily_spend_19_q1,
    Low_Middle_Income = daily_spend_19_q2,
    High_Middle_Income = daily_spend_19_q3,
    High_Income = daily_spend_19_q4
  ) %>%
  # Average within each year/month. `.groups = "drop"` returns an ungrouped
  # result, so no separate ungroup() is needed and dplyr does not emit the
  # "summarise() has grouped output" message.
  group_by(year, month) %>%
  summarise(
    across(
      c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  # Convert to long format for ggplot
  pivot_longer(
    cols = c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
    names_to = "income_group",
    values_to = "monthly_spending"
  ) %>%
  # Proper date column (first day of the month is used for plotting)
  mutate(date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # Months where every daily value was missing average to NaN; drop them
  filter(!is.na(monthly_spending))
# One-row data frame describing the shaded "recession" band drawn by
# geom_rect(): it spans 2020-01-01 to 2020-06-30 horizontally and the full
# panel height vertically (-Inf / Inf).
recession_df <- local({
  band_dates <- as.Date(c("2020-01-01", "2020-06-30"))
  data.frame(
    start = band_dates[1],
    end   = band_dates[2],
    ymin  = -Inf,
    ymax  = Inf
  )
})
# Static line chart of monthly spending per income group, with the recession
# window shaded and labelled.
k_plot <- df_k_plot %>%
  ggplot(aes(x = date, y = monthly_spending, color = income_group)) +

  # Shaded recession band; it uses its own data, so plot aesthetics are not
  # inherited.
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Recession label, placed at the highest plotted value
  annotate(
    "text",
    x = as.Date("2020-03-15"),
    y = max(df_k_plot$monthly_spending, na.rm = TRUE),
    label = "  Recession",
    color = "black",
    fontface = "bold",
    size = 4
  ) +

  # Lines and actual data points. Line thickness is set with `linewidth`;
  # `size` for lines is deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 2) +

  # Labels
  labs(
    title = "K-Shaped Economic Recovery",
    x = "Date",
    y = "Daily Spending Index",
    color = "Income Group",
    caption = "Source: Affinity, Economic COVID Tracker"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: theme() must come after theme_economist() so its overrides apply
  theme_economist() +
  scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, size = 10),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.key.size = unit(0.7, "line"),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

print(k_plot)

# Persist the plot object so it can be load()-ed elsewhere
save(k_plot, file = "k_plot.RData")
# 2. 
## Affinity  Total Spending MONTHLY - National.csv

# Locate the national monthly spending extract(s) inside `data_path`.
all_nat_spending <- list.files(
  data_path,
  pattern = "Affinity - National - Monthly",
  full.names = TRUE
)

# Fail early with a readable message when nothing matched, rather than
# letting the reader fail on an empty path vector.
if (length(all_nat_spending) == 0L) {
  stop(
    "No file matching 'Affinity - National - Monthly' found in '",
    data_path, "'.",
    call. = FALSE
  )
}

# 3. Load file
## uses different column names than the weekly version.
# "*" and "." are read as missing values in addition to "NA" and "".
all_nat_spending_raw <- read_csv(
  all_nat_spending,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)
# Monthly average of the `spend_all_q*` columns, reshaped to one row per
# month and income group for plotting.
plot_spend_all <- all_nat_spending_raw %>%
  # Relevant columns, renamed to the income-group labels used in the legend
  select(
    year,
    month,
    Low_Income = spend_all_q1,
    Low_Middle_Income = spend_all_q2,
    High_Middle_Income = spend_all_q3,
    High_Income = spend_all_q4
  ) %>%
  # Average within each year/month. `.groups = "drop"` returns an ungrouped
  # result, so no separate ungroup() is needed and dplyr does not emit the
  # "summarise() has grouped output" message.
  group_by(year, month) %>%
  summarise(
    across(
      c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
      ~ mean(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  # Convert to long format for ggplot
  pivot_longer(
    cols = c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
    names_to = "income_group",
    values_to = "monthly_spending"
  ) %>%
  # Proper date column (first day of the month is used for plotting)
  mutate(date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # Drop month/group combinations with no usable value
  filter(!is.na(monthly_spending))
# ggplot version of the "all spending" chart; it is converted to an
# interactive plotly widget further down.
interactive_spend_all <- plot_spend_all %>%
  ggplot(aes(x = date, y = monthly_spending, color = income_group)) +

  # Shaded recession band (own data, so plot aesthetics are not inherited)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +

  # Lines and actual data points. Line thickness is set with `linewidth`;
  # `size` for lines is deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1.2, alpha = 0.8) +
  geom_point(size = 1.5) +

  # Labels
  labs(
    title = "K-Shaped Economy: All Spending",
    x = "Date",
    y = "All Spending Index",
    color = "Income Group",
    caption = "Source: Affinity, Economic COVID Tracker"
  ) +

  # x-axis formatting: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Fix the visible y-range to the range of the plotted data
  coord_cartesian(
    ylim = c(
      min(plot_spend_all$monthly_spending, na.rm = TRUE),
      max(plot_spend_all$monthly_spending, na.rm = TRUE)
    )
  ) +

  # Theme: theme() must come after theme_economist() so its overrides apply
  theme_economist() +
  scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.text = element_text()
  )


# Add a zero reference line before converting to plotly. Line thickness is
# set with `linewidth`; `size` for lines is deprecated since ggplot2 3.4.0.
ggplotly_spend_all <- interactive_spend_all +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.3)

# Interactive version. The source line is re-added here as a plotly
# annotation, with the horizontal legend centred below the plot.
plotly_spend_final <- ggplotly_spend_all %>%
  ggplotly() %>%
  layout(
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -.35
    ),
    annotations = list(
      list(
        # Same source as the ggplot caption: this chart is built from the
        # Affinity spending file (the text previously credited Intuit).
        text = "Source: Affinity, Economic COVID Tracker",
        xref = "paper",
        yref = "paper",
        x = 1,
        y = -.6,
        xanchor = "right",
        yanchor = "bottom",
        showarrow = FALSE,
        font = list(size = 8),
        align = "right"
      )
    )
  )

plotly_spend_final
## Employment download
# Locate the national weekly employment extract inside `data_path`.
# The dot is escaped and the pattern anchored so only the exact file name
# matches (an unescaped "." matches any character in a regex).
employment_data_file <- list.files(
  data_path,
  pattern = "^employment_nat_weekly\\.csv$",
  full.names = TRUE
)

# Fail early with a readable message instead of an obscure reader error later.
if (length(employment_data_file) != 1L) {
  stop(
    "Expected exactly one 'employment_nat_weekly.csv' in '", data_path,
    "' but found ", length(employment_data_file), ".",
    call. = FALSE
  )
}

# Load data
# "*" and "." are read as missing values in addition to "NA" and "".
employment_data <- read_csv(
  employment_data_file,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)

# Monthly mean of the four `emp_incq*` series, in long format: one row per
# year, month and wage group.
date_emp <- employment_data %>%
  # 1. Keep the date parts and the four quartile series, then give the
  #    series the labels shown in the legend.
  select(year, month, emp_incq1, emp_incq2, emp_incq3, emp_incq4) %>%
  rename(
    Q1_Low_Emp_Wage = emp_incq1,
    Q2_Low_Emp_Middle_Wage = emp_incq2,
    Q3_High_Emp_Middle_Wage = emp_incq3,
    Q4_High_Emp_Wage = emp_incq4
  ) %>%
  # 2. Long format: everything except the date parts becomes a wage group
  pivot_longer(
    cols = -c(year, month),
    names_to = "wage_group",
    values_to = "employment_index"
  ) %>%
  # 3. Average the rows falling in each month, per wage group
  group_by(year, month, wage_group) %>%
  summarise(
    avg_employment_index = mean(employment_index, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # 4. First day of the month as the plotting date
  mutate(date_monthly = ymd(paste(year, month, 1, sep = "-"))) %>%
  # NOTE(review): the index is scaled by 1000 here; if a percentage was
  # intended the factor would be 100 -- confirm against the data dictionary.
  mutate(avg_employment_index = avg_employment_index * 1000) %>%
  # Drop month/group combinations with no usable value
  filter(!is.na(avg_employment_index))


# plot

# Line chart of the monthly employment index per wage group, with the
# recession band shaded and a zero reference line.
# NOTE(review): the caption credits the Affinity spending series although the
# chart is built from the employment file -- confirm the intended source.
employment_full_plot <- date_emp %>%
  ggplot(aes(x = date_monthly, y = avg_employment_index, color = wage_group)) +
  # Shaded recession band (own data, so plot aesthetics are not inherited)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +
  # Series, points and the zero baseline
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 1) +
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +
  # One x tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +
  labs(
    title = "Monthly Employment Recovery By Wage Quartile",
    x = "Date",
    y = "Employment Level Loss",
    color = "Wage Group",
    caption = "Source: Affinity Income Shares and Daily Total Spending"
  ) +
  # theme() must come after theme_economist() so its overrides apply
  theme_economist() +
  scale_colour_economist() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    legend.text = element_text(size = 6)
  )

## Print plot (interactive)
# All legend settings are passed in ONE `legend` list: supplying `legend`
# twice to layout() leaves the second (title) specification ambiguous.
# The source note is wrapped in a list of annotations so it is appended
# cleanly to any annotations already produced by ggplotly().
ggplotly(employment_full_plot) %>%
  layout(
    legend = list(
      orientation = "h",
      x = .4,
      xanchor = "center",
      y = -0.4,
      title = list(text = "Income Group", font = list(size = 10))
    ),
    annotations = list(
      list(
        text = "Source: Intuit, Economic COVID Tracker",
        xref = "paper",
        yref = "paper",
        x = 1,
        y = -.4,
        showarrow = FALSE,
        font = list(size = 8),
        align = "left"
      )
    )
  )
# One-row data frame describing the shaded "recession" band drawn by
# geom_rect(): it spans 2020-01-01 to 2020-06-30 horizontally and the full
# panel height vertically (-Inf / Inf).

recession_df <- local({
  band_dates <- as.Date(c("2020-01-01", "2020-06-30"))
  data.frame(
    start = band_dates[1],
    end   = band_dates[2],
    ymin  = -Inf,
    ymax  = Inf
  )
})

# Monthly means of every `merchants_*` and `revenue_*` column, reshaped to
# long format with a metric label and a display-ready industry name.
df_plot_bus_vs_rev <- small_business_raw %>%
  # 1. First-of-month date built from the year and month columns
  mutate(monthly_date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # 2. Keep the date parts plus the two families of indicator columns
  select(
    monthly_date,
    year,
    month,
    starts_with("merchants_"),
    starts_with("revenue_")
  ) %>%
  # 3. Average each indicator within a month; output columns keep their
  #    original names. The month's date is carried along as `date_key`.
  group_by(year, month) %>%
  summarise(
    across(
      c(starts_with("merchants_"), starts_with("revenue_")),
      function(values) mean(values, na.rm = TRUE)
    ),
    date_key = first(monthly_date),
    .groups = "drop"
  ) %>%
  # 4. Long format: one row per month and indicator column
  pivot_longer(
    cols = c(starts_with("merchants_"), starts_with("revenue_")),
    names_to = "indicator_category",
    values_to = "monthly_index_value"
  ) %>%
  # 5. Split the column name into the metric (merchants vs. revenue) and the
  #    industry; the "A_"/"B_" prefixes are what later filters match on.
  mutate(
    Metric_Type = if_else(
      str_detect(indicator_category, "^merchants"),
      "A_Merchants Open",
      "B_Revenue Change"
    ),
    # Strip the metric prefix, turn underscores into spaces, Title Case
    Industry_Group = str_to_title(
      str_replace_all(
        str_remove(indicator_category, "merchants_|revenue_"),
        "_",
        " "
      )
    )
  ) %>%
  # Keep only the columns the plots need, exposing `date_key` as `date`
  rename(date = date_key) %>%
  select(date, monthly_index_value, Metric_Type, Industry_Group) %>%
  filter(!is.na(monthly_index_value))
# Revenue rows only
df_revenue <- df_plot_bus_vs_rev %>%
  filter(Metric_Type == "B_Revenue Change")

# Revenue change per industry (values are multiplied by 100 for the y-axis).
# Individual industries are dashed; the "All" aggregate is drawn solid so it
# stands out. The unused `isAll` column and the linetype scale were dropped:
# no layer maps a linetype aesthetic, so they had no effect.
df_revenue_interactive <- df_revenue %>%
  ggplot(aes(x = date, y = monthly_index_value * 100, color = Industry_Group)) +

  # 1. Recession shade (own data, so plot aesthetics are not inherited)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey10",
    alpha = 0.2
  ) +

  # 2. Baseline (0% change)
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +

  # 3. Dashed lines for every industry except "All", which gets its own solid
  #    layer below (same split as the businesses-open chart). `linewidth`
  #    replaces `size`, deprecated for lines since ggplot2 3.4.0.
  geom_line(
    data = filter(df_revenue, Industry_Group != "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "dashed"
  ) +
  geom_point(size = 1) +
  geom_line(
    data = filter(df_revenue, Industry_Group == "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "solid"
  ) +

  # 4. Labels and titles
  labs(
    title = "Small Business Revenue Change",
    subtitle = "Percentage Change in Net Revenue vs. Baseline (Jan 2020)",
    x = "Date",
    y = "Revenue % Change",
    color = "Industry",
    caption = "Source: Womply – Small Business Revenue and Data (2020–2023)"
  ) +

  # 5. Formatting: theme() must come after theme_economist()
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
  theme_economist() +
  scale_colour_viridis(discrete = TRUE, option = "C") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

## Interactive
# All legend settings are passed in ONE `legend` list: supplying `legend`
# twice to layout() leaves the second (title) specification ambiguous.
# The legend is titled "Industry" to match what the colours encode.
# The source note is wrapped in a list of annotations so it is appended
# cleanly to any annotations already produced by ggplotly().
ggplotly(df_revenue_interactive) %>%
  layout(
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -0.35,
      title = list(text = "Industry", font = list(size = 10))
    ),
    annotations = list(
      list(
        text = "Source: Womply, Economic COVID Tracker",
        xref = "paper",
        yref = "paper",
        x = 1,
        y = -0.6,
        showarrow = FALSE,
        font = list(size = 8),
        align = "left"
      )
    )
  )
# Merchants-open rows only
df_business <- df_plot_bus_vs_rev %>%
  filter(Metric_Type == "A_Merchants Open")

# Change in open businesses per industry (values are multiplied by 100 for
# the y-axis). Individual industries are dashed; the "All" aggregate is solid.
# The unused `isAll` column and the linetype scale were dropped: no layer
# maps a linetype aesthetic, so they had no effect.
df_business_interactive <- df_business %>%
  ggplot(aes(x = date, y = monthly_index_value * 100, color = Industry_Group)) +

  # 1. Shade the recession period (own data, aesthetics not inherited)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey10",
    alpha = 0.2
  ) +

  # 2. Baseline (0% change)
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +

  # 3. Trend lines. `linewidth` replaces `size`, which is deprecated for
  #    lines since ggplot2 3.4.0.
  geom_line(
    data = filter(df_business, Industry_Group != "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "dashed"
  ) +
  geom_line(
    data = filter(df_business, Industry_Group == "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "solid"
  ) +
  geom_point(size = 1.5) +

  # 4. Labels and titles
  labs(
    title = "Businesses Opened After COVID",
    subtitle = "% Change v.s. Baseline (Jan 2020)",
    x = "Date",
    y = "% Change in Businesses Opened",
    color = "Industry",
    caption = "Source: Womply, Economic COVID Tracker"
  ) +

  # 5. Formatting: theme() must come after theme_economist()
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
  theme_economist() +
  scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )
# Interactive version of the businesses-open chart.
# All legend settings are passed in ONE `legend` list: supplying `legend`
# twice to layout() leaves the second (title) specification ambiguous.
# The legend is titled "Industry" to match what the colours encode.
# The source note is wrapped in a list of annotations so it is appended
# cleanly to any annotations already produced by ggplotly().
ggplotly(df_business_interactive) %>%
  layout(
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -0.4,
      title = list(text = "Industry", font = list(size = 10))
    ),
    annotations = list(
      list(
        text = "Source: Womply, Economic COVID Tracker",
        xref = "paper",
        yref = "paper",
        x = 1,
        y = -0.65,
        showarrow = FALSE,
        font = list(size = 8),
        align = "left"
      )
    )
  )
# One-row data frame describing the shaded "recession" band drawn by
# geom_rect(): it spans 2020-01-01 to 2020-06-30 horizontally and the full
# panel height vertically (-Inf / Inf).
recession_df <- local({
  band_dates <- as.Date(c("2020-01-01", "2020-06-30"))
  data.frame(
    start = band_dates[1],
    end   = band_dates[2],
    ymin  = -Inf,
    ymax  = Inf
  )
})
# Job-posting change per industry, with the recession band shaded and
# labelled. The plot is not assigned, so it is printed when this runs at
# top level.
df_industry_jobs %>%
  ggplot(aes(x = date, y = job_posting_change, color = industry)) +

  # Shaded recession band (own data, so plot aesthetics are not inherited)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.5
  ) +

  # Recession label, placed at the highest plotted value
  annotate(
    "text",
    x = as.Date("2020-03-15"),
    y = max(df_industry_jobs$job_posting_change, na.rm = TRUE),
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +

  # Zero baseline, then lines and actual data points. `linewidth` replaces
  # `size`, which is deprecated for lines since ggplot2 3.4.0.
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 1) +

  # Labels (axis label spacing and legend-title capitalisation fixed)
  labs(
    title = "Job Postings After Covid by Industry",
    subtitle = "Baseline = January 2020",
    x = "Date",
    y = "Job Posting % Change",
    color = "Industry",
    caption = "Source: LightCast, Economic COVID Tracker 2020"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: theme() must come after theme_economist() so its overrides apply
  theme_economist() +
  scale_color_viridis(discrete = TRUE, option = "C") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

# Job postings per `Skills` group, with the recession band shaded and
# labelled. The plot is not assigned, so it is printed when this runs at
# top level.
df_skill_jobs %>%
  ggplot(aes(x = date, y = posting, color = Skills)) +

  # Shaded recession band (own data, so plot aesthetics are not inherited)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.5
  ) +

  # Recession label, placed at the highest plotted value
  annotate(
    "text",
    x = as.Date("2020-03-15"),
    y = max(df_skill_jobs$posting, na.rm = TRUE),
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +

  # Lines and actual data points. `linewidth` replaces `size`, which is
  # deprecated for lines since ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +

  # Zero baseline, drawn on top of the series
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 1
  ) +

  # Labels. The legend title describes the `Skills` colour mapping; it was
  # previously labelled "Industry".
  labs(
    title = "Job Postings After Covid By Skill Level",
    subtitle = "Baseline = January 2020",
    x = "Month",
    y = "Job Posting % Change",
    color = "Skill Level",
    caption = "Source: LightCast, Economic COVID Tracker 2020"
  ) +

  # Format x-axis: one tick every three months, e.g. "Jan 2020"
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +

  # Theme: theme() must come after theme_economist() so its overrides apply
  theme_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom"
  )