# 1. Folder that holds the raw CSV files.
# NOTE(review): an earlier note recorded "~/Desktop/projects/kshape" as the
# confirmed working path, which differs from the value used below -- confirm.
data_path <- "~/Desktop/projects/kshape data"

# 2. Locate "Affinity Daily Total Spending - National.csv" inside data_path.
nat_spending_total <- list.files(
  data_path,
  pattern = "Affinity Daily Total Spending - National\\.csv",
  full.names = TRUE
)

# Fail early with a clear message: zero matches would otherwise surface as an
# opaque reader error, and several matches would all be handed to read_csv()
# (recent readr versions combine multiple paths instead of rejecting them).
if (length(nat_spending_total) != 1L) {
  stop(
    "Expected exactly one daily national spending file in '", data_path,
    "' but found ", length(nat_spending_total), ".",
    call. = FALSE
  )
}
print(paste("Using data file:", basename(nat_spending_total)))
## [1] "Using data file: Affinity Daily Total Spending - National.csv"

# 3. Load the target file.
## This file uses different column names than the weekly version.
## "*" and "." are read as missing values in addition to "NA" and "".
spending_data_raw <- read_csv(
  nat_spending_total,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)
# Monthly mean of the daily spending series for each income quartile, in long
# format (one row per month and income group) ready for ggplot.
df_k_plot <- spending_data_raw %>%
  # Select only the relevant columns FOR DAILY SPEND, not all of them
  select(
    year,
    month,
    Low_Income = daily_spend_19_q1,
    Low_Middle_Income = daily_spend_19_q2,
    High_Middle_Income = daily_spend_19_q3,
    High_Income = daily_spend_19_q4
  ) %>%
  # Average the daily values within each year/month
  group_by(year, month) %>%
  summarise(
    across(
      c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
      ~ mean(.x, na.rm = TRUE)
    ),
    # Drop the grouping explicitly; this also silences the
    # "summarise() has grouped output by 'year'" message.
    .groups = "drop"
  ) %>%
  # Convert to long format for ggplot
  pivot_longer(
    cols = c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
    names_to = "income_group",
    values_to = "monthly_spending"
  ) %>%
  # Proper date (first day of the month is used for plotting)
  mutate(date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # Months with no usable observations average to NaN; remove them
  filter(!is.na(monthly_spending))
# One-row data frame describing the shaded recession band drawn by geom_rect():
# it spans 2020-01-01 to 2020-06-30 horizontally and, through the infinite
# ymin/ymax, the full height of whichever panel it is added to.
recession_df <- data.frame(
  start = as.Date("2020-01-01"), end = as.Date("2020-06-30"),
  ymin = -Inf, ymax = Inf
)
# Static chart of monthly spending by income group with the recession window
# shaded and labelled. The plot object is printed and then saved to disk.
k_plot <- df_k_plot %>%
  ggplot(aes(x = date, y = monthly_spending, color = income_group)) +
  # Recession band (own data frame, so it must not inherit the line aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +
  # Recession label, placed at the highest observed value
  annotate(
    "text",
    x = as.Date("2020-03-15"),
    y = max(df_k_plot$monthly_spending, na.rm = TRUE),
    label = " Recession",
    color = "black",
    fontface = "bold",
    size = 4
  ) +
  # Lines and actual data points.
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 2) +
  # Labels
  labs(
    title = "K-Shaped Economic Recovery",
    x = "Date",
    y = "Daily Spending Index",
    color = "Income Group",
    caption = "Source: Affinity, Economic COVID Tracker"
  ) +
  # Format x-axis
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +
  # Theme
  theme_economist() +
  scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, size = 10),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.key.size = unit(0.7, "line"),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
print(k_plot)

# Persist the plot object so it can be reloaded with load("k_plot.RData")
save(k_plot, file = "k_plot.RData")
# 2. Locate the national MONTHLY total-spending file
## (file name contains "Affinity - National - Monthly")
all_nat_spending <- list.files(
  data_path,
  pattern = "Affinity - National - Monthly",
  full.names = TRUE
)

# The pattern is broad (no extension, not anchored), so make sure it resolves
# to exactly one file before reading; several matches would otherwise all be
# passed to read_csv().
if (length(all_nat_spending) != 1L) {
  stop(
    "Expected exactly one monthly national spending file in '", data_path,
    "' but found ", length(all_nat_spending), ".",
    call. = FALSE
  )
}

# 3. Load file
## Uses different column names than the weekly version.
## "*" and "." are read as missing values in addition to "NA" and "".
all_nat_spending_raw <- read_csv(
  all_nat_spending,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)
# Monthly mean of the all-category spending series for each income quartile,
# in long format (one row per month and income group) ready for ggplot.
plot_spend_all <- all_nat_spending_raw %>%
  # Relevant columns
  select(
    year,
    month,
    Low_Income = spend_all_q1,
    Low_Middle_Income = spend_all_q2,
    High_Middle_Income = spend_all_q3,
    High_Income = spend_all_q4
  ) %>%
  # Average within each year/month
  group_by(year, month) %>%
  summarise(
    across(
      c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
      ~ mean(.x, na.rm = TRUE)
    ),
    # Drop the grouping explicitly; this also silences the
    # "summarise() has grouped output by 'year'" message.
    .groups = "drop"
  ) %>%
  # Convert to long format for ggplot
  pivot_longer(
    cols = c(Low_Income, Low_Middle_Income, High_Middle_Income, High_Income),
    names_to = "income_group",
    values_to = "monthly_spending"
  ) %>%
  # Proper date (first day of the month is used for plotting)
  mutate(date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # Months with no usable observations average to NaN; remove them
  filter(!is.na(monthly_spending))
# ggplot version of the all-spending chart; it is converted to an interactive
# plotly widget further down.
interactive_spend_all <- plot_spend_all %>%
  ggplot(aes(x = date, y = monthly_spending, color = income_group)) +
  # Recession band (own data frame, so it must not inherit the line aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +
  # Lines and actual data points.
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1.2, alpha = 0.8) +
  geom_point(size = 1.5) +
  # Labels
  labs(
    title = "K-Shaped Economy: All Spending",
    x = "Date",
    y = "All Spending Index",
    color = "Income Group",
    caption = "Source: Affinity, Economic COVID Tracker"
  ) +
  # x-axis formatting
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +
  # Zoom the y-axis to the observed min/max without dropping any data
  coord_cartesian(
    ylim = range(plot_spend_all$monthly_spending, na.rm = TRUE)
  ) +
  # Theme
  theme_economist() +
  scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.text = element_text()
  )
# Add a zero baseline to the all-spending chart, then convert it to plotly.
# `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
ggplotly_spend_all <- interactive_spend_all +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.3)

# Interactive version: horizontal legend centred under the plot and a source
# note anchored to the bottom-right corner in paper coordinates.
plotly_spend_final <- ggplotly_spend_all %>%
  ggplotly() %>%
  layout(
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -.35
    ),
    annotations = list(
      list(
        # The spending series is read from the Affinity file and the ggplot
        # caption credits Affinity, so the note credits the same provider
        # (it previously said "Intuit").
        text = "Source: Affinity, Economic COVID Tracker",
        xref = "paper",
        yref = "paper",
        x = 1,
        y = -.6,
        xanchor = "right",
        yanchor = "bottom",
        showarrow = FALSE,
        font = list(size = 8),
        align = "right"
      )
    )
  )
plotly_spend_final
## Employment data
# Locate the national weekly employment file. The dot is escaped so it matches
# a literal "." rather than any character, as in the spending file pattern.
employment_data_file <- list.files(
  data_path,
  pattern = "employment_nat_weekly\\.csv",
  full.names = TRUE
)

# Fail early when the file is missing or the pattern is ambiguous; several
# matches would otherwise all be passed to read_csv().
if (length(employment_data_file) != 1L) {
  stop(
    "Expected exactly one national weekly employment file in '", data_path,
    "' but found ", length(employment_data_file), ".",
    call. = FALSE
  )
}

# Load data ("*" and "." are read as missing values in addition to "NA", "")
employment_data <- read_csv(
  employment_data_file,
  na = c("NA", "*", "", "."),
  show_col_types = FALSE
)
# Monthly mean employment index per wage quartile, in long format.
# Result columns: year, month, wage_group, avg_employment_index, date_monthly.
date_emp <- employment_data %>%
  # Keep the calendar keys plus all four quartile series under plot labels
  select(
    year, month,
    Q1_Low_Emp_Wage = emp_incq1,
    Q2_Low_Emp_Middle_Wage = emp_incq2,
    Q3_High_Emp_Middle_Wage = emp_incq3,
    Q4_High_Emp_Wage = emp_incq4
  ) %>%
  # One row per observation and wage quartile
  pivot_longer(
    cols = starts_with("Q"),
    names_to = "wage_group",
    values_to = "employment_index"
  ) %>%
  # Collapse the observations to one mean per year, month and quartile
  group_by(year, month, wage_group) %>%
  summarise(
    avg_employment_index = mean(employment_index, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Rescale the mean by a factor of 1000
  mutate(avg_employment_index = avg_employment_index * 1000) %>%
  # First day of the month serves as the plotting date
  mutate(date_monthly = ymd(paste(year, month, 1, sep = "-"))) %>%
  # Discard year/month/quartile cells that had no usable values
  filter(!is.na(avg_employment_index))
# Monthly employment index by wage quartile with the recession window shaded
# and a zero baseline.
employment_full_plot <- ggplot(
  date_emp,
  aes(x = date_monthly, y = avg_employment_index, color = wage_group)
) +
  # Recession band (own data frame, so it must not inherit the line aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.3
  ) +
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 1) +
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +
  labs(
    title = "Monthly Employment Recovery By Wage Quartile",
    x = "Date",
    y = "Employment Level Loss",
    color = "Wage Group",
    # NOTE(review): this caption credits Affinity spending data while the
    # interactive source note below credits Intuit -- confirm which provider
    # is correct for the employment series and align the two.
    caption = "Source: Affinity Income Shares and Daily Total Spending"
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +
  theme_economist() +
  scale_colour_economist() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    legend.text = element_text(size = 6)
  )

## Print the interactive plot
ggplotly(employment_full_plot) %>%
  layout(
    # A single `legend` entry: the call previously passed `legend =` twice,
    # which makes it ambiguous which settings are applied. The title now
    # matches the "Wage Group" colour label instead of "Income Group".
    legend = list(
      orientation = "h",
      x = .4,
      xanchor = "center",
      y = -0.4,
      title = list(text = "Wage Group", font = list(size = 10))
    ),
    annotations = list(
      text = "Source: Intuit, Economic COVID Tracker",
      xref = "paper",
      yref = "paper",
      x = 1,
      y = -.4,
      showarrow = FALSE,
      font = list(size = 8),
      align = "left"
    )
  )
# Recession band for geom_rect(): 2020-01-01 through 2020-06-30 on the x-axis,
# unbounded on the y-axis so the shading fills the whole panel height.
recession_df <- data.frame(
  start = as.Date("2020-01-01"), end = as.Date("2020-06-30"),
  ymin = -Inf, ymax = Inf
)
# Long-format monthly means of every `merchants_*` and `revenue_*` column in
# small_business_raw. Output columns: date, monthly_index_value, Metric_Type
# and Industry_Group.
df_plot_bus_vs_rev <- small_business_raw %>%
  # 1. Build a date on the first day of each row's month
  mutate(monthly_date = as.Date(paste(year, month, "01", sep = "-"))) %>%
  # 2. Keep the date keys and the two families of indicator columns
  select(
    monthly_date, year, month,
    starts_with("merchants_"), starts_with("revenue_")
  ) %>%
  # 3. Collapse to one row per year/month
  group_by(year, month) %>%
  summarise(
    # Mean of every indicator column; column names are left unchanged
    across(
      c(starts_with("merchants_"), starts_with("revenue_")),
      function(column) mean(column, na.rm = TRUE)
    ),
    # Carry the month's date through the aggregation
    date_key = first(monthly_date),
    .groups = "drop"
  ) %>%
  # 4. One row per month and indicator (grouping keys are not pivoted)
  pivot_longer(
    cols = c(starts_with("merchants_"), starts_with("revenue_")),
    names_to = "indicator_category",
    values_to = "monthly_index_value"
  ) %>%
  # 5. Split each indicator name into its metric and its industry
  mutate(
    # Names beginning with "merchants" are the open-merchants metric;
    # everything else is treated as revenue
    Metric_Type = if_else(
      str_detect(indicator_category, "^merchants"),
      "A_Merchants Open",
      "B_Revenue Change"
    ),
    # Industry label: strip the metric prefix, turn underscores into spaces,
    # then title-case the result
    Industry_Group = str_to_title(
      str_replace_all(
        str_remove(indicator_category, "merchants_|revenue_"),
        "_",
        " "
      )
    )
  ) %>%
  # Keep only what the plots need, then drop months without a value
  select(date = date_key, monthly_index_value, Metric_Type, Industry_Group) %>%
  filter(!is.na(monthly_index_value))
# Revenue-change rows only
df_revenue <- df_plot_bus_vs_rev %>%
  filter(Metric_Type == "B_Revenue Change")

# Revenue change by industry (values multiplied by 100 for a percent axis).
# Individual industries are drawn dashed and the "All" aggregate solid.
# The unused `isAll` helper column and the linetype scale were removed: no
# layer maps a linetype aesthetic, so neither had any effect.
df_revenue_interactive <- df_revenue %>%
  ggplot(aes(x = date, y = monthly_index_value * 100, color = Industry_Group)) +
  # 1. Recession shade (own data frame, so it must not inherit aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey10",
    alpha = 0.2
  ) +
  # 2. Baseline (0% change)
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +
  # 3. Trend lines. `linewidth` replaces `size` for lines (deprecated since
  #    ggplot2 3.4.0). "All" is excluded from the dashed layer so it is drawn
  #    once, solid, exactly as in the businesses-open chart.
  geom_line(
    data = filter(df_revenue, Industry_Group != "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "dashed"
  ) +
  geom_line(
    data = filter(df_revenue, Industry_Group == "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "solid"
  ) +
  geom_point(size = 1) +
  # 4. Labels and titles
  labs(
    title = "Small Business Revenue Change",
    subtitle = "Percentage Change in Net Revenue vs. Baseline (Jan 2020)",
    x = "Date",
    y = "Revenue % Change",
    color = "Industry",
    caption = "Source: Womply – Small Business Revenue and Data (2020–2023)"
  ) +
  # 5. Formatting
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
  theme_economist() +
  scale_colour_viridis(discrete = TRUE, option = "C") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

## Interactive
ggplotly(df_revenue_interactive) %>%
  layout(
    # A single `legend` entry: the call previously passed `legend =` twice,
    # which makes it ambiguous which settings are applied. The title now
    # matches the "Industry" colour label instead of "Income Group".
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -0.35,
      title = list(text = "Industry", font = list(size = 10))
    ),
    annotations = list(
      text = "Source: Womply, Economic COVID Tracker",
      xref = "paper",
      yref = "paper",
      x = 1,
      y = -0.6,
      showarrow = FALSE,
      font = list(size = 8),
      align = "left"
    )
  )
# Merchants-open rows only
df_business <- df_plot_bus_vs_rev %>%
  filter(Metric_Type == "A_Merchants Open")

# Open merchants by industry (values multiplied by 100 for a percent axis).
# Individual industries are drawn dashed and the "All" aggregate solid.
# The unused `isAll` helper column and the linetype scale were removed: no
# layer maps a linetype aesthetic, so neither had any effect.
df_business_interactive <- df_business %>%
  ggplot(aes(x = date, y = monthly_index_value * 100, color = Industry_Group)) +
  # 1. Shade the recession period (own data frame, no inherited aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey10",
    alpha = 0.2
  ) +
  # 2. Baseline (0% change)
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +
  # 3. Trend lines. `linewidth` replaces `size` for lines (deprecated since
  #    ggplot2 3.4.0); linetype is a fixed setting, not an aesthetic.
  geom_line(
    data = filter(df_business, Industry_Group != "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "dashed"
  ) +
  geom_line(
    data = filter(df_business, Industry_Group == "All"),
    linewidth = 1,
    alpha = 0.8,
    linetype = "solid"
  ) +
  geom_point(size = 1.5) +
  # 4. Labels and titles
  labs(
    title = "Businesses Opened After COVID",
    subtitle = "% Change v.s. Baseline (Jan 2020)",
    x = "Date",
    y = "% Change in Businesses Opened",
    color = "Industry",
    caption = "Source: Womply, Economic COVID Tracker"
  ) +
  # 5. Formatting
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
  theme_economist() +
  scale_colour_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

## Interactive
ggplotly(df_business_interactive) %>%
  layout(
    # A single `legend` entry: the call previously passed `legend =` twice,
    # which makes it ambiguous which settings are applied. The title now
    # matches the "Industry" colour label instead of "Income Group".
    legend = list(
      orientation = "h",
      x = 0.5,
      xanchor = "center",
      y = -0.4,
      title = list(text = "Industry", font = list(size = 10))
    ),
    annotations = list(
      text = "Source: Womply, Economic COVID Tracker",
      xref = "paper",
      yref = "paper",
      x = 1,
      y = -0.65,
      showarrow = FALSE,
      font = list(size = 8),
      align = "left"
    )
  )
# Recession band for geom_rect(): 2020-01-01 through 2020-06-30 on the x-axis,
# unbounded on the y-axis so the shading fills the whole panel height.
recession_df <- data.frame(
  start = as.Date("2020-01-01"), end = as.Date("2020-06-30"),
  ymin = -Inf, ymax = Inf
)
# Job-posting change by industry with the recession window shaded and
# labelled. The plot is not assigned, so it prints when this runs at top level.
df_industry_jobs %>%
  ggplot(aes(x = date, y = job_posting_change, color = industry)) +
  # Recession band (own data frame, so it must not inherit the line aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.5
  ) +
  # Recession label, placed at the highest observed value
  annotate(
    "text",
    x = as.Date("2020-03-15"),
    y = max(df_industry_jobs$job_posting_change, na.rm = TRUE),
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +
  # Baseline, lines and actual data points.
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 0.5
  ) +
  geom_line(linewidth = 1, alpha = 0.8) +
  geom_point(size = 1) +
  # Labels (axis label spacing and legend capitalisation fixed)
  labs(
    title = "Job Postings After Covid by Industry",
    subtitle = "Baseline = January 2020",
    x = "Date",
    y = "Job Posting % Change",
    color = "Industry",
    caption = "Source: LightCast, Economic COVID Tracker 2020"
  ) +
  # Format x-axis
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +
  # Theme
  theme_economist() +
  scale_color_viridis(discrete = TRUE, option = "C") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

# Job-posting change by skill level with the recession window shaded and
# labelled. The plot is not assigned, so it prints when this runs at top level.
df_skill_jobs %>%
  ggplot(aes(x = date, y = posting, color = Skills)) +
  # Recession band (own data frame, so it must not inherit the line aesthetics)
  geom_rect(
    data = recession_df,
    inherit.aes = FALSE,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.5
  ) +
  # Recession label, placed at the highest observed value
  annotate(
    "text",
    x = as.Date("2020-03-15"),
    y = max(df_skill_jobs$posting, na.rm = TRUE),
    label = "Recession",
    color = "black",
    fontface = "italic",
    size = 4
  ) +
  # Lines and actual data points, with the zero baseline drawn on top.
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  geom_hline(
    yintercept = 0,
    linetype = "solid",
    color = "black",
    linewidth = 1
  ) +
  # Labels: colour maps `Skills`, so the legend is titled accordingly
  # (it previously read "Industry")
  labs(
    title = "Job Postings After Covid By Skill Level",
    subtitle = "Baseline = January 2020",
    x = "Month",
    y = "Job Posting % Change",
    color = "Skill Level",
    caption = "Source: LightCast, Economic COVID Tracker 2020"
  ) +
  # Format x-axis
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 month") +
  # Theme
  theme_economist() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom"
  )
