library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Load both data sets with a single data() call.
data(list = c("campaigns", "demographics"))

# Graph 1: Heat Map of Household Size vs Income Level
#
# Joins `campaigns` to `demographics` on household_id, drops rows with a
# missing income or household size, and counts the distinct households in
# every (household_size, income) cell. The count drives the tile fill.

campaigns_size_income <- campaigns %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(income), !is.na(household_size)) %>%
  group_by(household_size, income) %>%
  summarise(
    # The grouping does not include the campaign, so a household that has
    # more than one row in `campaigns` would be counted repeatedly by n().
    # n_distinct() counts each household once, which is what the labels say.
    households_participating = n_distinct(household_id),
    # Return an ungrouped tibble directly (no ungroup(), no message).
    .groups = "drop"
  )

ggplot(
  campaigns_size_income,
  aes(
    x = factor(household_size),
    y = income,
    fill = households_participating
  )
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  labs(
    title = "Heat Map of Campaign Participation by Household Size and Income Level",
    subtitle = "Color intensity represents the number of households participating",
    x = "Household Size",
    y = "Income Level",
    fill = "Households Participating"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Graph 2: Campaign Participation by Marital Status
#
# Joins `campaigns` to `demographics` on household_id, drops rows with a
# missing marital status, and counts the distinct households per
# (campaign_id, marital_status) pair for a dodged bar chart.
#
# Note: the data frame shares its name with the `marital_status` column.
# Inside filter()/group_by()/aes() the column is what gets referenced.

marital_status <- campaigns %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(marital_status)) %>%
  group_by(campaign_id, marital_status) %>%
  summarise(
    # Named as a count rather than `household_id`, so the column is not
    # mistaken for the join key it was derived from.
    households_participating = n_distinct(household_id),
    # Return an ungrouped tibble directly (no ungroup(), no message).
    .groups = "drop"
  )

# NOTE(review): if campaign_id is stored as character, factor() orders the
# levels alphabetically ("1", "10", "11", ..., "2") -- confirm that the
# resulting x-axis order is acceptable.
ggplot(
  marital_status,
  aes(
    x = factor(campaign_id),
    y = households_participating,
    fill = marital_status
  )
) +
  # geom_col() draws bars from pre-computed heights.
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Campaign Participation by Marital Status",
    subtitle = "Number of households participating in each campaign, grouped by marital status",
    x = "Campaign ID",
    y = "Number of Households",
    fill = "Marital Status"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Graph 3: Top 5 Campaigns by Household Participation and Household Size
#
# Counts the distinct households per (household_size, campaign_id) pair,
# keeps the five campaigns with the highest counts within each household
# size, and plots them as points sized and positioned by that count.

campaigns_household_size <- campaigns %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(household_size)) %>%
  group_by(household_size, campaign_id) %>%
  summarise(
    households_participating = n_distinct(household_id),
    # Return an ungrouped tibble directly (no ungroup(), no message).
    .groups = "drop"
  )

# slice_max() replaces the superseded top_n(). Ties are kept (the default),
# so a household size can contribute more than five rows when counts tie.
# Rows come back sorted by descending count within each group, which only
# affects the order in which points are drawn.
top_campaigns_by_size <- campaigns_household_size %>%
  group_by(household_size) %>%
  slice_max(households_participating, n = 5) %>%
  ungroup()

ggplot(
  top_campaigns_by_size,
  aes(
    x = campaign_id,
    y = households_participating,
    color = factor(household_size),
    size = households_participating
  )
) +
  geom_point(alpha = 0.7) +
  scale_size_continuous(range = c(3, 10)) +
  labs(
    title = "Top 5 Campaigns by Household Participation and Household Size",
    subtitle = "Point size represents the number of households participating",
    x = "Campaign ID",
    y = "Number of Households Participating",
    color = "Household Size",
    size = "Household Participation"
  ) +
  theme_minimal() +
  theme(legend.position = "right")