library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
data("campaigns")
data("demographics")
# Graph 1: Heat Map of Household Size vs Income Level
# Join campaign rows to household demographics and count, for every
# household-size / income cell, the DISTINCT households present in
# `campaigns`. Counting rows with `n()` would count a household once per
# campaign row it appears in, which does not match the "households
# participating" legend used below.
campaigns_size_income <- campaigns %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(income), !is.na(household_size)) %>%
  group_by(household_size, income) %>%
  summarise(
    households_participating = n_distinct(household_id),
    # Drop all grouping explicitly; this also silences the
    # "summarise() has grouped output" message.
    .groups = "drop"
  )

# One tile per (household size, income) combination; darker blue means more
# households.
ggplot(
  campaigns_size_income,
  aes(
    x = factor(household_size),
    y = income,
    fill = households_participating
  )
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  labs(
    title = "Heat Map of Campaign Participation by Household Size and Income Level",
    subtitle = "Color intensity represents the number of households participating",
    x = "Household Size",
    y = "Income Level",
    fill = "Households Participating"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Graph 2: Campaign Participation by Marital Status
# Count distinct households per campaign and marital status. The count
# column is called `households_participating` (it was previously named
# `household_id`, which read as an identifier rather than a count).
# NOTE: the data frame keeps its original name `marital_status`, which is also
# the name of one of its columns; inside `aes()` the column takes precedence.
marital_status <- campaigns %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(marital_status)) %>%
  group_by(campaign_id, marital_status) %>%
  summarise(
    households_participating = n_distinct(household_id),
    # Return an ungrouped result and silence the grouped-output message.
    .groups = "drop"
  )

# Side-by-side bars: one cluster per campaign, one bar per marital status.
ggplot(
  marital_status,
  aes(
    x = factor(campaign_id),
    y = households_participating,
    fill = marital_status
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Campaign Participation by Marital Status",
    subtitle = "Number of households participating in each campaign, grouped by marital status",
    x = "Campaign ID",
    y = "Number of Households",
    fill = "Marital Status"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Graph 3: Top 5 Campaigns by Household Participation and Household Size
# Step 1: number of distinct households per (household size, campaign).
campaigns_household_size <- campaigns %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(household_size)) %>%
  group_by(household_size, campaign_id) %>%
  summarise(
    households_participating = n_distinct(household_id),
    # Return an ungrouped result and silence the grouped-output message.
    .groups = "drop"
  )

# Step 2: within each household size keep the 5 campaigns with the highest
# counts. `slice_max()` replaces the superseded `top_n()`;
# `with_ties = TRUE` keeps tied rows, so a group may return more than
# 5 campaigns, exactly as `top_n()` did.
top_campaigns_by_size <- campaigns_household_size %>%
  group_by(household_size) %>%
  slice_max(households_participating, n = 5, with_ties = TRUE) %>%
  ungroup()

# Bubble chart: the count is mapped to both the y position and the point
# size; colour distinguishes household sizes.
ggplot(
  top_campaigns_by_size,
  aes(
    x = campaign_id,
    y = households_participating,
    color = factor(household_size),
    size = households_participating
  )
) +
  geom_point(alpha = 0.7) +
  scale_size_continuous(range = c(3, 10)) +
  labs(
    title = "Top 5 Campaigns by Household Participation and Household Size",
    subtitle = "Point size represents the number of households participating",
    x = "Campaign ID",
    y = "Number of Households Participating",
    color = "Household Size",
    size = "Household Participation"
  ) +
  theme_minimal() +
  theme(legend.position = "right")
