Page 1

Data - 2019 American Community Survey

This project uses data from the 2019 American Community Survey (ACS). The ACS covers a wide range of topics, but this project focuses specifically on housing in the United States and uses only a subset of variables. Please see the list below:

Because the ACS can include multiple respondents from a single household, this analysis looks only at person 1 of each household. The values of the above variables would be the same for every respondent in a household, regardless of their person number, so keeping one respondent per household avoids duplicate data.

While this data was collected by the Census Bureau, I will be using an extract created by the Center for Economic and Policy Research (CEPR) for this project. Specifically, I’ll be using the 2019 extract. Here is the link to the CEPR extract: https://ceprdata.org/acs-uniform-data-extracts/acs-data/ Citation: Center for Economic and Policy Research. 2021. ACS Uniform Extracts, Version 1.6. Washington, DC.

# Loading the Data ----
# The extract location defaults to the original author's local path. Set the
# ACS_2019_PATH environment variable to read the same .dta file from another
# location without editing this script.
acs_2019_path <- Sys.getenv(
  "ACS_2019_PATH",
  unset = "C:/Users/MelissaPatton/Box/MPatton Work Personal/Coursera/Data Visualization & Dashboarding in R/Course 5/Capstone_Project/cepr_acs_2019.dta"
)
ACS_2019 <- read_dta(acs_2019_path)

# Selecting Relevant Variables ----
# Keep only the household ID plus the columns carried into the figures.
ACS_2019 <- select(
  ACS_2019,
  serialno, region, rntp_adj, bld, mrgp_adj, value08_adj,
  taxp_amt, age, mv, hmown, insp_adj, state
)

# One row per household ----
# Keep the first row seen for each household ID (serialno) and drop the rest.
# NOTE(review): this is "person 1" only if the extract lists person 1 first
# within each household -- confirm the row order of the source file.
ACS2019_unique <- ACS_2019 %>%
  filter(!duplicated(serialno))

# Correct variable classes and label Census regions ----
# The Stata import leaves these columns in classes that are awkward to work
# with, so rent and state are coerced to numeric and region to character.
# Region is then the readable name for region codes "1" to "4"; any other
# code (or a missing code) yields NA.
ACS2019_unique <- ACS2019_unique %>%
  mutate(
    rntp_adj = as.numeric(rntp_adj),
    state = as.numeric(state),
    region = as.character(region),
    Region = unname(
      c(
        "1" = "Northeast",
        "2" = "Midwest",
        "3" = "South",
        "4" = "West"
      )[region]
    )
  )

Figure 1. Histogram of monthly rent payments by region.

Each panel in this figure shows the number of households within $500 rent intervals, ranging from $0–$500 up to $4000–$4500 per month. The distributions vary across Census regions, illustrating regional differences in rent levels.

# Figure 1 data ----
# Household ID, region name and monthly rent. na.omit() drops every row with
# a missing value in any of the three columns, which removes households with
# no rent amount (non-renter households).
fig_dat1 <- ACS2019_unique %>%
  select(serialno, Region, rntp_adj) %>%
  na.omit()

# Figure 1 plot ----
# One histogram panel per region with $500-wide bins from $0 to $4,500.
# The constant black outline set in geom_histogram() takes precedence over
# the colour mapping for that layer.
Fig_1 <- ggplot(
  fig_dat1,
  aes(x = rntp_adj, color = Region, fill = Region)
) +
  geom_histogram(color = "black", breaks = seq(0, 4500, by = 500)) +
  facet_wrap(~Region) +
  scale_fill_brewer(palette = "Pastel1") +
  scale_color_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Distribution of Monthly Rent Payments by Census Region",
    x = "Rent per Month ($)",
    y = "Number of Households"
  )
Fig_1

Figure 2. Histogram of monthly mortgage payments by region.

Each panel in this figure shows the number of households within $500 mortgage payment intervals, ranging from $0–$500 up to $7500–$8000 per month. The distributions vary across Census regions, illustrating regional differences in mortgage payment levels.

# Figure 2 data ----
# Household ID, region name and monthly mortgage payment. na.omit() drops
# every row with a missing value in any of the three columns, which removes
# households with no mortgage amount (e.g. renter households).
fig_dat2 <- ACS2019_unique %>%
  select(serialno, Region, mrgp_adj) %>%
  na.omit()

# Figure 2 plot ----
# One histogram panel per region with $500-wide bins from $0 to $8,000.
# The constant black outline set in geom_histogram() takes precedence over
# the colour mapping for that layer.
Fig_2 <- ggplot(
  fig_dat2,
  aes(x = mrgp_adj, color = Region, fill = Region)
) +
  geom_histogram(color = "black", breaks = seq(0, 8000, by = 500)) +
  facet_wrap(~Region) +
  scale_fill_brewer(palette = "Pastel1") +
  scale_color_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Distribution of Monthly Mortgage Payments by Census Region",
    x = "Mortgage Payment per Month ($)",
    y = "Number of Households"
  )
Fig_2

Figure 3. Scatterplot showing the relationship between age and average monthly mortgage payment across Census regions.

Each point on the scatterplot represents the average mortgage payment for a specific age group within a region. The figure illustrates regional variation in mortgage costs and highlights patterns in how mortgage payments vary by age.

# Figure 3 data ----
# Average monthly mortgage payment for every Region x age combination.
# Missing payments are ignored inside mean(); a combination with no payments
# at all averages to NaN and is removed by na.omit(), as is any row with a
# missing Region or age.
fig_dat3 <- ACS2019_unique %>%
  select(serialno, Region, age, mrgp_adj, state) %>%
  group_by(Region, age) %>%
  summarise(
    avg_mortgage = mean(mrgp_adj, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  na.omit()

# Figure 3 plot ----
# Static scatterplot first; ggplotly() then converts it to an interactive
# widget.
Fig_3_a <- ggplot(
  fig_dat3,
  aes(x = age, y = avg_mortgage, color = Region)
) +
  geom_point() +
  scale_color_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Average Monthly Mortgage Payments by Age and Region",
    x = "Age",
    y = "Average Mortgage Payment per Month ($)"
  )

Fig_3 <- ggplotly(Fig_3_a)
Fig_3

Figure 4. Scatterplot showing the relationship between age and average monthly rent payment across Census regions.

Each point on the scatterplot represents the average rent payment for a specific age group within a region. The figure illustrates regional variation in rent costs and highlights patterns in how rent payments vary by age.

# Figure 4 data ----
# Average monthly rent for every Region x age combination. Missing rents are
# ignored inside mean(); a combination with no rent values at all averages to
# NaN and is removed by na.omit(), as is any row with a missing Region or age.
fig_dat4 <- ACS2019_unique %>%
  select(serialno, Region, age, rntp_adj) %>%
  group_by(Region, age) %>%
  summarise(
    avg_rent = mean(rntp_adj, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  na.omit()

# Figure 4 plot ----
# Static scatterplot first; ggplotly() then converts it to an interactive
# widget.
Fig_4_a <- ggplot(
  fig_dat4,
  aes(x = age, y = avg_rent, color = Region)
) +
  geom_point() +
  scale_color_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Average Monthly Rent by Age and Region",
    x = "Age",
    y = "Average Rent per Month ($)"
  )

Fig_4 <- ggplotly(Fig_4_a)
Fig_4

Figure 5. Bar chart showing the number of households by housing type and Census region.

This figure shows the number of households in the U.S. residing in different housing types, including apartment buildings, single-family houses, and other structures. The stacked bars also highlight regional differences in the prevalence of each housing type.

# Figure 5 data ----
# Collapse the building code (bld) into three housing types: codes 1 and 10
# become "Other", 2-3 "Single Family House" and 4-9 "Apartment Building".
# Any other or missing code gets NA and is dropped by na.omit(), together
# with rows missing serialno or Region.
fig_dat5 <- ACS2019_unique %>%
  select(serialno, Region, bld) %>%
  mutate(
    type = case_when(
      bld %in% c(1, 10) ~ "Other",
      bld %in% 2:3 ~ "Single Family House",
      bld %in% 4:9 ~ "Apartment Building"
    )
  ) %>%
  na.omit()

# Figure 5 plot ----
# Stacked bars of household counts; the factor levels fix the left-to-right
# order of the housing types on the x axis.
Fig_5 <- ggplot(
  fig_dat5,
  aes(
    x = factor(
      type,
      levels = c("Apartment Building", "Single Family House", "Other")
    ),
    fill = Region
  )
) +
  geom_bar() +
  scale_fill_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Regional Differences in Household Housing Types",
    x = "Type",
    y = "Number of Households"
  )
Fig_5

#Figure 6. Bar chart showing household tenure by homeowner status

Distribution of households by length of residence at their current home, separated by homeownership status. Homeowners tend to have longer tenures, while non-owners are more likely to have shorter stays, reflecting differences in residential stability.

# Figure 6 data ----
# time:  move-in code (mv, 1-7) as an ordered factor with readable labels.
# owner: homeownership flag (hmown) as a readable label; values other than
#        0/1 become NA.
# na.omit() then drops every row with a missing value in any selected column
# (including Region and age, which are selected but not plotted).
fig_dat6 <- ACS2019_unique %>%
  select(serialno, Region, mv, hmown, age) %>%
  mutate(
    time = factor(
      mv,
      levels = 1:7,
      labels = c(
        "Less than 1 year",
        "1 to less than 2 years",
        "2 to less than 5 years",
        "5 to less than 10 years",
        "10 to less than 20 years",
        "20 to less than 30 years",
        "30 years or more"
      ),
      ordered = TRUE
    ),
    owner = case_when(
      hmown == 1 ~ "Residence Owned",
      hmown == 0 ~ "Residence Not Owned"
    )
  )
fig_dat6 <- na.omit(fig_dat6)

# Figure 6 plot ----
# One panel per ownership status. The legend is hidden because the x axis
# already names each tenure bucket.
# NOTE(review): with hard limits on the y scale, a bar taller than 275,000
# would be removed rather than clipped -- confirm that no bucket exceeds it.
Fig_6 <- ggplot(fig_dat6, aes(x = time, color = time, fill = time)) +
  geom_bar() +
  facet_wrap(~owner) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  scale_color_brewer(palette = "Pastel1") +
  scale_y_continuous(
    labels = scales::label_comma(),
    limits = c(0, 275000),
    breaks = seq(0, 275000, by = 50000)
  ) +
  labs(
    x = "Length of Time at Current Residence",
    y = "Number of Households",
    title = "Household Tenure by Homeownership Status"
  )
Fig_6

#Figure 7. Choropleth showing the average monthly cost of fire, hazard, and flood insurance in each US state

This map shows the average monthly cost of fire, hazard, and flood insurance by state. States with darker green shading have higher average insurance costs, while lighter shades indicate lower costs. Areas with no data are shown in gray. Florida had the highest average fire, hazard, and flood insurance cost, while West Virginia had the lowest. The values are shown in dollars.

# State polygons (long/lat outlines keyed by a lower-case `region` name).
states <- map_data("state")

# Numeric state code -> lower-case state name, spelled to match the `region`
# column of `states` so the two tables can be joined.
state_fips <- c(
  "alabama" = 1,
  "alaska" = 2,
  "arizona" = 4,
  "arkansas" = 5,
  "california" = 6,
  "colorado" = 8,
  "connecticut" = 9,
  "delaware" = 10,
  "district of columbia" = 11,
  "florida" = 12,
  "georgia" = 13,
  "hawaii" = 15,
  "idaho" = 16,
  "illinois" = 17,
  "indiana" = 18,
  "iowa" = 19,
  "kansas" = 20,
  "kentucky" = 21,
  "louisiana" = 22,
  "maine" = 23,
  "maryland" = 24,
  "massachusetts" = 25,
  "michigan" = 26,
  "minnesota" = 27,
  "mississippi" = 28,
  "missouri" = 29,
  "montana" = 30,
  "nebraska" = 31,
  "nevada" = 32,
  "new hampshire" = 33,
  "new jersey" = 34,
  "new mexico" = 35,
  "new york" = 36,
  "north carolina" = 37,
  "north dakota" = 38,
  "ohio" = 39,
  "oklahoma" = 40,
  "oregon" = 41,
  "pennsylvania" = 42,
  "rhode island" = 44,
  "south carolina" = 45,
  "south dakota" = 46,
  "tennessee" = 47,
  "texas" = 48,
  "utah" = 49,
  "vermont" = 50,
  "virginia" = 51,
  "washington" = 53,
  "west virginia" = 54,
  "wisconsin" = 55,
  "wyoming" = 56
)

# Figure 7 data ----
# Name each household's state (codes missing from the lookup give NA and are
# dropped), then average the insurance amount per state, ignoring missing
# amounts.
# NOTE(review): the ACS PUMS dictionary describes the insurance item as a
# yearly amount -- confirm insp_adj is monthly before relying on the
# "Monthly" wording in the title and legend.
fig_dat7 <- ACS2019_unique %>%
  select(serialno, state, insp_adj) %>%
  mutate(region = names(state_fips)[match(state, state_fips)]) %>%
  filter(!is.na(region)) %>%
  group_by(region) %>%
  summarise(avg_insurance = mean(insp_adj, na.rm = TRUE))

# Attach the state averages to every polygon vertex; polygons without a
# matching average keep NA and are drawn in the na.value colour.
fig_dat7 <- left_join(states, fig_dat7, by = "region")

# Figure 7 plot ----
# Choropleth: polygons filled by the state average, axes and grid removed.
Fig_7_a <- ggplot(
  data = fig_dat7,
  aes(x = long, y = lat, group = group, fill = avg_insurance)
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    low = "#8FBC8F",
    high = "#023020",
    na.value = "gray90"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(
    title = "Average Monthly Cost of Fire, Hazard, and Flood insurance by State",
    fill = "Monthly Insurance"
  )

Fig_7 <- ggplotly(Fig_7_a)
Fig_7

#Figure 8. Choropleth showing average property value by state.

This map displays the average property values by state, with darker blue shades representing higher average values and lighter blue shades indicating lower values. States with missing data are shown in gray. The values are shown in dollars. The District of Columbia had the highest average property value while West Virginia had the lowest.

# Numeric state code -> lower-case state name, spelled to match the `region`
# column of the `states` polygon table so the two tables can be joined.
state_fips <- c(
  "alabama" = 1,
  "alaska" = 2,
  "arizona" = 4,
  "arkansas" = 5,
  "california" = 6,
  "colorado" = 8,
  "connecticut" = 9,
  "delaware" = 10,
  "district of columbia" = 11,
  "florida" = 12,
  "georgia" = 13,
  "hawaii" = 15,
  "idaho" = 16,
  "illinois" = 17,
  "indiana" = 18,
  "iowa" = 19,
  "kansas" = 20,
  "kentucky" = 21,
  "louisiana" = 22,
  "maine" = 23,
  "maryland" = 24,
  "massachusetts" = 25,
  "michigan" = 26,
  "minnesota" = 27,
  "mississippi" = 28,
  "missouri" = 29,
  "montana" = 30,
  "nebraska" = 31,
  "nevada" = 32,
  "new hampshire" = 33,
  "new jersey" = 34,
  "new mexico" = 35,
  "new york" = 36,
  "north carolina" = 37,
  "north dakota" = 38,
  "ohio" = 39,
  "oklahoma" = 40,
  "oregon" = 41,
  "pennsylvania" = 42,
  "rhode island" = 44,
  "south carolina" = 45,
  "south dakota" = 46,
  "tennessee" = 47,
  "texas" = 48,
  "utah" = 49,
  "vermont" = 50,
  "virginia" = 51,
  "washington" = 53,
  "west virginia" = 54,
  "wisconsin" = 55,
  "wyoming" = 56
)

# Figure 8 data ----
# Name each household's state (codes missing from the lookup give NA and are
# dropped), then average the property value per state, ignoring missing
# values.
fig_dat8 <- ACS2019_unique %>%
  select(serialno, state, value08_adj) %>%
  mutate(region = names(state_fips)[match(state, state_fips)]) %>%
  filter(!is.na(region)) %>%
  group_by(region) %>%
  summarise(avg_value = mean(value08_adj, na.rm = TRUE))

# Attach the state averages to every polygon vertex of `states` (the
# map_data("state") table created earlier in the file); polygons without a
# matching average keep NA and are drawn in the na.value colour.
fig_dat8 <- left_join(states, fig_dat8, by = "region")

# Figure 8 plot ----
# Choropleth: polygons filled by the state average, legend values printed
# with thousands separators, axes and grid removed.
Fig_8_a <- ggplot(
  data = fig_dat8,
  aes(x = long, y = lat, group = group, fill = avg_value)
) +
  geom_polygon(color = "white") +
  scale_fill_gradient(
    low = "#ADD8E6",
    high = "#00008B",
    na.value = "gray90",
    labels = scales::label_comma()
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(
    title = "Average Property Value by State",
    fill = "Property Values"
  )

Fig_8 <- ggplotly(Fig_8_a)
Fig_8