Grouping and summarizing class assignment

# Installing and loading required packages

if (!require("tidyverse"))
  install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!require("openxlsx"))
  install.packages("openxlsx")
## Loading required package: openxlsx
if (!require("gtExtras"))
  install.packages("gtExtras")
## Loading required package: gtExtras
## Loading required package: gt
library(tidyverse)
library(openxlsx)
library(gtExtras)

# Reading data from:
# https://www.huduser.gov/portal/datasets/fmr/smallarea/index.html#year2024

FMR <- read.xlsx(
  "https://www.huduser.gov/portal/datasets/fmr/fmr2024/fy2024_safmrs_revised.xlsx",
  sheet = 1)

# Making a list of Rutherford County ZIP codes

ZIPList <- c(
  "37127",
  "37128",
  "37129",
  "37130",
  "37132",
  "37085",
  "37118",
  "37149",
  "37037",
  "37153",
  "37167",
  "37086")

# Filtering for Rutherford ZIP codes and
# selecting columns of interest

FMR_RuCo <- FMR %>%
  filter(ZIP.Code %in% ZIPList) %>%
  select(ZIP.Code, SAFMR.0BR, SAFMR.1BR, SAFMR.2BR, SAFMR.3BR, SAFMR.4BR) %>% 
  distinct()

# Renaming the columns

colnames(FMR_RuCo) <- c("ZIP", "Studio", "BR1", "BR2", "BR3", "BR4")

# Averaging estimates

FMR_RuCo <- FMR_RuCo %>%
  mutate(ZIP_Average = (Studio + BR1 + BR2 + BR3 + BR4) / 5)

# Sorting in descending order by ZIP_Average

FMR_RuCo <- FMR_RuCo %>%
  arrange(desc(ZIP_Average))

# Averaging ZIP_Average

Average_ZIP_Average <- mean(FMR_RuCo$ZIP_Average)

# Categorizing by ZIP_Average

FMR_RuCo <- FMR_RuCo %>%
  mutate(
    Rent_Category = case_when(
      ZIP_Average > Average_ZIP_Average ~ "Above average",
      ZIP_Average == Average_ZIP_Average ~ "Average",
      ZIP_Average < Average_ZIP_Average ~ "Below average",
      .default = "Error"))

# Showing the data as a table

FMR_RuCo_table <- gt(FMR_RuCo) %>% 
  tab_header("Rutherford FMR, by size and ZIP") %>%
  cols_align(align = "left") %>%
  gt_theme_538

FMR_RuCo_table
Rutherford FMR, by size and ZIP
ZIP Studio BR1 BR2 BR3 BR4 ZIP_Average Rent_Category
37037 1660 1710 1920 2410 2940 2128 Above average
37086 1580 1620 1820 2290 2790 2020 Above average
37128 1510 1550 1740 2190 2670 1932 Above average
37129 1420 1460 1640 2060 2510 1818 Above average
37153 1410 1450 1630 2040 2490 1804 Above average
37167 1290 1330 1490 1870 2280 1652 Below average
37085 1260 1290 1450 1820 2210 1606 Below average
37127 1240 1270 1430 1800 2190 1586 Below average
37130 1180 1210 1360 1710 2080 1508 Below average
37132 1180 1210 1360 1710 2080 1508 Below average
37118 1100 1130 1270 1590 1960 1410 Below average
37149 1100 1130 1270 1590 1960 1410 Below average
# Grouping and summarizing

Summary_BR2 <- FMR_RuCo %>% 
  group_by(Rent_Category) %>% 
  summarize(Count = n(),
            Minimum = min(BR2),
            Average = round(mean(BR2), 0),
            Maximum = max(BR2))

# Making the table

Summary_BR2_table <- gt(Summary_BR2) %>% 
  tab_header("Two-bedroom stats, by rent category") %>%
  cols_align(align = "left") %>%
  gt_theme_538

# Showing the table

Summary_BR2_table
Two-bedroom stats, by rent category
Rent_Category Count Minimum Average Maximum
Above average 5 1630 1750 1920
Below average 7 1270 1376 1490
Summary_BR2 <- FMR_RuCo %>% 
  group_by(Rent_Category) %>% 
  summarize(Average = mean(BR2))

# Making the table

Summary_BR2_table <- gt(Summary_BR2) %>% 
  tab_header("Two-bedroom stats, by rent category") %>%
  cols_align(align = "left") %>%
  gt_theme_538

# Grouping and summarizing

Summary_BR2 <- FMR_RuCo %>% 
  group_by(Rent_Category) %>% 
  summarize(Count = n(),
            Minimum = min(BR2),
            Average = round(mean(BR2), 0),
            Maximum = max(BR2))

# Making the table

Summary_BR2_table <- gt(Summary_BR2) %>% 
  tab_header("Two-bedroom stats, by rent category") %>%
  cols_align(align = "left") %>%
  gt_theme_538

# Showing the table

Summary_BR2_table
Two-bedroom stats, by rent category
Rent_Category Count Minimum Average Maximum
Above average 5 1630 1750 1920
Below average 7 1270 1376 1490