Final Report

# Loading required libraries 
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(gt)
library(readr)

# Make every chunk echo its source alongside its output in the knitted report
knitr::opts_chunk$set(echo = TRUE)

# Import the survey microdata as a base data.frame.
# NOTE(review): the path is absolute and machine-specific, so the report only
# knits on a machine where this exact file location exists.
housing_data <- read.csv(file = "/Users/pearcy/CHS2021ECL_PUMF.csv")

# Compact overview of column names, types and leading values
str(object = housing_data)

## 'data.frame':    40988 obs. of  109 variables:
##  $ PUMFID  : int  63501 63502 63503 63504 63505 63506 63507 63508 63509 63510 ...
##  $ EHA_10  : int  3 2 2 5 3 3 1 3 4 4 ...
##  $ EHA_10A : int  6 6 6 6 6 6 1 6 6 6 ...
##  $ EHA_10B : int  6 2 2 6 6 6 6 6 6 6 ...
##  $ EHA_25  : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ DWS_05A : int  3 3 2 3 1 3 3 3 3 3 ...
##  $ DWI_05A : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05B : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05C : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05D : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ NES_05A : int  3 2 2 3 3 3 1 3 3 3 ...
##  $ NSC_30A : int  1 1 2 3 1 2 1 2 2 1 ...
##  $ NSC_30B : int  1 1 2 2 1 2 1 1 1 1 ...
##  $ NSC_30C : int  1 3 3 3 3 3 1 3 2 2 ...
##  $ NEI_05A : int  4 4 3 4 4 3 4 4 4 4 ...
##  $ NEI_05B : int  4 3 3 4 4 4 2 4 4 4 ...
##  $ NEI_05C : int  4 3 3 4 4 3 4 4 4 4 ...
##  $ NEI_05D : int  4 3 3 4 4 3 4 4 4 4 ...
##  $ NEI_05E : int  4 2 3 4 4 4 4 4 4 4 ...
##  $ NEI_05F : int  4 1 2 4 4 4 4 4 4 4 ...
##  $ NEI_05G : int  4 1 2 4 4 4 4 4 4 4 ...
##  $ NEI_05H : int  4 2 2 3 4 4 4 4 4 4 ...
##  $ NEI_05I : int  4 3 4 4 4 4 4 4 4 4 ...
##  $ WSA_05  : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ SDH_05  : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ CER_05  : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ CER_20  : int  3 1 3 3 3 3 3 3 3 3 ...
##  $ LIS_10  : int  2 1 2 1 2 2 2 3 1 3 ...
##  $ COS_10  : int  3 3 2 1 2 3 3 9 3 3 ...
##  $ COS_15  : int  3 2 3 3 4 3 2 2 1 1 ...
##  $ GH_05   : int  4 4 4 1 1 4 5 3 2 2 ...
##  $ GH_10   : int  3 4 4 3 1 2 3 3 2 2 ...
##  $ REGION  : int  1 5 4 4 3 2 2 3 4 1 ...
##  $ PAGEGR1 : int  2 9 2 1 2 2 9 9 1 2 ...
##  $ PAGEGR2 : int  1 9 2 2 2 1 9 9 2 2 ...
##  $ PAGEGR3 : int  1 9 2 1 1 2 9 9 1 2 ...
##  $ PAGEGR4 : int  2 9 1 2 2 2 9 9 2 1 ...
##  $ PAGEP1  : int  3 3 4 2 2 1 2 1 2 4 ...
##  $ PCER_10 : int  96 2 96 96 96 96 96 96 96 96 ...
##  $ PCER_15 : int  6 3 6 6 6 6 6 6 6 6 ...
##  $ PCHN    : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PCOS_05 : int  9 5 1 6 4 99 5 99 9 9 ...
##  $ PDCLASS : int  1 1 0 1 0 1 2 0 0 0 ...
##  $ PDCT_05 : int  1 2 1 1 1 2 2 2 1 1 ...
##  $ PDCT_20 : int  4 2 3 3 2 1 3 2 4 3 ...
##  $ PDCT_25 : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ PDTYPER : int  3 9 0 1 0 1 9 0 0 0 ...
##  $ PDV_SAH : int  6 2 6 6 6 2 2 2 6 6 ...
##  $ PDV_SHCO: num  4700 1400 3400 3500 2100 ...
##  $ PDV_SUIT: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PDWLTYPE: int  1 6 1 2 1 6 4 99 1 1 ...
##  $ PDWS_05 : int  3 2 1 2 3 2 3 3 3 3 ...
##  $ PDWS_10A: int  1 4 2 2 4 3 2 2 1 1 ...
##  $ PDWS_10B: int  1 3 2 2 4 4 2 2 1 1 ...
##  $ PDWS_10C: int  1 2 4 3 4 2 4 2 1 1 ...
##  $ PDWS_10D: int  1 2 2 2 1 2 2 2 2 1 ...
##  $ PDWS_10E: int  1 2 2 2 1 3 2 2 1 1 ...
##  $ PDWS_10F: int  1 4 3 3 3 3 4 2 2 2 ...
##  $ PDWS_10G: int  1 2 2 2 1 3 2 2 1 1 ...
##  $ PDWS_10H: int  1 2 2 2 2 2 2 2 2 1 ...
##  $ PDWS_10I: int  1 2 2 2 2 3 1 2 1 1 ...
##  $ PDWS_10J: int  1 2 2 2 4 3 1 2 1 1 ...
##  $ PEHA_05A: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PEHA_05B: int  2 1 1 2 2 2 1 2 2 2 ...
##  $ PEHA_05C: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PEMPL   : int  1 1 2 1 1 1 9 9 1 2 ...
##  $ PFTHB5YR: int  2 6 2 2 2 6 6 6 2 2 ...
##  $ PFWEIGHT: num  338.6 44.6 1706.9 151 1683.9 ...
##  $ PGEOGR  : int  3 26 22 22 16 10 10 16 18 4 ...
##  $ PHGEDUC : int  3 4 6 7 5 2 99 99 1 6 ...
##  $ PHHSIZE : int  3 2 1 3 1 2 99 99 5 1 ...
##  $ PHHTTINC: num  75000 92500 60000 190000 97500 ...
##  $ PHTYPE  : int  1 3 5 1 5 2 99 99 1 5 ...
##  $ PLIS_05 : int  6 6 1 7 7 7 2 6 9 9 ...
##  $ PNES_05 : int  3 1 1 3 2 1 3 2 3 3 ...
##  $ PNSC_15 : int  1 4 2 2 4 2 2 1 1 1 ...
##  $ POWN_20 : int  1 6 1 1 1 6 6 6 1 1 ...
##  $ POWN_80 : int  50000 99999996 520000 350000 100000 99999996 99999996 99999996 200000 90000 ...
##  $ PPAC_05 : int  4 4 4 3 3 1 1 1 1 4 ...
##  $ PPAC_10 : int  1 1 2 1 2 2 1 1 2 1 ...
##  $ PPAC_23 : int  1 1 6 1 6 2 9 9 1 6 ...
##  $ PPAC_30 : int  1 2 2 2 2 2 2 2 1 1 ...
##  $ PPAC_35 : int  6 2 2 2 2 2 2 2 6 6 ...
##  $ PPAC_45A: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PPAC_45C: int  2 2 1 2 2 2 2 2 2 2 ...
##  $ PPAC_45D: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45E: int  2 2 2 2 2 1 2 1 2 2 ...
##  $ PPAC_45F: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45G: int  2 2 2 1 2 2 2 2 2 1 ...
##  $ PPAC_45H: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45I: int  1 1 2 1 2 2 2 2 1 2 ...
##  $ PPAC_45J: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45K: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45L: int  2 2 2 2 1 2 2 2 2 2 ...
##  $ PPAC_45M: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ PPAC_45N: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45O: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPROV   : int  12 59 48 48 35 24 24 35 46 12 ...
##  $ PRSPGNDR: int  2 2 2 1 1 1 1 1 1 2 ...
##   [list output truncated]

# List every column name in the loaded dataset
colnames(housing_data)

##   [1] "PUMFID"   "EHA_10"   "EHA_10A"  "EHA_10B"  "EHA_25"   "DWS_05A" 
##   [7] "DWI_05A"  "DWI_05B"  "DWI_05C"  "DWI_05D"  "NES_05A"  "NSC_30A" 
##  [13] "NSC_30B"  "NSC_30C"  "NEI_05A"  "NEI_05B"  "NEI_05C"  "NEI_05D" 
##  [19] "NEI_05E"  "NEI_05F"  "NEI_05G"  "NEI_05H"  "NEI_05I"  "WSA_05"  
##  [25] "SDH_05"   "CER_05"   "CER_20"   "LIS_10"   "COS_10"   "COS_15"  
##  [31] "GH_05"    "GH_10"    "REGION"   "PAGEGR1"  "PAGEGR2"  "PAGEGR3" 
##  [37] "PAGEGR4"  "PAGEP1"   "PCER_10"  "PCER_15"  "PCHN"     "PCOS_05" 
##  [43] "PDCLASS"  "PDCT_05"  "PDCT_20"  "PDCT_25"  "PDTYPER"  "PDV_SAH" 
##  [49] "PDV_SHCO" "PDV_SUIT" "PDWLTYPE" "PDWS_05"  "PDWS_10A" "PDWS_10B"
##  [55] "PDWS_10C" "PDWS_10D" "PDWS_10E" "PDWS_10F" "PDWS_10G" "PDWS_10H"
##  [61] "PDWS_10I" "PDWS_10J" "PEHA_05A" "PEHA_05B" "PEHA_05C" "PEMPL"   
##  [67] "PFTHB5YR" "PFWEIGHT" "PGEOGR"   "PHGEDUC"  "PHHSIZE"  "PHHTTINC"
##  [73] "PHTYPE"   "PLIS_05"  "PNES_05"  "PNSC_15"  "POWN_20"  "POWN_80" 
##  [79] "PPAC_05"  "PPAC_10"  "PPAC_23"  "PPAC_30"  "PPAC_35"  "PPAC_45A"
##  [85] "PPAC_45C" "PPAC_45D" "PPAC_45E" "PPAC_45F" "PPAC_45G" "PPAC_45H"
##  [91] "PPAC_45I" "PPAC_45J" "PPAC_45K" "PPAC_45L" "PPAC_45M" "PPAC_45N"
##  [97] "PPAC_45O" "PPROV"    "PRSPGNDR" "PRSPIMST" "PSCR_05"  "PSCR_10" 
## [103] "PSCR_25"  "PSCR_35"  "PSCR_D40" "PSTIR_GR" "PVISMIN"  "PWSA_D15"
## [109] "VERDATE"

# Tabulate the raw GH_05 codes to see how many records carry each value
table(housing_data[["GH_05"]])

## 
##     1     2     3     4     5     9 
##  5428 13124 13955  6208  2068   205

# Load necessary libraries
library(dplyr)
library(readr)
library(gt)

# Recode GH_10 into descriptive labels.
# Codes 1-5 and 9 get the labels below; any other value (including NA)
# falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(GH_10_recode = case_when(
    GH_10 == 1 ~ "Excellent",
    GH_10 == 2 ~ "Very Good",
    GH_10 == 3 ~ "Good",
    GH_10 == 4 ~ "Fair",
    GH_10 == 5 ~ "Poor",
    GH_10 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table: unweighted count per code plus its share of all rows
gh10_freq_table <- housing_data %>%
  count(GH_10, GH_10_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Locate the weight column. This dataset has no column called "weight"; the
# weight variable listed by names()/str() is PFWEIGHT, so look for that first
# and keep "weight" as a fallback for files that use the generic name.
# NOTE(review): confirm against the data codebook that PFWEIGHT is the weight
# intended for these estimates.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  # Sum of weights per code, i.e. the weighted frequency
  weighted_freq <- housing_data %>%
    group_by(GH_10) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  gh10_freq_table <- left_join(gh10_freq_table, weighted_freq, by = "GH_10")
} else {
  # No weight column available: keep the column so the table layout is stable,
  # typed as numeric so number formatting still applies
  gh10_freq_table <- gh10_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
gh10_freq_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health (GH_10) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_10 = "Code",
    GH_10_recode = "Mental Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	Mental Health Category	Count	% of Total	Weighted Frequency
Mental Health (GH_10) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	Excellent	6,728.00	16.41	NA
2	Very Good	12,655.00	30.87	NA
3	Good	13,519.00	32.98	NA
4	Fair	6,027.00	14.70	NA
5	Poor	1,779.00	4.34	NA
9	Not Stated	280.00	0.68	NA

library(dplyr)
# Recode GH_05 into descriptive labels.
# Codes 1-5 and 9 get the labels below; any other value (including NA)
# falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(GH_05_recode = case_when(
    GH_05 == 1 ~ "Excellent",
    GH_05 == 2 ~ "Very Good",
    GH_05 == 3 ~ "Good",
    GH_05 == 4 ~ "Fair",
    GH_05 == 5 ~ "Poor",
    GH_05 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table: unweighted count per code plus its share of all rows
freq_table <- housing_data %>%
  count(GH_05, GH_05_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Locate the weight column. This dataset has no column called "weight"; the
# weight variable listed by names()/str() is PFWEIGHT, so look for that first
# and keep "weight" as a fallback for files that use the generic name.
# NOTE(review): confirm against the data codebook that PFWEIGHT is the weight
# intended for these estimates.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  # Sum of weights per code, i.e. the weighted frequency
  weighted_freq <- housing_data %>%
    group_by(GH_05) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  freq_table <- left_join(freq_table, weighted_freq, by = "GH_05")
} else {
  # No weight column available: keep the column so the table layout is stable,
  # typed as numeric so number formatting still applies
  freq_table <- freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
freq_table %>%
  gt() %>%
  tab_header(
    title = "General Health (GH_05) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05 = "Code",
    GH_05_recode = "General Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	General Health Category	Count	% of Total	Weighted Frequency
General Health (GH_05) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	Excellent	5,428.00	13.24	NA
2	Very Good	13,124.00	32.02	NA
3	Good	13,955.00	34.05	NA
4	Fair	6,208.00	15.15	NA
5	Poor	2,068.00	5.05	NA
9	Not Stated	205.00	0.50	NA

# Load necessary libraries
library(dplyr)
library(readr)
library(gt)


# Recode NEI_05A into descriptive labels.
# Codes 1-4 and 9 get the labels below; any other value (including NA)
# falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(NEI_05A_recode = case_when(
    NEI_05A == 1 ~ "A big problem",
    NEI_05A == 2 ~ "A moderate problem",
    NEI_05A == 3 ~ "A small problem",
    NEI_05A == 4 ~ "Not a problem",
    NEI_05A == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table: unweighted count per code plus its share of all rows
nei05a_freq_table <- housing_data %>%
  count(NEI_05A, NEI_05A_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Locate the weight column. This dataset has no column called "weight"; the
# weight variable listed by names()/str() is PFWEIGHT, so look for that first
# and keep "weight" as a fallback for files that use the generic name.
# NOTE(review): confirm against the data codebook that PFWEIGHT is the weight
# intended for these estimates.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  # Sum of weights per code, i.e. the weighted frequency
  weighted_freq <- housing_data %>%
    group_by(NEI_05A) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  nei05a_freq_table <- left_join(nei05a_freq_table, weighted_freq, by = "NEI_05A")
} else {
  # No weight column available: keep the column so the table layout is stable,
  # typed as numeric so number formatting still applies
  nei05a_freq_table <- nei05a_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
nei05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighbourhood Issues (NEI_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NEI_05A = "Code",
    NEI_05A_recode = "Neighbourhood Problem Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	Neighbourhood Problem Category	Count	% of Total	Weighted Frequency
Neighbourhood Issues (NEI_05A) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	A big problem	1,270.00	3.10	NA
2	A moderate problem	3,192.00	7.79	NA
3	A small problem	6,862.00	16.74	NA
4	Not a problem	29,607.00	72.23	NA
9	Not Stated	57.00	0.14	NA

Final Report

Kaleb Pearson

2025-04-07