# Loading required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gt)
library(readr)
# Set up knitr options: show each chunk's source code in the rendered document
knitr::opts_chunk$set(echo = TRUE)

# Load the housing survey extract.
# The path can be overridden with the CHS_PUMF_PATH environment variable so the
# document can be knit on another machine; the original absolute path is kept
# as the default so existing behaviour is unchanged.
housing_data_path <- Sys.getenv("CHS_PUMF_PATH")
if (!nzchar(housing_data_path)) {
  housing_data_path <- "/Users/pearcy/CHS2021ECL_PUMF.csv"
}
# Fail early with an explicit message instead of a generic connection error
if (!file.exists(housing_data_path)) {
  stop("Housing data file not found: ", housing_data_path, call. = FALSE)
}
housing_data <- read.csv(housing_data_path)

# Inspect the column types and the first few values of every variable
str(housing_data)
## 'data.frame': 40988 obs. of 109 variables:
## $ PUMFID : int 63501 63502 63503 63504 63505 63506 63507 63508 63509 63510 ...
## $ EHA_10 : int 3 2 2 5 3 3 1 3 4 4 ...
## $ EHA_10A : int 6 6 6 6 6 6 1 6 6 6 ...
## $ EHA_10B : int 6 2 2 6 6 6 6 6 6 6 ...
## $ EHA_25 : int 2 2 2 2 2 2 1 2 2 2 ...
## $ DWS_05A : int 3 3 2 3 1 3 3 3 3 3 ...
## $ DWI_05A : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DWI_05B : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DWI_05C : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DWI_05D : int 2 1 2 2 2 2 2 2 2 2 ...
## $ NES_05A : int 3 2 2 3 3 3 1 3 3 3 ...
## $ NSC_30A : int 1 1 2 3 1 2 1 2 2 1 ...
## $ NSC_30B : int 1 1 2 2 1 2 1 1 1 1 ...
## $ NSC_30C : int 1 3 3 3 3 3 1 3 2 2 ...
## $ NEI_05A : int 4 4 3 4 4 3 4 4 4 4 ...
## $ NEI_05B : int 4 3 3 4 4 4 2 4 4 4 ...
## $ NEI_05C : int 4 3 3 4 4 3 4 4 4 4 ...
## $ NEI_05D : int 4 3 3 4 4 3 4 4 4 4 ...
## $ NEI_05E : int 4 2 3 4 4 4 4 4 4 4 ...
## $ NEI_05F : int 4 1 2 4 4 4 4 4 4 4 ...
## $ NEI_05G : int 4 1 2 4 4 4 4 4 4 4 ...
## $ NEI_05H : int 4 2 2 3 4 4 4 4 4 4 ...
## $ NEI_05I : int 4 3 4 4 4 4 4 4 4 4 ...
## $ WSA_05 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ SDH_05 : int 2 1 2 2 2 2 2 2 2 2 ...
## $ CER_05 : int 2 1 2 2 2 2 2 2 2 2 ...
## $ CER_20 : int 3 1 3 3 3 3 3 3 3 3 ...
## $ LIS_10 : int 2 1 2 1 2 2 2 3 1 3 ...
## $ COS_10 : int 3 3 2 1 2 3 3 9 3 3 ...
## $ COS_15 : int 3 2 3 3 4 3 2 2 1 1 ...
## $ GH_05 : int 4 4 4 1 1 4 5 3 2 2 ...
## $ GH_10 : int 3 4 4 3 1 2 3 3 2 2 ...
## $ REGION : int 1 5 4 4 3 2 2 3 4 1 ...
## $ PAGEGR1 : int 2 9 2 1 2 2 9 9 1 2 ...
## $ PAGEGR2 : int 1 9 2 2 2 1 9 9 2 2 ...
## $ PAGEGR3 : int 1 9 2 1 1 2 9 9 1 2 ...
## $ PAGEGR4 : int 2 9 1 2 2 2 9 9 2 1 ...
## $ PAGEP1 : int 3 3 4 2 2 1 2 1 2 4 ...
## $ PCER_10 : int 96 2 96 96 96 96 96 96 96 96 ...
## $ PCER_15 : int 6 3 6 6 6 6 6 6 6 6 ...
## $ PCHN : int 2 2 2 2 2 2 1 2 2 2 ...
## $ PCOS_05 : int 9 5 1 6 4 99 5 99 9 9 ...
## $ PDCLASS : int 1 1 0 1 0 1 2 0 0 0 ...
## $ PDCT_05 : int 1 2 1 1 1 2 2 2 1 1 ...
## $ PDCT_20 : int 4 2 3 3 2 1 3 2 4 3 ...
## $ PDCT_25 : int 1 1 2 1 1 1 1 1 1 1 ...
## $ PDTYPER : int 3 9 0 1 0 1 9 0 0 0 ...
## $ PDV_SAH : int 6 2 6 6 6 2 2 2 6 6 ...
## $ PDV_SHCO: num 4700 1400 3400 3500 2100 ...
## $ PDV_SUIT: int 1 1 1 1 1 1 1 1 1 1 ...
## $ PDWLTYPE: int 1 6 1 2 1 6 4 99 1 1 ...
## $ PDWS_05 : int 3 2 1 2 3 2 3 3 3 3 ...
## $ PDWS_10A: int 1 4 2 2 4 3 2 2 1 1 ...
## $ PDWS_10B: int 1 3 2 2 4 4 2 2 1 1 ...
## $ PDWS_10C: int 1 2 4 3 4 2 4 2 1 1 ...
## $ PDWS_10D: int 1 2 2 2 1 2 2 2 2 1 ...
## $ PDWS_10E: int 1 2 2 2 1 3 2 2 1 1 ...
## $ PDWS_10F: int 1 4 3 3 3 3 4 2 2 2 ...
## $ PDWS_10G: int 1 2 2 2 1 3 2 2 1 1 ...
## $ PDWS_10H: int 1 2 2 2 2 2 2 2 2 1 ...
## $ PDWS_10I: int 1 2 2 2 2 3 1 2 1 1 ...
## $ PDWS_10J: int 1 2 2 2 4 3 1 2 1 1 ...
## $ PEHA_05A: int 2 2 2 2 2 2 1 2 2 2 ...
## $ PEHA_05B: int 2 1 1 2 2 2 1 2 2 2 ...
## $ PEHA_05C: int 2 2 2 2 2 2 1 2 2 2 ...
## $ PEMPL : int 1 1 2 1 1 1 9 9 1 2 ...
## $ PFTHB5YR: int 2 6 2 2 2 6 6 6 2 2 ...
## $ PFWEIGHT: num 338.6 44.6 1706.9 151 1683.9 ...
## $ PGEOGR : int 3 26 22 22 16 10 10 16 18 4 ...
## $ PHGEDUC : int 3 4 6 7 5 2 99 99 1 6 ...
## $ PHHSIZE : int 3 2 1 3 1 2 99 99 5 1 ...
## $ PHHTTINC: num 75000 92500 60000 190000 97500 ...
## $ PHTYPE : int 1 3 5 1 5 2 99 99 1 5 ...
## $ PLIS_05 : int 6 6 1 7 7 7 2 6 9 9 ...
## $ PNES_05 : int 3 1 1 3 2 1 3 2 3 3 ...
## $ PNSC_15 : int 1 4 2 2 4 2 2 1 1 1 ...
## $ POWN_20 : int 1 6 1 1 1 6 6 6 1 1 ...
## $ POWN_80 : int 50000 99999996 520000 350000 100000 99999996 99999996 99999996 200000 90000 ...
## $ PPAC_05 : int 4 4 4 3 3 1 1 1 1 4 ...
## $ PPAC_10 : int 1 1 2 1 2 2 1 1 2 1 ...
## $ PPAC_23 : int 1 1 6 1 6 2 9 9 1 6 ...
## $ PPAC_30 : int 1 2 2 2 2 2 2 2 1 1 ...
## $ PPAC_35 : int 6 2 2 2 2 2 2 2 6 6 ...
## $ PPAC_45A: int 2 2 2 2 2 2 1 2 2 2 ...
## $ PPAC_45C: int 2 2 1 2 2 2 2 2 2 2 ...
## $ PPAC_45D: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45E: int 2 2 2 2 2 1 2 1 2 2 ...
## $ PPAC_45F: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45G: int 2 2 2 1 2 2 2 2 2 1 ...
## $ PPAC_45H: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45I: int 1 1 2 1 2 2 2 2 1 2 ...
## $ PPAC_45J: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45K: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45L: int 2 2 2 2 1 2 2 2 2 2 ...
## $ PPAC_45M: int 2 2 2 1 2 2 2 2 2 2 ...
## $ PPAC_45N: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45O: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPROV : int 12 59 48 48 35 24 24 35 46 12 ...
## $ PRSPGNDR: int 2 2 2 1 1 1 1 1 1 2 ...
## [list output truncated]
# List every column name in the loaded data frame
colnames(housing_data)
## [1] "PUMFID" "EHA_10" "EHA_10A" "EHA_10B" "EHA_25" "DWS_05A"
## [7] "DWI_05A" "DWI_05B" "DWI_05C" "DWI_05D" "NES_05A" "NSC_30A"
## [13] "NSC_30B" "NSC_30C" "NEI_05A" "NEI_05B" "NEI_05C" "NEI_05D"
## [19] "NEI_05E" "NEI_05F" "NEI_05G" "NEI_05H" "NEI_05I" "WSA_05"
## [25] "SDH_05" "CER_05" "CER_20" "LIS_10" "COS_10" "COS_15"
## [31] "GH_05" "GH_10" "REGION" "PAGEGR1" "PAGEGR2" "PAGEGR3"
## [37] "PAGEGR4" "PAGEP1" "PCER_10" "PCER_15" "PCHN" "PCOS_05"
## [43] "PDCLASS" "PDCT_05" "PDCT_20" "PDCT_25" "PDTYPER" "PDV_SAH"
## [49] "PDV_SHCO" "PDV_SUIT" "PDWLTYPE" "PDWS_05" "PDWS_10A" "PDWS_10B"
## [55] "PDWS_10C" "PDWS_10D" "PDWS_10E" "PDWS_10F" "PDWS_10G" "PDWS_10H"
## [61] "PDWS_10I" "PDWS_10J" "PEHA_05A" "PEHA_05B" "PEHA_05C" "PEMPL"
## [67] "PFTHB5YR" "PFWEIGHT" "PGEOGR" "PHGEDUC" "PHHSIZE" "PHHTTINC"
## [73] "PHTYPE" "PLIS_05" "PNES_05" "PNSC_15" "POWN_20" "POWN_80"
## [79] "PPAC_05" "PPAC_10" "PPAC_23" "PPAC_30" "PPAC_35" "PPAC_45A"
## [85] "PPAC_45C" "PPAC_45D" "PPAC_45E" "PPAC_45F" "PPAC_45G" "PPAC_45H"
## [91] "PPAC_45I" "PPAC_45J" "PPAC_45K" "PPAC_45L" "PPAC_45M" "PPAC_45N"
## [97] "PPAC_45O" "PPROV" "PRSPGNDR" "PRSPIMST" "PSCR_05" "PSCR_10"
## [103] "PSCR_25" "PSCR_35" "PSCR_D40" "PSTIR_GR" "PVISMIN" "PWSA_D15"
## [109] "VERDATE"
# Check distribution of GH_05.
# useNA = "ifany" adds an <NA> cell only when missing values are present, so
# they are not silently dropped from the tally (table() omits NA by default).
table(housing_data$GH_05, useNA = "ifany")
##
## 1 2 3 4 5 9
## 5428 13124 13955 6208 2068 205
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)
# GH_10 summary: recode the numeric codes to labels, tabulate unweighted counts
# and percentages, add weighted counts, and render the result with gt.
# NOTE(review): re-knit after this change; output rendered earlier shows NA
# weights and two-decimal counts.

# Recode GH_10. Codes 1-5 and 9 get labels; any other value (including NA,
# which matches none of the comparisons) falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(GH_10_recode = case_when(
    GH_10 == 1 ~ "Excellent",
    GH_10 == 2 ~ "Very Good",
    GH_10 == 3 ~ "Good",
    GH_10 == 4 ~ "Fair",
    GH_10 == 5 ~ "Poor",
    GH_10 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table: one row per code, with its unweighted count and its
# share of all rows (rounded to two decimals)
gh10_freq_table <- housing_data %>%
  count(GH_10, GH_10_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency.
# The data has no column called "weight", so the previous check never matched
# and the weighted column was always NA. PFWEIGHT is the numeric column that
# looks like the survey weight -- TODO confirm against the codebook. "weight"
# is still accepted as a fallback for data sets that use that name.
weight_candidates <- c("PFWEIGHT", "weight")
weight_col <- weight_candidates[weight_candidates %in% colnames(housing_data)]
if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(GH_10) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )
  gh10_freq_table <- left_join(gh10_freq_table, weighted_freq, by = "GH_10")
} else {
  # No weight column available: keep the column, typed numeric, filled with NA
  gh10_freq_table <- gh10_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
gh10_freq_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health (GH_10) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_10 = "Code",
    GH_10_recode = "Mental Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are integers, so show them without decimals
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
**Mental Health (GH_10) Summary**
Frequency, Weighted Frequency, and Percentage Distribution

| Code | Mental Health Category | Count | % of Total | Weighted Frequency |
|------|------------------------|-------|------------|--------------------|
| 1 | Excellent | 6,728.00 | 16.41 | NA |
| 2 | Very Good | 12,655.00 | 30.87 | NA |
| 3 | Good | 13,519.00 | 32.98 | NA |
| 4 | Fair | 6,027.00 | 14.70 | NA |
| 5 | Poor | 1,779.00 | 4.34 | NA |
| 9 | Not Stated | 280.00 | 0.68 | NA |
library(dplyr)
# GH_05 summary: recode the numeric codes to labels, tabulate unweighted counts
# and percentages, add weighted counts, and render the result with gt.
# NOTE(review): re-knit after this change; output rendered earlier shows NA
# weights and two-decimal counts.

# Recode GH_05. Codes 1-5 and 9 get labels; any other value (including NA,
# which matches none of the comparisons) falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(GH_05_recode = case_when(
    GH_05 == 1 ~ "Excellent",
    GH_05 == 2 ~ "Very Good",
    GH_05 == 3 ~ "Good",
    GH_05 == 4 ~ "Fair",
    GH_05 == 5 ~ "Poor",
    GH_05 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table: one row per code, with its unweighted count and its
# share of all rows (rounded to two decimals)
freq_table <- housing_data %>%
  count(GH_05, GH_05_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency.
# The data has no column called "weight", so the previous check never matched
# and the weighted column was always NA. PFWEIGHT is the numeric column that
# looks like the survey weight -- TODO confirm against the codebook. "weight"
# is still accepted as a fallback for data sets that use that name.
weight_candidates <- c("PFWEIGHT", "weight")
weight_col <- weight_candidates[weight_candidates %in% colnames(housing_data)]
if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(GH_05) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )
  freq_table <- left_join(freq_table, weighted_freq, by = "GH_05")
} else {
  # No weight column available: keep the column, typed numeric, filled with NA
  freq_table <- freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
freq_table %>%
  gt() %>%
  tab_header(
    title = "General Health (GH_05) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05 = "Code",
    GH_05_recode = "General Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are integers, so show them without decimals
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
**General Health (GH_05) Summary**
Frequency, Weighted Frequency, and Percentage Distribution

| Code | General Health Category | Count | % of Total | Weighted Frequency |
|------|-------------------------|-------|------------|--------------------|
| 1 | Excellent | 5,428.00 | 13.24 | NA |
| 2 | Very Good | 13,124.00 | 32.02 | NA |
| 3 | Good | 13,955.00 | 34.05 | NA |
| 4 | Fair | 6,208.00 | 15.15 | NA |
| 5 | Poor | 2,068.00 | 5.05 | NA |
| 9 | Not Stated | 205.00 | 0.50 | NA |
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)
# NEI_05A summary: recode the numeric codes to labels, tabulate unweighted
# counts and percentages, add weighted counts, and render the result with gt.
# NOTE(review): re-knit after this change; output rendered earlier shows NA
# weights and two-decimal counts.

# Recode NEI_05A. Codes 1-4 and 9 get labels; any other value (including NA,
# which matches none of the comparisons) falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(NEI_05A_recode = case_when(
    NEI_05A == 1 ~ "A big problem",
    NEI_05A == 2 ~ "A moderate problem",
    NEI_05A == 3 ~ "A small problem",
    NEI_05A == 4 ~ "Not a problem",
    NEI_05A == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table: one row per code, with its unweighted count and its
# share of all rows (rounded to two decimals)
nei05a_freq_table <- housing_data %>%
  count(NEI_05A, NEI_05A_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency.
# The data has no column called "weight", so the previous check never matched
# and the weighted column was always NA. PFWEIGHT is the numeric column that
# looks like the survey weight -- TODO confirm against the codebook. "weight"
# is still accepted as a fallback for data sets that use that name.
weight_candidates <- c("PFWEIGHT", "weight")
weight_col <- weight_candidates[weight_candidates %in% colnames(housing_data)]
if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(NEI_05A) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )
  nei05a_freq_table <- left_join(
    nei05a_freq_table,
    weighted_freq,
    by = "NEI_05A"
  )
} else {
  # No weight column available: keep the column, typed numeric, filled with NA
  nei05a_freq_table <- nei05a_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
nei05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighbourhood Issues (NEI_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NEI_05A = "Code",
    NEI_05A_recode = "Neighbourhood Problem Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are integers, so show them without decimals
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
**Neighbourhood Issues (NEI_05A) Summary**
Frequency, Weighted Frequency, and Percentage Distribution

| Code | Neighbourhood Problem Category | Count | % of Total | Weighted Frequency |
|------|--------------------------------|-------|------------|--------------------|
| 1 | A big problem | 1,270.00 | 3.10 | NA |
| 2 | A moderate problem | 3,192.00 | 7.79 | NA |
| 3 | A small problem | 6,862.00 | 16.74 | NA |
| 4 | Not a problem | 29,607.00 | 72.23 | NA |
| 9 | Not Stated | 57.00 | 0.14 | NA |