housing_data <- read.csv("/Users/pearcy/CHS2021ECL_PUMF.csv")
str(housing_data)
## 'data.frame':    40988 obs. of  109 variables:
##  $ PUMFID  : int  63501 63502 63503 63504 63505 63506 63507 63508 63509 63510 ...
##  $ EHA_10  : int  3 2 2 5 3 3 1 3 4 4 ...
##  $ EHA_10A : int  6 6 6 6 6 6 1 6 6 6 ...
##  $ EHA_10B : int  6 2 2 6 6 6 6 6 6 6 ...
##  $ EHA_25  : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ DWS_05A : int  3 3 2 3 1 3 3 3 3 3 ...
##  $ DWI_05A : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05B : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05C : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05D : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ NES_05A : int  3 2 2 3 3 3 1 3 3 3 ...
##  $ NSC_30A : int  1 1 2 3 1 2 1 2 2 1 ...
##  $ NSC_30B : int  1 1 2 2 1 2 1 1 1 1 ...
##  $ NSC_30C : int  1 3 3 3 3 3 1 3 2 2 ...
##  $ NEI_05A : int  4 4 3 4 4 3 4 4 4 4 ...
##  $ NEI_05B : int  4 3 3 4 4 4 2 4 4 4 ...
##  $ NEI_05C : int  4 3 3 4 4 3 4 4 4 4 ...
##  $ NEI_05D : int  4 3 3 4 4 3 4 4 4 4 ...
##  $ NEI_05E : int  4 2 3 4 4 4 4 4 4 4 ...
##  $ NEI_05F : int  4 1 2 4 4 4 4 4 4 4 ...
##  $ NEI_05G : int  4 1 2 4 4 4 4 4 4 4 ...
##  $ NEI_05H : int  4 2 2 3 4 4 4 4 4 4 ...
##  $ NEI_05I : int  4 3 4 4 4 4 4 4 4 4 ...
##  $ WSA_05  : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ SDH_05  : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ CER_05  : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ CER_20  : int  3 1 3 3 3 3 3 3 3 3 ...
##  $ LIS_10  : int  2 1 2 1 2 2 2 3 1 3 ...
##  $ COS_10  : int  3 3 2 1 2 3 3 9 3 3 ...
##  $ COS_15  : int  3 2 3 3 4 3 2 2 1 1 ...
##  $ GH_05   : int  4 4 4 1 1 4 5 3 2 2 ...
##  $ GH_10   : int  3 4 4 3 1 2 3 3 2 2 ...
##  $ REGION  : int  1 5 4 4 3 2 2 3 4 1 ...
##  $ PAGEGR1 : int  2 9 2 1 2 2 9 9 1 2 ...
##  $ PAGEGR2 : int  1 9 2 2 2 1 9 9 2 2 ...
##  $ PAGEGR3 : int  1 9 2 1 1 2 9 9 1 2 ...
##  $ PAGEGR4 : int  2 9 1 2 2 2 9 9 2 1 ...
##  $ PAGEP1  : int  3 3 4 2 2 1 2 1 2 4 ...
##  $ PCER_10 : int  96 2 96 96 96 96 96 96 96 96 ...
##  $ PCER_15 : int  6 3 6 6 6 6 6 6 6 6 ...
##  $ PCHN    : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PCOS_05 : int  9 5 1 6 4 99 5 99 9 9 ...
##  $ PDCLASS : int  1 1 0 1 0 1 2 0 0 0 ...
##  $ PDCT_05 : int  1 2 1 1 1 2 2 2 1 1 ...
##  $ PDCT_20 : int  4 2 3 3 2 1 3 2 4 3 ...
##  $ PDCT_25 : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ PDTYPER : int  3 9 0 1 0 1 9 0 0 0 ...
##  $ PDV_SAH : int  6 2 6 6 6 2 2 2 6 6 ...
##  $ PDV_SHCO: num  4700 1400 3400 3500 2100 ...
##  $ PDV_SUIT: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PDWLTYPE: int  1 6 1 2 1 6 4 99 1 1 ...
##  $ PDWS_05 : int  3 2 1 2 3 2 3 3 3 3 ...
##  $ PDWS_10A: int  1 4 2 2 4 3 2 2 1 1 ...
##  $ PDWS_10B: int  1 3 2 2 4 4 2 2 1 1 ...
##  $ PDWS_10C: int  1 2 4 3 4 2 4 2 1 1 ...
##  $ PDWS_10D: int  1 2 2 2 1 2 2 2 2 1 ...
##  $ PDWS_10E: int  1 2 2 2 1 3 2 2 1 1 ...
##  $ PDWS_10F: int  1 4 3 3 3 3 4 2 2 2 ...
##  $ PDWS_10G: int  1 2 2 2 1 3 2 2 1 1 ...
##  $ PDWS_10H: int  1 2 2 2 2 2 2 2 2 1 ...
##  $ PDWS_10I: int  1 2 2 2 2 3 1 2 1 1 ...
##  $ PDWS_10J: int  1 2 2 2 4 3 1 2 1 1 ...
##  $ PEHA_05A: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PEHA_05B: int  2 1 1 2 2 2 1 2 2 2 ...
##  $ PEHA_05C: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PEMPL   : int  1 1 2 1 1 1 9 9 1 2 ...
##  $ PFTHB5YR: int  2 6 2 2 2 6 6 6 2 2 ...
##  $ PFWEIGHT: num  338.6 44.6 1706.9 151 1683.9 ...
##  $ PGEOGR  : int  3 26 22 22 16 10 10 16 18 4 ...
##  $ PHGEDUC : int  3 4 6 7 5 2 99 99 1 6 ...
##  $ PHHSIZE : int  3 2 1 3 1 2 99 99 5 1 ...
##  $ PHHTTINC: num  75000 92500 60000 190000 97500 ...
##  $ PHTYPE  : int  1 3 5 1 5 2 99 99 1 5 ...
##  $ PLIS_05 : int  6 6 1 7 7 7 2 6 9 9 ...
##  $ PNES_05 : int  3 1 1 3 2 1 3 2 3 3 ...
##  $ PNSC_15 : int  1 4 2 2 4 2 2 1 1 1 ...
##  $ POWN_20 : int  1 6 1 1 1 6 6 6 1 1 ...
##  $ POWN_80 : int  50000 99999996 520000 350000 100000 99999996 99999996 99999996 200000 90000 ...
##  $ PPAC_05 : int  4 4 4 3 3 1 1 1 1 4 ...
##  $ PPAC_10 : int  1 1 2 1 2 2 1 1 2 1 ...
##  $ PPAC_23 : int  1 1 6 1 6 2 9 9 1 6 ...
##  $ PPAC_30 : int  1 2 2 2 2 2 2 2 1 1 ...
##  $ PPAC_35 : int  6 2 2 2 2 2 2 2 6 6 ...
##  $ PPAC_45A: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PPAC_45C: int  2 2 1 2 2 2 2 2 2 2 ...
##  $ PPAC_45D: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45E: int  2 2 2 2 2 1 2 1 2 2 ...
##  $ PPAC_45F: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45G: int  2 2 2 1 2 2 2 2 2 1 ...
##  $ PPAC_45H: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45I: int  1 1 2 1 2 2 2 2 1 2 ...
##  $ PPAC_45J: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45K: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45L: int  2 2 2 2 1 2 2 2 2 2 ...
##  $ PPAC_45M: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ PPAC_45N: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45O: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPROV   : int  12 59 48 48 35 24 24 35 46 12 ...
##  $ PRSPGNDR: int  2 2 2 1 1 1 1 1 1 2 ...
##   [list output truncated]
head(housing_data)
##   PUMFID EHA_10 EHA_10A EHA_10B EHA_25 DWS_05A DWI_05A DWI_05B DWI_05C DWI_05D
## 1  63501      3       6       6      2       3       2       2       2       2
## 2  63502      2       6       2      2       3       2       2       2       1
## 3  63503      2       6       2      2       2       2       2       2       2
## 4  63504      5       6       6      2       3       2       2       2       2
## 5  63505      3       6       6      2       1       2       2       2       2
## 6  63506      3       6       6      2       3       2       2       2       2
##   NES_05A NSC_30A NSC_30B NSC_30C NEI_05A NEI_05B NEI_05C NEI_05D NEI_05E
## 1       3       1       1       1       4       4       4       4       4
## 2       2       1       1       3       4       3       3       3       2
## 3       2       2       2       3       3       3       3       3       3
## 4       3       3       2       3       4       4       4       4       4
## 5       3       1       1       3       4       4       4       4       4
## 6       3       2       2       3       3       4       3       3       4
##   NEI_05F NEI_05G NEI_05H NEI_05I WSA_05 SDH_05 CER_05 CER_20 LIS_10 COS_10
## 1       4       4       4       4      2      2      2      3      2      3
## 2       1       1       2       3      2      1      1      1      1      3
## 3       2       2       2       4      2      2      2      3      2      2
## 4       4       4       3       4      2      2      2      3      1      1
## 5       4       4       4       4      2      2      2      3      2      2
## 6       4       4       4       4      2      2      2      3      2      3
##   COS_15 GH_05 GH_10 REGION PAGEGR1 PAGEGR2 PAGEGR3 PAGEGR4 PAGEP1 PCER_10
## 1      3     4     3      1       2       1       1       2      3      96
## 2      2     4     4      5       9       9       9       9      3       2
## 3      3     4     4      4       2       2       2       1      4      96
## 4      3     1     3      4       1       2       1       2      2      96
## 5      4     1     1      3       2       2       1       2      2      96
## 6      3     4     2      2       2       1       2       2      1      96
##   PCER_15 PCHN PCOS_05 PDCLASS PDCT_05 PDCT_20 PDCT_25 PDTYPER PDV_SAH PDV_SHCO
## 1       6    2       9       1       1       4       1       3       6  4.7e+03
## 2       3    2       5       1       2       2       1       9       2  1.4e+03
## 3       6    2       1       0       1       3       2       0       6  3.4e+03
## 4       6    2       6       1       1       3       1       1       6  3.5e+03
## 5       6    2       4       0       1       2       1       0       6  2.1e+03
## 6       6    2      99       1       2       1       1       1       2  1.0e+07
##   PDV_SUIT PDWLTYPE PDWS_05 PDWS_10A PDWS_10B PDWS_10C PDWS_10D PDWS_10E
## 1        1        1       3        1        1        1        1        1
## 2        1        6       2        4        3        2        2        2
## 3        1        1       1        2        2        4        2        2
## 4        1        2       2        2        2        3        2        2
## 5        1        1       3        4        4        4        1        1
## 6        1        6       2        3        4        2        2        3
##   PDWS_10F PDWS_10G PDWS_10H PDWS_10I PDWS_10J PEHA_05A PEHA_05B PEHA_05C PEMPL
## 1        1        1        1        1        1        2        2        2     1
## 2        4        2        2        2        2        2        1        2     1
## 3        3        2        2        2        2        2        1        2     2
## 4        3        2        2        2        2        2        2        2     1
## 5        3        1        2        2        4        2        2        2     1
## 6        3        3        2        3        3        2        2        2     1
##   PFTHB5YR  PFWEIGHT PGEOGR PHGEDUC PHHSIZE PHHTTINC PHTYPE PLIS_05 PNES_05
## 1        2  338.5873      3       3       3 7.50e+04      1       6       3
## 2        6   44.6467     26       4       2 9.25e+04      3       6       1
## 3        2 1706.9443     22       6       1 6.00e+04      5       1       1
## 4        2  150.9932     22       7       3 1.90e+05      1       7       3
## 5        2 1683.8582     16       5       1 9.75e+04      5       7       2
## 6        6  205.0446     10       2       2 1.00e+11      2       7       1
##   PNSC_15 POWN_20  POWN_80 PPAC_05 PPAC_10 PPAC_23 PPAC_30 PPAC_35 PPAC_45A
## 1       1       1    50000       4       1       1       1       6        2
## 2       4       6 99999996       4       1       1       2       2        2
## 3       2       1   520000       4       2       6       2       2        2
## 4       2       1   350000       3       1       1       2       2        2
## 5       4       1   100000       3       2       6       2       2        2
## 6       2       6 99999996       1       2       2       2       2        2
##   PPAC_45C PPAC_45D PPAC_45E PPAC_45F PPAC_45G PPAC_45H PPAC_45I PPAC_45J
## 1        2        2        2        2        2        2        1        2
## 2        2        2        2        2        2        2        1        2
## 3        1        2        2        2        2        2        2        2
## 4        2        2        2        2        1        2        1        2
## 5        2        2        2        2        2        2        2        2
## 6        2        2        1        2        2        2        2        2
##   PPAC_45K PPAC_45L PPAC_45M PPAC_45N PPAC_45O PPROV PRSPGNDR PRSPIMST PSCR_05
## 1        2        2        2        2        2    12        2        9       6
## 2        2        2        2        2        2    59        2        1       2
## 3        2        2        2        2        2    48        2        1       6
## 4        2        2        1        2        2    48        1        1       6
## 5        2        1        2        2        2    35        1        1       6
## 6        2        2        2        2        2    24        1        1       2
##   PSCR_10 PSCR_25 PSCR_35 PSCR_D40 PSTIR_GR PVISMIN PWSA_D15    VERDATE
## 1       6      96       6    999.6        3       9    999.6 30/11/2022
## 2       2       1       2    999.6        1       2    999.6 30/11/2022
## 3       6      96       6    999.6        3       2    999.6 30/11/2022
## 4       6      96       6    999.6        1       1    999.6 30/11/2022
## 5       6      96       6    999.6        1       2    999.6 30/11/2022
## 6       2       3       6    999.6        1       2    999.6 30/11/2022
names(housing_data)
##   [1] "PUMFID"   "EHA_10"   "EHA_10A"  "EHA_10B"  "EHA_25"   "DWS_05A" 
##   [7] "DWI_05A"  "DWI_05B"  "DWI_05C"  "DWI_05D"  "NES_05A"  "NSC_30A" 
##  [13] "NSC_30B"  "NSC_30C"  "NEI_05A"  "NEI_05B"  "NEI_05C"  "NEI_05D" 
##  [19] "NEI_05E"  "NEI_05F"  "NEI_05G"  "NEI_05H"  "NEI_05I"  "WSA_05"  
##  [25] "SDH_05"   "CER_05"   "CER_20"   "LIS_10"   "COS_10"   "COS_15"  
##  [31] "GH_05"    "GH_10"    "REGION"   "PAGEGR1"  "PAGEGR2"  "PAGEGR3" 
##  [37] "PAGEGR4"  "PAGEP1"   "PCER_10"  "PCER_15"  "PCHN"     "PCOS_05" 
##  [43] "PDCLASS"  "PDCT_05"  "PDCT_20"  "PDCT_25"  "PDTYPER"  "PDV_SAH" 
##  [49] "PDV_SHCO" "PDV_SUIT" "PDWLTYPE" "PDWS_05"  "PDWS_10A" "PDWS_10B"
##  [55] "PDWS_10C" "PDWS_10D" "PDWS_10E" "PDWS_10F" "PDWS_10G" "PDWS_10H"
##  [61] "PDWS_10I" "PDWS_10J" "PEHA_05A" "PEHA_05B" "PEHA_05C" "PEMPL"   
##  [67] "PFTHB5YR" "PFWEIGHT" "PGEOGR"   "PHGEDUC"  "PHHSIZE"  "PHHTTINC"
##  [73] "PHTYPE"   "PLIS_05"  "PNES_05"  "PNSC_15"  "POWN_20"  "POWN_80" 
##  [79] "PPAC_05"  "PPAC_10"  "PPAC_23"  "PPAC_30"  "PPAC_35"  "PPAC_45A"
##  [85] "PPAC_45C" "PPAC_45D" "PPAC_45E" "PPAC_45F" "PPAC_45G" "PPAC_45H"
##  [91] "PPAC_45I" "PPAC_45J" "PPAC_45K" "PPAC_45L" "PPAC_45M" "PPAC_45N"
##  [97] "PPAC_45O" "PPROV"    "PRSPGNDR" "PRSPIMST" "PSCR_05"  "PSCR_10" 
## [103] "PSCR_25"  "PSCR_35"  "PSCR_D40" "PSTIR_GR" "PVISMIN"  "PWSA_D15"
## [109] "VERDATE"

Table GH_05 (General Health - Focal Explanatory Variable )

# Check distribution of GH_05
table(housing_data$GH_05)
## 
##     1     2     3     4     5     9 
##  5428 13124 13955  6208  2068   205
# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(gt)

# Recode gh_05
housing_data <- housing_data %>%
  mutate(GH_05_recode = case_when(
    GH_05 == 1 ~ "Excellent",
    GH_05 == 2 ~ "Very Good",
    GH_05 == 3 ~ "Good",
    GH_05 == 4 ~ "Fair",
    GH_05 == 5 ~ "Poor",
    GH_05 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table
freq_table <- housing_data %>%
  count(GH_05, GH_05_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency if 'weight' column exists
if ("weight" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(GH_05) %>%
    summarise(Weighted_Frequency = sum(weight, na.rm = TRUE), .groups = "drop")
  
  freq_table <- left_join(freq_table, weighted_freq, by = "GH_05")
} else {
  freq_table <- freq_table %>%
    mutate(Weighted_Frequency = NA)  # If no weight, add NA
}

# Generate a well-formatted table with gt
freq_table %>%
  gt() %>%
  tab_header(
    title = "General Health (GH_05) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05 = "Code",
    GH_05_recode = "General Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
General Health (GH_05) Summary
Frequency, Weighted Frequency, and Percentage Distribution
Code General Health Category Count % of Total Weighted Frequency
1 Excellent 5,428.00 13.24 NA
2 Very Good 13,124.00 32.02 NA
3 Good 13,955.00 34.05 NA
4 Fair 6,208.00 15.15 NA
5 Poor 2,068.00 5.05 NA
9 Not Stated 205.00 0.50 NA
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)

# Recode GH_10
housing_data <- housing_data %>%
  mutate(GH_10_recode = case_when(
    GH_10 == 1 ~ "Excellent",
    GH_10 == 2 ~ "Very Good",
    GH_10 == 3 ~ "Good",
    GH_10 == 4 ~ "Fair",
    GH_10 == 5 ~ "Poor",
    GH_10 == 9 ~ "Not Stated", 
    TRUE ~ "Unknown"
  ))

# Create frequency table
gh10_freq_table <- housing_data %>%
  count(GH_10, GH_10_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency if 'weight' column exists
if ("weight" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(GH_10) %>%
    summarise(Weighted_Frequency = sum(weight, na.rm = TRUE), .groups = "drop")
  
  gh10_freq_table <- left_join(gh10_freq_table, weighted_freq, by = "GH_10")
} else {
  gh10_freq_table <- gh10_freq_table %>%
    mutate(Weighted_Frequency = NA)  # If no weight, add NA
}

# Generate a well-formatted table with gt
gh10_freq_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health (GH_10) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_10 = "Code",
    GH_10_recode = "Mental Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
Mental Health (GH_10) Summary
Frequency, Weighted Frequency, and Percentage Distribution
Code Mental Health Category Count % of Total Weighted Frequency
1 Excellent 6,728.00 16.41 NA
2 Very Good 12,655.00 30.87 NA
3 Good 13,519.00 32.98 NA
4 Fair 6,027.00 14.70 NA
5 Poor 1,779.00 4.34 NA
9 Not Stated 280.00 0.68 NA
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)

# Recode NES_05A
housing_data <- housing_data %>%
  mutate(NES_05A_recode = case_when(
    NES_05A == 1 ~ "Increased",
    NES_05A == 2 ~ "Decreased",
    NES_05A == 3 ~ "About the Same",
    NES_05A == 9 ~ "Not Stated", 
    TRUE ~ "Unknown"
  ))

# Create frequency table
nes05a_freq_table <- housing_data %>%
  count(NES_05A, NES_05A_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency if "weight" column exists
if ("weight" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(NES_05A) %>%
    summarise(Weighted_Frequency = sum(weight, na.rm = TRUE), .groups = "drop")
  
  nes05a_freq_table <- left_join(nes05a_freq_table, weighted_freq, by = "NES_05A")
} else {
  nes05a_freq_table <- nes05a_freq_table %>%
    mutate(Weighted_Frequency = NA)  # If no weight, add NA
}

# Generate a well-formatted table with gt
nes05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighborhood Satisfaction (NES_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NES_05A = "Code",
    NES_05A_recode = "Satisfaction Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
Neighborhood Satisfaction (NES_05A) Summary
Frequency, Weighted Frequency, and Percentage Distribution
Code Satisfaction Category Count % of Total Weighted Frequency
1 Increased 2,412.00 5.88 NA
2 Decreased 2,489.00 6.07 NA
3 About the Same 36,034.00 87.91 NA
9 Not Stated 53.00 0.13 NA
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)


# Recode NEI_05A
housing_data <- housing_data %>%
  mutate(NEI_05A_recode = case_when(
    NEI_05A == 1 ~ "A big problem",
    NEI_05A == 2 ~ "A moderate problem",
    NEI_05A == 3 ~ "A small problem",
    NEI_05A == 4 ~ "Not a problem",
    NEI_05A == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Create frequency table
nei05a_freq_table <- housing_data %>%
  count(NEI_05A, NEI_05A_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add weighted frequency if 'weight' column exists
if ("weight" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(NEI_05A) %>%
    summarise(Weighted_Frequency = sum(weight, na.rm = TRUE), .groups = "drop")
  
  nei05a_freq_table <- left_join(nei05a_freq_table, weighted_freq, by = "NEI_05A")
} else {
  nei05a_freq_table <- nei05a_freq_table %>%
    mutate(Weighted_Frequency = NA)  # If no weight, add NA
}

# Generate a well-formatted table with gt
nei05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighbourhood Issues (NEI_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NEI_05A = "Code",
    NEI_05A_recode = "Neighbourhood Problem Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = c(Frequency, Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
Neighbourhood Issues (NEI_05A) Summary
Frequency, Weighted Frequency, and Percentage Distribution
Code Neighbourhood Problem Category Count % of Total Weighted Frequency
1 A big problem 1,270.00 3.10 NA
2 A moderate problem 3,192.00 7.79 NA
3 A small problem 6,862.00 16.74 NA
4 Not a problem 29,607.00 72.23 NA
9 Not Stated 57.00 0.14 NA
library(dplyr)
library(ggplot2)
library(gt)

# Create a table summarizing the relationship
relationship_table <- housing_data %>%
  count(GH_05_recode, GH_10_recode, name = "Count") %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 2))

# Display the table
relationship_table %>%
  gt() %>%
  tab_header(
    title = "Relationship Between General Health and Mental Health",
    subtitle = "Frequency and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05_recode = "General Health",
    GH_10_recode = "Mental Health",
    Count = "Number of People",
    Percentage = "% of Total"
  )
Relationship Between General Health and Mental Health
Frequency and Percentage Distribution
General Health Mental Health Number of People % of Total
Excellent Excellent 3131 7.64
Excellent Fair 165 0.40
Excellent Good 627 1.53
Excellent Not Stated 19 0.05
Excellent Poor 29 0.07
Excellent Very Good 1457 3.55
Fair Excellent 400 0.98
Fair Fair 2217 5.41
Fair Good 2010 4.90
Fair Not Stated 25 0.06
Fair Poor 625 1.52
Fair Very Good 931 2.27
Good Excellent 1043 2.54
Good Fair 2137 5.21
Good Good 7372 17.99
Good Not Stated 48 0.12
Good Poor 410 1.00
Good Very Good 2945 7.19
Not Stated Excellent 11 0.03
Not Stated Fair 6 0.01
Not Stated Good 25 0.06
Not Stated Not Stated 140 0.34
Not Stated Poor 4 0.01
Not Stated Very Good 19 0.05
Poor Excellent 118 0.29
Poor Fair 645 1.57
Poor Good 485 1.18
Poor Not Stated 12 0.03
Poor Poor 569 1.39
Poor Very Good 239 0.58
Very Good Excellent 2025 4.94
Very Good Fair 857 2.09
Very Good Good 3000 7.32
Very Good Not Stated 36 0.09
Very Good Poor 142 0.35
Very Good Very Good 7064 17.23
library(ggplot2)

housing_data <- housing_data %>%
  mutate(GH_05_recode = factor(GH_05_recode, 
                               levels = c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")))

housing_data <- housing_data %>%
  mutate(GH_10_recode = factor(GH_10_recode, 
                               levels = c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")))

# Create the graph
ggplot(housing_data, aes(x = GH_05_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by General Health",
    x = "General Health",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  theme_minimal()

# Create frequency table for NEI_05A (Noisy Neighbors) vs GH_10 (Mental Health)
noisy_neighbors_table <- housing_data %>%
  count(NEI_05A, GH_10_recode, name = "Count") %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 2))

# Display the table
noisy_neighbors_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health by Noisy Neighbors",
    subtitle = "Exploring the Impact of Noise Disturbances on Mental Health"
  ) %>%
  cols_label(
    NEI_05A = "Noise Disturbance Level",
    GH_10_recode = "Mental Health Category",
    Count = "Number of Respondents",
    Percentage = "% of Total"
  ) %>%
  fmt_number(columns = c(Count, Percentage), decimals = 2) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(everything())
  )
Mental Health by Noisy Neighbors
Exploring the Impact of Noise Disturbances on Mental Health
Noise Disturbance Level Mental Health Category Number of Respondents % of Total
1 Excellent 182.00 0.44
1 Very Good 247.00 0.60
1 Good 379.00 0.92
1 Fair 272.00 0.66
1 Poor 183.00 0.45
1 Not Stated 7.00 0.02
2 Excellent 355.00 0.87
2 Very Good 774.00 1.89
2 Good 1,094.00 2.67
2 Fair 704.00 1.72
2 Poor 246.00 0.60
2 Not Stated 19.00 0.05
3 Excellent 792.00 1.93
3 Very Good 1,915.00 4.67
3 Good 2,413.00 5.89
3 Fair 1,310.00 3.20
3 Poor 391.00 0.95
3 Not Stated 41.00 0.10
4 Excellent 5,388.00 13.15
4 Very Good 9,703.00 23.67
4 Good 9,619.00 23.47
4 Fair 3,734.00 9.11
4 Poor 958.00 2.34
4 Not Stated 205.00 0.50
9 Excellent 11.00 0.03
9 Very Good 16.00 0.04
9 Good 14.00 0.03
9 Fair 7.00 0.02
9 Poor 1.00 0.00
9 Not Stated 8.00 0.02
library(ggplot2)

# Visualization of Mental Health by Noisy Neighbors
ggplot(housing_data, aes(x = NEI_05A, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by Noisy Neighbors",
    x = "Noise Disturbance Level",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) + scale_x_discrete(limits = c("A big problem", "A moderate problem", "A small problem", "Not a problem", "Not stated")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1), 
    plot.margin = margin(20, 20, 20, 20)  
  )

library(ggplot2)

# Visualization of Mental Health by Neighborhood Satisfaction
ggplot(housing_data, aes(x = NES_05A, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by Neighborhood Satisfaction",
    x = "Neighborhood Satisfaction",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  scale_x_discrete(limits = c("Increased", "About the Same", "Decreased", "Not stated")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1)
  )

library(ggplot2)

ggplot(housing_data, aes(x = GH_05_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  facet_wrap(~ NES_05A) +
  labs(
    title = "Mental Health by General Health and Neighborhood Satisfaction",
    x = "General Health",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1)
  )