Initial Analysis

# Read the CHS2021ECL_PUMF CSV file into a data frame, then print a compact
# summary of its structure (column names, types and first values).
housing_data <- utils::read.csv(file = "/Users/pearcy/CHS2021ECL_PUMF.csv")

utils::str(object = housing_data)

## 'data.frame':    40988 obs. of  109 variables:
##  $ PUMFID  : int  63501 63502 63503 63504 63505 63506 63507 63508 63509 63510 ...
##  $ EHA_10  : int  3 2 2 5 3 3 1 3 4 4 ...
##  $ EHA_10A : int  6 6 6 6 6 6 1 6 6 6 ...
##  $ EHA_10B : int  6 2 2 6 6 6 6 6 6 6 ...
##  $ EHA_25  : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ DWS_05A : int  3 3 2 3 1 3 3 3 3 3 ...
##  $ DWI_05A : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05B : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05C : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ DWI_05D : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ NES_05A : int  3 2 2 3 3 3 1 3 3 3 ...
##  $ NSC_30A : int  1 1 2 3 1 2 1 2 2 1 ...
##  $ NSC_30B : int  1 1 2 2 1 2 1 1 1 1 ...
##  $ NSC_30C : int  1 3 3 3 3 3 1 3 2 2 ...
##  $ NEI_05A : int  4 4 3 4 4 3 4 4 4 4 ...
##  $ NEI_05B : int  4 3 3 4 4 4 2 4 4 4 ...
##  $ NEI_05C : int  4 3 3 4 4 3 4 4 4 4 ...
##  $ NEI_05D : int  4 3 3 4 4 3 4 4 4 4 ...
##  $ NEI_05E : int  4 2 3 4 4 4 4 4 4 4 ...
##  $ NEI_05F : int  4 1 2 4 4 4 4 4 4 4 ...
##  $ NEI_05G : int  4 1 2 4 4 4 4 4 4 4 ...
##  $ NEI_05H : int  4 2 2 3 4 4 4 4 4 4 ...
##  $ NEI_05I : int  4 3 4 4 4 4 4 4 4 4 ...
##  $ WSA_05  : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ SDH_05  : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ CER_05  : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ CER_20  : int  3 1 3 3 3 3 3 3 3 3 ...
##  $ LIS_10  : int  2 1 2 1 2 2 2 3 1 3 ...
##  $ COS_10  : int  3 3 2 1 2 3 3 9 3 3 ...
##  $ COS_15  : int  3 2 3 3 4 3 2 2 1 1 ...
##  $ GH_05   : int  4 4 4 1 1 4 5 3 2 2 ...
##  $ GH_10   : int  3 4 4 3 1 2 3 3 2 2 ...
##  $ REGION  : int  1 5 4 4 3 2 2 3 4 1 ...
##  $ PAGEGR1 : int  2 9 2 1 2 2 9 9 1 2 ...
##  $ PAGEGR2 : int  1 9 2 2 2 1 9 9 2 2 ...
##  $ PAGEGR3 : int  1 9 2 1 1 2 9 9 1 2 ...
##  $ PAGEGR4 : int  2 9 1 2 2 2 9 9 2 1 ...
##  $ PAGEP1  : int  3 3 4 2 2 1 2 1 2 4 ...
##  $ PCER_10 : int  96 2 96 96 96 96 96 96 96 96 ...
##  $ PCER_15 : int  6 3 6 6 6 6 6 6 6 6 ...
##  $ PCHN    : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PCOS_05 : int  9 5 1 6 4 99 5 99 9 9 ...
##  $ PDCLASS : int  1 1 0 1 0 1 2 0 0 0 ...
##  $ PDCT_05 : int  1 2 1 1 1 2 2 2 1 1 ...
##  $ PDCT_20 : int  4 2 3 3 2 1 3 2 4 3 ...
##  $ PDCT_25 : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ PDTYPER : int  3 9 0 1 0 1 9 0 0 0 ...
##  $ PDV_SAH : int  6 2 6 6 6 2 2 2 6 6 ...
##  $ PDV_SHCO: num  4700 1400 3400 3500 2100 ...
##  $ PDV_SUIT: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PDWLTYPE: int  1 6 1 2 1 6 4 99 1 1 ...
##  $ PDWS_05 : int  3 2 1 2 3 2 3 3 3 3 ...
##  $ PDWS_10A: int  1 4 2 2 4 3 2 2 1 1 ...
##  $ PDWS_10B: int  1 3 2 2 4 4 2 2 1 1 ...
##  $ PDWS_10C: int  1 2 4 3 4 2 4 2 1 1 ...
##  $ PDWS_10D: int  1 2 2 2 1 2 2 2 2 1 ...
##  $ PDWS_10E: int  1 2 2 2 1 3 2 2 1 1 ...
##  $ PDWS_10F: int  1 4 3 3 3 3 4 2 2 2 ...
##  $ PDWS_10G: int  1 2 2 2 1 3 2 2 1 1 ...
##  $ PDWS_10H: int  1 2 2 2 2 2 2 2 2 1 ...
##  $ PDWS_10I: int  1 2 2 2 2 3 1 2 1 1 ...
##  $ PDWS_10J: int  1 2 2 2 4 3 1 2 1 1 ...
##  $ PEHA_05A: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PEHA_05B: int  2 1 1 2 2 2 1 2 2 2 ...
##  $ PEHA_05C: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PEMPL   : int  1 1 2 1 1 1 9 9 1 2 ...
##  $ PFTHB5YR: int  2 6 2 2 2 6 6 6 2 2 ...
##  $ PFWEIGHT: num  338.6 44.6 1706.9 151 1683.9 ...
##  $ PGEOGR  : int  3 26 22 22 16 10 10 16 18 4 ...
##  $ PHGEDUC : int  3 4 6 7 5 2 99 99 1 6 ...
##  $ PHHSIZE : int  3 2 1 3 1 2 99 99 5 1 ...
##  $ PHHTTINC: num  75000 92500 60000 190000 97500 ...
##  $ PHTYPE  : int  1 3 5 1 5 2 99 99 1 5 ...
##  $ PLIS_05 : int  6 6 1 7 7 7 2 6 9 9 ...
##  $ PNES_05 : int  3 1 1 3 2 1 3 2 3 3 ...
##  $ PNSC_15 : int  1 4 2 2 4 2 2 1 1 1 ...
##  $ POWN_20 : int  1 6 1 1 1 6 6 6 1 1 ...
##  $ POWN_80 : int  50000 99999996 520000 350000 100000 99999996 99999996 99999996 200000 90000 ...
##  $ PPAC_05 : int  4 4 4 3 3 1 1 1 1 4 ...
##  $ PPAC_10 : int  1 1 2 1 2 2 1 1 2 1 ...
##  $ PPAC_23 : int  1 1 6 1 6 2 9 9 1 6 ...
##  $ PPAC_30 : int  1 2 2 2 2 2 2 2 1 1 ...
##  $ PPAC_35 : int  6 2 2 2 2 2 2 2 6 6 ...
##  $ PPAC_45A: int  2 2 2 2 2 2 1 2 2 2 ...
##  $ PPAC_45C: int  2 2 1 2 2 2 2 2 2 2 ...
##  $ PPAC_45D: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45E: int  2 2 2 2 2 1 2 1 2 2 ...
##  $ PPAC_45F: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45G: int  2 2 2 1 2 2 2 2 2 1 ...
##  $ PPAC_45H: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45I: int  1 1 2 1 2 2 2 2 1 2 ...
##  $ PPAC_45J: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45K: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45L: int  2 2 2 2 1 2 2 2 2 2 ...
##  $ PPAC_45M: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ PPAC_45N: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPAC_45O: int  2 2 2 2 2 2 2 2 2 2 ...
##  $ PPROV   : int  12 59 48 48 35 24 24 35 46 12 ...
##  $ PRSPGNDR: int  2 2 2 1 1 1 1 1 1 2 ...
##   [list output truncated]

# Preview the first six rows of the data frame.
utils::head(housing_data, n = 6L)

##   PUMFID EHA_10 EHA_10A EHA_10B EHA_25 DWS_05A DWI_05A DWI_05B DWI_05C DWI_05D
## 1  63501      3       6       6      2       3       2       2       2       2
## 2  63502      2       6       2      2       3       2       2       2       1
## 3  63503      2       6       2      2       2       2       2       2       2
## 4  63504      5       6       6      2       3       2       2       2       2
## 5  63505      3       6       6      2       1       2       2       2       2
## 6  63506      3       6       6      2       3       2       2       2       2
##   NES_05A NSC_30A NSC_30B NSC_30C NEI_05A NEI_05B NEI_05C NEI_05D NEI_05E
## 1       3       1       1       1       4       4       4       4       4
## 2       2       1       1       3       4       3       3       3       2
## 3       2       2       2       3       3       3       3       3       3
## 4       3       3       2       3       4       4       4       4       4
## 5       3       1       1       3       4       4       4       4       4
## 6       3       2       2       3       3       4       3       3       4
##   NEI_05F NEI_05G NEI_05H NEI_05I WSA_05 SDH_05 CER_05 CER_20 LIS_10 COS_10
## 1       4       4       4       4      2      2      2      3      2      3
## 2       1       1       2       3      2      1      1      1      1      3
## 3       2       2       2       4      2      2      2      3      2      2
## 4       4       4       3       4      2      2      2      3      1      1
## 5       4       4       4       4      2      2      2      3      2      2
## 6       4       4       4       4      2      2      2      3      2      3
##   COS_15 GH_05 GH_10 REGION PAGEGR1 PAGEGR2 PAGEGR3 PAGEGR4 PAGEP1 PCER_10
## 1      3     4     3      1       2       1       1       2      3      96
## 2      2     4     4      5       9       9       9       9      3       2
## 3      3     4     4      4       2       2       2       1      4      96
## 4      3     1     3      4       1       2       1       2      2      96
## 5      4     1     1      3       2       2       1       2      2      96
## 6      3     4     2      2       2       1       2       2      1      96
##   PCER_15 PCHN PCOS_05 PDCLASS PDCT_05 PDCT_20 PDCT_25 PDTYPER PDV_SAH PDV_SHCO
## 1       6    2       9       1       1       4       1       3       6  4.7e+03
## 2       3    2       5       1       2       2       1       9       2  1.4e+03
## 3       6    2       1       0       1       3       2       0       6  3.4e+03
## 4       6    2       6       1       1       3       1       1       6  3.5e+03
## 5       6    2       4       0       1       2       1       0       6  2.1e+03
## 6       6    2      99       1       2       1       1       1       2  1.0e+07
##   PDV_SUIT PDWLTYPE PDWS_05 PDWS_10A PDWS_10B PDWS_10C PDWS_10D PDWS_10E
## 1        1        1       3        1        1        1        1        1
## 2        1        6       2        4        3        2        2        2
## 3        1        1       1        2        2        4        2        2
## 4        1        2       2        2        2        3        2        2
## 5        1        1       3        4        4        4        1        1
## 6        1        6       2        3        4        2        2        3
##   PDWS_10F PDWS_10G PDWS_10H PDWS_10I PDWS_10J PEHA_05A PEHA_05B PEHA_05C PEMPL
## 1        1        1        1        1        1        2        2        2     1
## 2        4        2        2        2        2        2        1        2     1
## 3        3        2        2        2        2        2        1        2     2
## 4        3        2        2        2        2        2        2        2     1
## 5        3        1        2        2        4        2        2        2     1
## 6        3        3        2        3        3        2        2        2     1
##   PFTHB5YR  PFWEIGHT PGEOGR PHGEDUC PHHSIZE PHHTTINC PHTYPE PLIS_05 PNES_05
## 1        2  338.5873      3       3       3 7.50e+04      1       6       3
## 2        6   44.6467     26       4       2 9.25e+04      3       6       1
## 3        2 1706.9443     22       6       1 6.00e+04      5       1       1
## 4        2  150.9932     22       7       3 1.90e+05      1       7       3
## 5        2 1683.8582     16       5       1 9.75e+04      5       7       2
## 6        6  205.0446     10       2       2 1.00e+11      2       7       1
##   PNSC_15 POWN_20  POWN_80 PPAC_05 PPAC_10 PPAC_23 PPAC_30 PPAC_35 PPAC_45A
## 1       1       1    50000       4       1       1       1       6        2
## 2       4       6 99999996       4       1       1       2       2        2
## 3       2       1   520000       4       2       6       2       2        2
## 4       2       1   350000       3       1       1       2       2        2
## 5       4       1   100000       3       2       6       2       2        2
## 6       2       6 99999996       1       2       2       2       2        2
##   PPAC_45C PPAC_45D PPAC_45E PPAC_45F PPAC_45G PPAC_45H PPAC_45I PPAC_45J
## 1        2        2        2        2        2        2        1        2
## 2        2        2        2        2        2        2        1        2
## 3        1        2        2        2        2        2        2        2
## 4        2        2        2        2        1        2        1        2
## 5        2        2        2        2        2        2        2        2
## 6        2        2        1        2        2        2        2        2
##   PPAC_45K PPAC_45L PPAC_45M PPAC_45N PPAC_45O PPROV PRSPGNDR PRSPIMST PSCR_05
## 1        2        2        2        2        2    12        2        9       6
## 2        2        2        2        2        2    59        2        1       2
## 3        2        2        2        2        2    48        2        1       6
## 4        2        2        1        2        2    48        1        1       6
## 5        2        1        2        2        2    35        1        1       6
## 6        2        2        2        2        2    24        1        1       2
##   PSCR_10 PSCR_25 PSCR_35 PSCR_D40 PSTIR_GR PVISMIN PWSA_D15    VERDATE
## 1       6      96       6    999.6        3       9    999.6 30/11/2022
## 2       2       1       2    999.6        1       2    999.6 30/11/2022
## 3       6      96       6    999.6        3       2    999.6 30/11/2022
## 4       6      96       6    999.6        1       1    999.6 30/11/2022
## 5       6      96       6    999.6        1       2    999.6 30/11/2022
## 6       2       3       6    999.6        1       2    999.6 30/11/2022

# List every column name in the data frame.
colnames(housing_data)

##   [1] "PUMFID"   "EHA_10"   "EHA_10A"  "EHA_10B"  "EHA_25"   "DWS_05A" 
##   [7] "DWI_05A"  "DWI_05B"  "DWI_05C"  "DWI_05D"  "NES_05A"  "NSC_30A" 
##  [13] "NSC_30B"  "NSC_30C"  "NEI_05A"  "NEI_05B"  "NEI_05C"  "NEI_05D" 
##  [19] "NEI_05E"  "NEI_05F"  "NEI_05G"  "NEI_05H"  "NEI_05I"  "WSA_05"  
##  [25] "SDH_05"   "CER_05"   "CER_20"   "LIS_10"   "COS_10"   "COS_15"  
##  [31] "GH_05"    "GH_10"    "REGION"   "PAGEGR1"  "PAGEGR2"  "PAGEGR3" 
##  [37] "PAGEGR4"  "PAGEP1"   "PCER_10"  "PCER_15"  "PCHN"     "PCOS_05" 
##  [43] "PDCLASS"  "PDCT_05"  "PDCT_20"  "PDCT_25"  "PDTYPER"  "PDV_SAH" 
##  [49] "PDV_SHCO" "PDV_SUIT" "PDWLTYPE" "PDWS_05"  "PDWS_10A" "PDWS_10B"
##  [55] "PDWS_10C" "PDWS_10D" "PDWS_10E" "PDWS_10F" "PDWS_10G" "PDWS_10H"
##  [61] "PDWS_10I" "PDWS_10J" "PEHA_05A" "PEHA_05B" "PEHA_05C" "PEMPL"   
##  [67] "PFTHB5YR" "PFWEIGHT" "PGEOGR"   "PHGEDUC"  "PHHSIZE"  "PHHTTINC"
##  [73] "PHTYPE"   "PLIS_05"  "PNES_05"  "PNSC_15"  "POWN_20"  "POWN_80" 
##  [79] "PPAC_05"  "PPAC_10"  "PPAC_23"  "PPAC_30"  "PPAC_35"  "PPAC_45A"
##  [85] "PPAC_45C" "PPAC_45D" "PPAC_45E" "PPAC_45F" "PPAC_45G" "PPAC_45H"
##  [91] "PPAC_45I" "PPAC_45J" "PPAC_45K" "PPAC_45L" "PPAC_45M" "PPAC_45N"
##  [97] "PPAC_45O" "PPROV"    "PRSPGNDR" "PRSPIMST" "PSCR_05"  "PSCR_10" 
## [103] "PSCR_25"  "PSCR_35"  "PSCR_D40" "PSTIR_GR" "PVISMIN"  "PWSA_D15"
## [109] "VERDATE"

Table GH_05 (General Health - Focal Explanatory Variable)

# Tabulate how many rows fall under each raw GH_05 code.
table(housing_data[["GH_05"]])

## 
##     1     2     3     4     5     9 
##  5428 13124 13955  6208  2068   205

# Load necessary libraries
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(gt)

# Translate GH_05 codes into text labels through a positional lookup;
# any code that is not listed (including NA) becomes "Unknown".
housing_data <- mutate(
  housing_data,
  GH_05_recode = coalesce(
    c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")[
      match(GH_05, c(1, 2, 3, 4, 5, 9))
    ],
    "Unknown"
  )
)

# Count rows per code/label pair and express each count as a percentage of
# all rows, rounded to two decimals.
freq_table <- mutate(
  count(housing_data, GH_05, GH_05_recode, name = "Frequency"),
  Percentage = round(Frequency / sum(Frequency) * 100, digits = 2)
)

# Add the weighted frequency for each GH_05 code.
# The data frame has no column named "weight", so the previous check always
# fell through to NA. PFWEIGHT appears to be the survey weight in this file
# (NOTE(review): confirm against the PUMF codebook). "weight" is still
# accepted as a fallback so inputs that do carry that column keep working.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(GH_05) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  freq_table <- left_join(freq_table, weighted_freq, by = "GH_05")
} else {
  # Numeric NA keeps the column numeric when no weight column is available
  freq_table <- freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Render the GH_05 frequency table with gt.
# Counts are whole numbers, so they are printed without decimals; the
# percentage and weighted-frequency columns keep two decimals.
freq_table %>%
  gt() %>%
  tab_header(
    title = "General Health (GH_05) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05 = "Code",
    GH_05_recode = "General Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  # Bold every column heading
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	General Health Category	Count	% of Total	Weighted Frequency
General Health (GH_05) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	Excellent	5,428.00	13.24	NA
2	Very Good	13,124.00	32.02	NA
3	Good	13,955.00	34.05	NA
4	Fair	6,208.00	15.15	NA
5	Poor	2,068.00	5.05	NA
9	Not Stated	205.00	0.50	NA

# Load necessary libraries
library(dplyr)
library(readr)
library(gt)

# Translate GH_10 codes into text labels through a positional lookup;
# any code that is not listed (including NA) becomes "Unknown".
housing_data <- mutate(
  housing_data,
  GH_10_recode = coalesce(
    c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")[
      match(GH_10, c(1, 2, 3, 4, 5, 9))
    ],
    "Unknown"
  )
)

# Count rows per code/label pair and express each count as a percentage of
# all rows, rounded to two decimals.
gh10_freq_table <- mutate(
  count(housing_data, GH_10, GH_10_recode, name = "Frequency"),
  Percentage = round(Frequency / sum(Frequency) * 100, digits = 2)
)

# Add the weighted frequency for each GH_10 code.
# The data frame has no column named "weight", so the previous check always
# fell through to NA. PFWEIGHT appears to be the survey weight in this file
# (NOTE(review): confirm against the PUMF codebook). "weight" is still
# accepted as a fallback so inputs that do carry that column keep working.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(GH_10) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  gh10_freq_table <- left_join(gh10_freq_table, weighted_freq, by = "GH_10")
} else {
  # Numeric NA keeps the column numeric when no weight column is available
  gh10_freq_table <- gh10_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Render the GH_10 frequency table with gt.
# Counts are whole numbers, so they are printed without decimals; the
# percentage and weighted-frequency columns keep two decimals.
gh10_freq_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health (GH_10) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_10 = "Code",
    GH_10_recode = "Mental Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  # Bold every column heading
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	Mental Health Category	Count	% of Total	Weighted Frequency
Mental Health (GH_10) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	Excellent	6,728.00	16.41	NA
2	Very Good	12,655.00	30.87	NA
3	Good	13,519.00	32.98	NA
4	Fair	6,027.00	14.70	NA
5	Poor	1,779.00	4.34	NA
9	Not Stated	280.00	0.68	NA

# Load necessary libraries
library(dplyr)
library(readr)
library(gt)

# Translate NES_05A codes into text labels through a positional lookup;
# any code that is not listed (including NA) becomes "Unknown".
housing_data <- mutate(
  housing_data,
  NES_05A_recode = coalesce(
    c("Increased", "Decreased", "About the Same", "Not Stated")[
      match(NES_05A, c(1, 2, 3, 9))
    ],
    "Unknown"
  )
)

# Count rows per code/label pair and express each count as a percentage of
# all rows, rounded to two decimals.
nes05a_freq_table <- mutate(
  count(housing_data, NES_05A, NES_05A_recode, name = "Frequency"),
  Percentage = round(Frequency / sum(Frequency) * 100, digits = 2)
)

# Add the weighted frequency for each NES_05A code.
# The data frame has no column named "weight", so the previous check always
# fell through to NA. PFWEIGHT appears to be the survey weight in this file
# (NOTE(review): confirm against the PUMF codebook). "weight" is still
# accepted as a fallback so inputs that do carry that column keep working.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(NES_05A) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  nes05a_freq_table <- left_join(nes05a_freq_table, weighted_freq, by = "NES_05A")
} else {
  # Numeric NA keeps the column numeric when no weight column is available
  nes05a_freq_table <- nes05a_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Render the NES_05A frequency table with gt.
# Counts are whole numbers, so they are printed without decimals; the
# percentage and weighted-frequency columns keep two decimals.
nes05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighborhood Satisfaction (NES_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NES_05A = "Code",
    NES_05A_recode = "Satisfaction Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  # Bold every column heading
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	Satisfaction Category	Count	% of Total	Weighted Frequency
Neighborhood Satisfaction (NES_05A) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	Increased	2,412.00	5.88	NA
2	Decreased	2,489.00	6.07	NA
3	About the Same	36,034.00	87.91	NA
9	Not Stated	53.00	0.13	NA

# Load necessary libraries
library(dplyr)
library(readr)
library(gt)


# Translate NEI_05A codes into text labels through a positional lookup;
# any code that is not listed (including NA) becomes "Unknown".
housing_data <- mutate(
  housing_data,
  NEI_05A_recode = coalesce(
    c(
      "A big problem", "A moderate problem", "A small problem",
      "Not a problem", "Not Stated"
    )[match(NEI_05A, c(1, 2, 3, 4, 9))],
    "Unknown"
  )
)

# Count rows per code/label pair and express each count as a percentage of
# all rows, rounded to two decimals.
nei05a_freq_table <- mutate(
  count(housing_data, NEI_05A, NEI_05A_recode, name = "Frequency"),
  Percentage = round(Frequency / sum(Frequency) * 100, digits = 2)
)

# Add the weighted frequency for each NEI_05A code.
# The data frame has no column named "weight", so the previous check always
# fell through to NA. PFWEIGHT appears to be the survey weight in this file
# (NOTE(review): confirm against the PUMF codebook). "weight" is still
# accepted as a fallback so inputs that do carry that column keep working.
weight_col <- intersect(c("PFWEIGHT", "weight"), colnames(housing_data))

if (length(weight_col) > 0) {
  weighted_freq <- housing_data %>%
    group_by(NEI_05A) %>%
    summarise(
      Weighted_Frequency = sum(.data[[weight_col[1]]], na.rm = TRUE),
      .groups = "drop"
    )

  nei05a_freq_table <- left_join(nei05a_freq_table, weighted_freq, by = "NEI_05A")
} else {
  # Numeric NA keeps the column numeric when no weight column is available
  nei05a_freq_table <- nei05a_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Render the NEI_05A frequency table with gt.
# Counts are whole numbers, so they are printed without decimals; the
# percentage and weighted-frequency columns keep two decimals.
nei05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighbourhood Issues (NEI_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NEI_05A = "Code",
    NEI_05A_recode = "Neighbourhood Problem Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  # Bold every column heading
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )

Code	Neighbourhood Problem Category	Count	% of Total	Weighted Frequency
Neighbourhood Issues (NEI_05A) Summary
Frequency, Weighted Frequency, and Percentage Distribution
1	A big problem	1,270.00	3.10	NA
2	A moderate problem	3,192.00	7.79	NA
3	A small problem	6,862.00	16.74	NA
4	Not a problem	29,607.00	72.23	NA
9	Not Stated	57.00	0.14	NA

library(dplyr)
library(ggplot2)
library(gt)

# Cross-tabulate the GH_05 and GH_10 recodes and express each cell as a
# percentage of all rows, rounded to two decimals.
relationship_table <- mutate(
  count(housing_data, GH_05_recode, GH_10_recode, name = "Count"),
  Percentage = round(Count / sum(Count) * 100, digits = 2)
)

# Render the cross-tabulation with gt, with readable column headings and a
# title block.
gt(relationship_table) %>%
  cols_label(
    GH_05_recode = "General Health",
    GH_10_recode = "Mental Health",
    Count = "Number of People",
    Percentage = "% of Total"
  ) %>%
  tab_header(
    title = "Relationship Between General Health and Mental Health",
    subtitle = "Frequency and Percentage Distribution"
  )

General Health	Mental Health	Number of People	% of Total
Relationship Between General Health and Mental Health
Frequency and Percentage Distribution
Excellent	Excellent	3131	7.64
Excellent	Fair	165	0.40
Excellent	Good	627	1.53
Excellent	Not Stated	19	0.05
Excellent	Poor	29	0.07
Excellent	Very Good	1457	3.55
Fair	Excellent	400	0.98
Fair	Fair	2217	5.41
Fair	Good	2010	4.90
Fair	Not Stated	25	0.06
Fair	Poor	625	1.52
Fair	Very Good	931	2.27
Good	Excellent	1043	2.54
Good	Fair	2137	5.21
Good	Good	7372	17.99
Good	Not Stated	48	0.12
Good	Poor	410	1.00
Good	Very Good	2945	7.19
Not Stated	Excellent	11	0.03
Not Stated	Fair	6	0.01
Not Stated	Good	25	0.06
Not Stated	Not Stated	140	0.34
Not Stated	Poor	4	0.01
Not Stated	Very Good	19	0.05
Poor	Excellent	118	0.29
Poor	Fair	645	1.57
Poor	Good	485	1.18
Poor	Not Stated	12	0.03
Poor	Poor	569	1.39
Poor	Very Good	239	0.58
Very Good	Excellent	2025	4.94
Very Good	Fair	857	2.09
Very Good	Good	3000	7.32
Very Good	Not Stated	36	0.09
Very Good	Poor	142	0.35
Very Good	Very Good	7064	17.23

library(ggplot2)

# Convert both health recodes to factors with the same explicit level order
# (Excellent first, Not Stated last). Values outside these levels become NA.
housing_data <- mutate(
  housing_data,
  GH_05_recode = factor(
    GH_05_recode,
    levels = c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")
  ),
  GH_10_recode = factor(
    GH_10_recode,
    levels = c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")
  )
)

# Proportion-stacked bar chart: one bar per GH_05 recode level, each bar
# split by GH_10 recode and scaled to a total height of 1.
ggplot(
  data = housing_data,
  mapping = aes(x = GH_05_recode, fill = GH_10_recode)
) +
  geom_bar(position = position_fill()) +
  theme_minimal() +
  labs(
    title = "Mental Health by General Health",
    x = "General Health",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  )

# Create frequency table for NEI_05A (Noisy Neighbors) vs GH_10 (Mental Health).
# NEI_05A stays as the first grouping key so rows keep their code order; the
# text label NEI_05A_recode is carried along for display. Percentages are
# relative to all rows, rounded to two decimals.
noisy_neighbors_table <- housing_data %>%
  count(NEI_05A, NEI_05A_recode, GH_10_recode, name = "Count") %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 2))

# Display the table: show the text label instead of the raw numeric code
# (which is hidden, not dropped) and print counts as whole numbers.
noisy_neighbors_table %>%
  gt() %>%
  cols_hide(columns = NEI_05A) %>%
  tab_header(
    title = "Mental Health by Noisy Neighbors",
    subtitle = "Exploring the Impact of Noise Disturbances on Mental Health"
  ) %>%
  cols_label(
    NEI_05A_recode = "Noise Disturbance Level",
    GH_10_recode = "Mental Health Category",
    Count = "Number of Respondents",
    Percentage = "% of Total"
  ) %>%
  fmt_number(columns = Count, decimals = 0) %>%
  fmt_number(columns = Percentage, decimals = 2) %>%
  # Bold every column heading
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(everything())
  )

Noise Disturbance Level	Mental Health Category	Number of Respondents	% of Total
Mental Health by Noisy Neighbors
Exploring the Impact of Noise Disturbances on Mental Health
1	Excellent	182.00	0.44
1	Very Good	247.00	0.60
1	Good	379.00	0.92
1	Fair	272.00	0.66
1	Poor	183.00	0.45
1	Not Stated	7.00	0.02
2	Excellent	355.00	0.87
2	Very Good	774.00	1.89
2	Good	1,094.00	2.67
2	Fair	704.00	1.72
2	Poor	246.00	0.60
2	Not Stated	19.00	0.05
3	Excellent	792.00	1.93
3	Very Good	1,915.00	4.67
3	Good	2,413.00	5.89
3	Fair	1,310.00	3.20
3	Poor	391.00	0.95
3	Not Stated	41.00	0.10
4	Excellent	5,388.00	13.15
4	Very Good	9,703.00	23.67
4	Good	9,619.00	23.47
4	Fair	3,734.00	9.11
4	Poor	958.00	2.34
4	Not Stated	205.00	0.50
9	Excellent	11.00	0.03
9	Very Good	16.00	0.04
9	Good	14.00	0.03
9	Fair	7.00	0.02
9	Poor	1.00	0.00
9	Not Stated	8.00	0.02

library(ggplot2)

# Visualization of Mental Health by Noisy Neighbors.
# The x aesthetic uses the text recode (NEI_05A_recode) rather than the
# numeric code, so the discrete scale limits below match actual data values.
# The last limit is spelled "Not Stated", exactly as the recode writes it;
# "Not stated" would not match any row.
ggplot(housing_data, aes(x = NEI_05A_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by Noisy Neighbors",
    x = "Noise Disturbance Level",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  # Fix the left-to-right order of the categories on the x axis
  scale_x_discrete(
    limits = c(
      "A big problem", "A moderate problem", "A small problem",
      "Not a problem", "Not Stated"
    )
  ) +
  theme_minimal() +
  theme(
    # Tilt the long category labels so they do not overlap
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1),
    plot.margin = margin(20, 20, 20, 20)
  )

library(ggplot2)

# Visualization of Mental Health by Neighborhood Satisfaction.
# The x aesthetic uses the text recode (NES_05A_recode) rather than the
# numeric code, so the discrete scale limits below match actual data values.
# The last limit is spelled "Not Stated", exactly as the recode writes it;
# "Not stated" would not match any row.
ggplot(housing_data, aes(x = NES_05A_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by Neighborhood Satisfaction",
    x = "Neighborhood Satisfaction",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  # Fix the left-to-right order of the categories on the x axis
  scale_x_discrete(
    limits = c("Increased", "About the Same", "Decreased", "Not Stated")
  ) +
  theme_minimal() +
  theme(
    # Tilt the category labels so they do not overlap
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1)
  )

library(ggplot2)

# Proportion-stacked bars of the GH_10 recode within each GH_05 recode level,
# drawn as one panel per NES_05A code.
# The panels are still split on the numeric NES_05A code (so they keep code
# order), but the strip text now shows the text label instead of a bare
# number. The code-to-label mapping mirrors the NES_05A recode used in this
# file.
ggplot(housing_data, aes(x = GH_05_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  facet_wrap(
    ~ NES_05A,
    labeller = as_labeller(c(
      "1" = "Increased",
      "2" = "Decreased",
      "3" = "About the Same",
      "9" = "Not Stated"
    ))
  ) +
  labs(
    title = "Mental Health by General Health and Neighborhood Satisfaction",
    x = "General Health",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  theme_minimal() +
  theme(
    # Tilt the category labels so they do not overlap
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1)
  )

Initial Analysis

Kaleb Pearson

2025-03-11

Table GH_05 (General Health - Focal Explanatory Variable)