# Read the survey CSV into a data frame, then print a compact overview of
# every column (name, storage type, first few values).
housing_data <- read.csv(file = "/Users/pearcy/CHS2021ECL_PUMF.csv")
str(object = housing_data)
## 'data.frame': 40988 obs. of 109 variables:
## $ PUMFID : int 63501 63502 63503 63504 63505 63506 63507 63508 63509 63510 ...
## $ EHA_10 : int 3 2 2 5 3 3 1 3 4 4 ...
## $ EHA_10A : int 6 6 6 6 6 6 1 6 6 6 ...
## $ EHA_10B : int 6 2 2 6 6 6 6 6 6 6 ...
## $ EHA_25 : int 2 2 2 2 2 2 1 2 2 2 ...
## $ DWS_05A : int 3 3 2 3 1 3 3 3 3 3 ...
## $ DWI_05A : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DWI_05B : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DWI_05C : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DWI_05D : int 2 1 2 2 2 2 2 2 2 2 ...
## $ NES_05A : int 3 2 2 3 3 3 1 3 3 3 ...
## $ NSC_30A : int 1 1 2 3 1 2 1 2 2 1 ...
## $ NSC_30B : int 1 1 2 2 1 2 1 1 1 1 ...
## $ NSC_30C : int 1 3 3 3 3 3 1 3 2 2 ...
## $ NEI_05A : int 4 4 3 4 4 3 4 4 4 4 ...
## $ NEI_05B : int 4 3 3 4 4 4 2 4 4 4 ...
## $ NEI_05C : int 4 3 3 4 4 3 4 4 4 4 ...
## $ NEI_05D : int 4 3 3 4 4 3 4 4 4 4 ...
## $ NEI_05E : int 4 2 3 4 4 4 4 4 4 4 ...
## $ NEI_05F : int 4 1 2 4 4 4 4 4 4 4 ...
## $ NEI_05G : int 4 1 2 4 4 4 4 4 4 4 ...
## $ NEI_05H : int 4 2 2 3 4 4 4 4 4 4 ...
## $ NEI_05I : int 4 3 4 4 4 4 4 4 4 4 ...
## $ WSA_05 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ SDH_05 : int 2 1 2 2 2 2 2 2 2 2 ...
## $ CER_05 : int 2 1 2 2 2 2 2 2 2 2 ...
## $ CER_20 : int 3 1 3 3 3 3 3 3 3 3 ...
## $ LIS_10 : int 2 1 2 1 2 2 2 3 1 3 ...
## $ COS_10 : int 3 3 2 1 2 3 3 9 3 3 ...
## $ COS_15 : int 3 2 3 3 4 3 2 2 1 1 ...
## $ GH_05 : int 4 4 4 1 1 4 5 3 2 2 ...
## $ GH_10 : int 3 4 4 3 1 2 3 3 2 2 ...
## $ REGION : int 1 5 4 4 3 2 2 3 4 1 ...
## $ PAGEGR1 : int 2 9 2 1 2 2 9 9 1 2 ...
## $ PAGEGR2 : int 1 9 2 2 2 1 9 9 2 2 ...
## $ PAGEGR3 : int 1 9 2 1 1 2 9 9 1 2 ...
## $ PAGEGR4 : int 2 9 1 2 2 2 9 9 2 1 ...
## $ PAGEP1 : int 3 3 4 2 2 1 2 1 2 4 ...
## $ PCER_10 : int 96 2 96 96 96 96 96 96 96 96 ...
## $ PCER_15 : int 6 3 6 6 6 6 6 6 6 6 ...
## $ PCHN : int 2 2 2 2 2 2 1 2 2 2 ...
## $ PCOS_05 : int 9 5 1 6 4 99 5 99 9 9 ...
## $ PDCLASS : int 1 1 0 1 0 1 2 0 0 0 ...
## $ PDCT_05 : int 1 2 1 1 1 2 2 2 1 1 ...
## $ PDCT_20 : int 4 2 3 3 2 1 3 2 4 3 ...
## $ PDCT_25 : int 1 1 2 1 1 1 1 1 1 1 ...
## $ PDTYPER : int 3 9 0 1 0 1 9 0 0 0 ...
## $ PDV_SAH : int 6 2 6 6 6 2 2 2 6 6 ...
## $ PDV_SHCO: num 4700 1400 3400 3500 2100 ...
## $ PDV_SUIT: int 1 1 1 1 1 1 1 1 1 1 ...
## $ PDWLTYPE: int 1 6 1 2 1 6 4 99 1 1 ...
## $ PDWS_05 : int 3 2 1 2 3 2 3 3 3 3 ...
## $ PDWS_10A: int 1 4 2 2 4 3 2 2 1 1 ...
## $ PDWS_10B: int 1 3 2 2 4 4 2 2 1 1 ...
## $ PDWS_10C: int 1 2 4 3 4 2 4 2 1 1 ...
## $ PDWS_10D: int 1 2 2 2 1 2 2 2 2 1 ...
## $ PDWS_10E: int 1 2 2 2 1 3 2 2 1 1 ...
## $ PDWS_10F: int 1 4 3 3 3 3 4 2 2 2 ...
## $ PDWS_10G: int 1 2 2 2 1 3 2 2 1 1 ...
## $ PDWS_10H: int 1 2 2 2 2 2 2 2 2 1 ...
## $ PDWS_10I: int 1 2 2 2 2 3 1 2 1 1 ...
## $ PDWS_10J: int 1 2 2 2 4 3 1 2 1 1 ...
## $ PEHA_05A: int 2 2 2 2 2 2 1 2 2 2 ...
## $ PEHA_05B: int 2 1 1 2 2 2 1 2 2 2 ...
## $ PEHA_05C: int 2 2 2 2 2 2 1 2 2 2 ...
## $ PEMPL : int 1 1 2 1 1 1 9 9 1 2 ...
## $ PFTHB5YR: int 2 6 2 2 2 6 6 6 2 2 ...
## $ PFWEIGHT: num 338.6 44.6 1706.9 151 1683.9 ...
## $ PGEOGR : int 3 26 22 22 16 10 10 16 18 4 ...
## $ PHGEDUC : int 3 4 6 7 5 2 99 99 1 6 ...
## $ PHHSIZE : int 3 2 1 3 1 2 99 99 5 1 ...
## $ PHHTTINC: num 75000 92500 60000 190000 97500 ...
## $ PHTYPE : int 1 3 5 1 5 2 99 99 1 5 ...
## $ PLIS_05 : int 6 6 1 7 7 7 2 6 9 9 ...
## $ PNES_05 : int 3 1 1 3 2 1 3 2 3 3 ...
## $ PNSC_15 : int 1 4 2 2 4 2 2 1 1 1 ...
## $ POWN_20 : int 1 6 1 1 1 6 6 6 1 1 ...
## $ POWN_80 : int 50000 99999996 520000 350000 100000 99999996 99999996 99999996 200000 90000 ...
## $ PPAC_05 : int 4 4 4 3 3 1 1 1 1 4 ...
## $ PPAC_10 : int 1 1 2 1 2 2 1 1 2 1 ...
## $ PPAC_23 : int 1 1 6 1 6 2 9 9 1 6 ...
## $ PPAC_30 : int 1 2 2 2 2 2 2 2 1 1 ...
## $ PPAC_35 : int 6 2 2 2 2 2 2 2 6 6 ...
## $ PPAC_45A: int 2 2 2 2 2 2 1 2 2 2 ...
## $ PPAC_45C: int 2 2 1 2 2 2 2 2 2 2 ...
## $ PPAC_45D: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45E: int 2 2 2 2 2 1 2 1 2 2 ...
## $ PPAC_45F: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45G: int 2 2 2 1 2 2 2 2 2 1 ...
## $ PPAC_45H: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45I: int 1 1 2 1 2 2 2 2 1 2 ...
## $ PPAC_45J: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45K: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45L: int 2 2 2 2 1 2 2 2 2 2 ...
## $ PPAC_45M: int 2 2 2 1 2 2 2 2 2 2 ...
## $ PPAC_45N: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPAC_45O: int 2 2 2 2 2 2 2 2 2 2 ...
## $ PPROV : int 12 59 48 48 35 24 24 35 46 12 ...
## $ PRSPGNDR: int 2 2 2 1 1 1 1 1 1 2 ...
## [list output truncated]
# Preview the first six rows of the data frame.
head(x = housing_data, n = 6L)
## PUMFID EHA_10 EHA_10A EHA_10B EHA_25 DWS_05A DWI_05A DWI_05B DWI_05C DWI_05D
## 1 63501 3 6 6 2 3 2 2 2 2
## 2 63502 2 6 2 2 3 2 2 2 1
## 3 63503 2 6 2 2 2 2 2 2 2
## 4 63504 5 6 6 2 3 2 2 2 2
## 5 63505 3 6 6 2 1 2 2 2 2
## 6 63506 3 6 6 2 3 2 2 2 2
## NES_05A NSC_30A NSC_30B NSC_30C NEI_05A NEI_05B NEI_05C NEI_05D NEI_05E
## 1 3 1 1 1 4 4 4 4 4
## 2 2 1 1 3 4 3 3 3 2
## 3 2 2 2 3 3 3 3 3 3
## 4 3 3 2 3 4 4 4 4 4
## 5 3 1 1 3 4 4 4 4 4
## 6 3 2 2 3 3 4 3 3 4
## NEI_05F NEI_05G NEI_05H NEI_05I WSA_05 SDH_05 CER_05 CER_20 LIS_10 COS_10
## 1 4 4 4 4 2 2 2 3 2 3
## 2 1 1 2 3 2 1 1 1 1 3
## 3 2 2 2 4 2 2 2 3 2 2
## 4 4 4 3 4 2 2 2 3 1 1
## 5 4 4 4 4 2 2 2 3 2 2
## 6 4 4 4 4 2 2 2 3 2 3
## COS_15 GH_05 GH_10 REGION PAGEGR1 PAGEGR2 PAGEGR3 PAGEGR4 PAGEP1 PCER_10
## 1 3 4 3 1 2 1 1 2 3 96
## 2 2 4 4 5 9 9 9 9 3 2
## 3 3 4 4 4 2 2 2 1 4 96
## 4 3 1 3 4 1 2 1 2 2 96
## 5 4 1 1 3 2 2 1 2 2 96
## 6 3 4 2 2 2 1 2 2 1 96
## PCER_15 PCHN PCOS_05 PDCLASS PDCT_05 PDCT_20 PDCT_25 PDTYPER PDV_SAH PDV_SHCO
## 1 6 2 9 1 1 4 1 3 6 4.7e+03
## 2 3 2 5 1 2 2 1 9 2 1.4e+03
## 3 6 2 1 0 1 3 2 0 6 3.4e+03
## 4 6 2 6 1 1 3 1 1 6 3.5e+03
## 5 6 2 4 0 1 2 1 0 6 2.1e+03
## 6 6 2 99 1 2 1 1 1 2 1.0e+07
## PDV_SUIT PDWLTYPE PDWS_05 PDWS_10A PDWS_10B PDWS_10C PDWS_10D PDWS_10E
## 1 1 1 3 1 1 1 1 1
## 2 1 6 2 4 3 2 2 2
## 3 1 1 1 2 2 4 2 2
## 4 1 2 2 2 2 3 2 2
## 5 1 1 3 4 4 4 1 1
## 6 1 6 2 3 4 2 2 3
## PDWS_10F PDWS_10G PDWS_10H PDWS_10I PDWS_10J PEHA_05A PEHA_05B PEHA_05C PEMPL
## 1 1 1 1 1 1 2 2 2 1
## 2 4 2 2 2 2 2 1 2 1
## 3 3 2 2 2 2 2 1 2 2
## 4 3 2 2 2 2 2 2 2 1
## 5 3 1 2 2 4 2 2 2 1
## 6 3 3 2 3 3 2 2 2 1
## PFTHB5YR PFWEIGHT PGEOGR PHGEDUC PHHSIZE PHHTTINC PHTYPE PLIS_05 PNES_05
## 1 2 338.5873 3 3 3 7.50e+04 1 6 3
## 2 6 44.6467 26 4 2 9.25e+04 3 6 1
## 3 2 1706.9443 22 6 1 6.00e+04 5 1 1
## 4 2 150.9932 22 7 3 1.90e+05 1 7 3
## 5 2 1683.8582 16 5 1 9.75e+04 5 7 2
## 6 6 205.0446 10 2 2 1.00e+11 2 7 1
## PNSC_15 POWN_20 POWN_80 PPAC_05 PPAC_10 PPAC_23 PPAC_30 PPAC_35 PPAC_45A
## 1 1 1 50000 4 1 1 1 6 2
## 2 4 6 99999996 4 1 1 2 2 2
## 3 2 1 520000 4 2 6 2 2 2
## 4 2 1 350000 3 1 1 2 2 2
## 5 4 1 100000 3 2 6 2 2 2
## 6 2 6 99999996 1 2 2 2 2 2
## PPAC_45C PPAC_45D PPAC_45E PPAC_45F PPAC_45G PPAC_45H PPAC_45I PPAC_45J
## 1 2 2 2 2 2 2 1 2
## 2 2 2 2 2 2 2 1 2
## 3 1 2 2 2 2 2 2 2
## 4 2 2 2 2 1 2 1 2
## 5 2 2 2 2 2 2 2 2
## 6 2 2 1 2 2 2 2 2
## PPAC_45K PPAC_45L PPAC_45M PPAC_45N PPAC_45O PPROV PRSPGNDR PRSPIMST PSCR_05
## 1 2 2 2 2 2 12 2 9 6
## 2 2 2 2 2 2 59 2 1 2
## 3 2 2 2 2 2 48 2 1 6
## 4 2 2 1 2 2 48 1 1 6
## 5 2 1 2 2 2 35 1 1 6
## 6 2 2 2 2 2 24 1 1 2
## PSCR_10 PSCR_25 PSCR_35 PSCR_D40 PSTIR_GR PVISMIN PWSA_D15 VERDATE
## 1 6 96 6 999.6 3 9 999.6 30/11/2022
## 2 2 1 2 999.6 1 2 999.6 30/11/2022
## 3 6 96 6 999.6 3 2 999.6 30/11/2022
## 4 6 96 6 999.6 1 1 999.6 30/11/2022
## 5 6 96 6 999.6 1 2 999.6 30/11/2022
## 6 2 3 6 999.6 1 2 999.6 30/11/2022
# List every column name in the data frame.
colnames(housing_data)
## [1] "PUMFID" "EHA_10" "EHA_10A" "EHA_10B" "EHA_25" "DWS_05A"
## [7] "DWI_05A" "DWI_05B" "DWI_05C" "DWI_05D" "NES_05A" "NSC_30A"
## [13] "NSC_30B" "NSC_30C" "NEI_05A" "NEI_05B" "NEI_05C" "NEI_05D"
## [19] "NEI_05E" "NEI_05F" "NEI_05G" "NEI_05H" "NEI_05I" "WSA_05"
## [25] "SDH_05" "CER_05" "CER_20" "LIS_10" "COS_10" "COS_15"
## [31] "GH_05" "GH_10" "REGION" "PAGEGR1" "PAGEGR2" "PAGEGR3"
## [37] "PAGEGR4" "PAGEP1" "PCER_10" "PCER_15" "PCHN" "PCOS_05"
## [43] "PDCLASS" "PDCT_05" "PDCT_20" "PDCT_25" "PDTYPER" "PDV_SAH"
## [49] "PDV_SHCO" "PDV_SUIT" "PDWLTYPE" "PDWS_05" "PDWS_10A" "PDWS_10B"
## [55] "PDWS_10C" "PDWS_10D" "PDWS_10E" "PDWS_10F" "PDWS_10G" "PDWS_10H"
## [61] "PDWS_10I" "PDWS_10J" "PEHA_05A" "PEHA_05B" "PEHA_05C" "PEMPL"
## [67] "PFTHB5YR" "PFWEIGHT" "PGEOGR" "PHGEDUC" "PHHSIZE" "PHHTTINC"
## [73] "PHTYPE" "PLIS_05" "PNES_05" "PNSC_15" "POWN_20" "POWN_80"
## [79] "PPAC_05" "PPAC_10" "PPAC_23" "PPAC_30" "PPAC_35" "PPAC_45A"
## [85] "PPAC_45C" "PPAC_45D" "PPAC_45E" "PPAC_45F" "PPAC_45G" "PPAC_45H"
## [91] "PPAC_45I" "PPAC_45J" "PPAC_45K" "PPAC_45L" "PPAC_45M" "PPAC_45N"
## [97] "PPAC_45O" "PPROV" "PRSPGNDR" "PRSPIMST" "PSCR_05" "PSCR_10"
## [103] "PSCR_25" "PSCR_35" "PSCR_D40" "PSTIR_GR" "PVISMIN" "PWSA_D15"
## [109] "VERDATE"
# Tabulate how many rows carry each GH_05 code.
table(housing_data[["GH_05"]])
##
## 1 2 3 4 5 9
## 5428 13124 13955 6208 2068 205
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(gt)
# Recode GH_05 integer codes into descriptive labels.
# Any code not listed below falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(GH_05_recode = case_when(
    GH_05 == 1 ~ "Excellent",
    GH_05 == 2 ~ "Very Good",
    GH_05 == 3 ~ "Good",
    GH_05 == 4 ~ "Fair",
    GH_05 == 5 ~ "Poor",
    GH_05 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Unweighted frequency table: one row per code/label pair, plus each row's
# share of all rows (rounded to 2 decimals).
freq_table <- housing_data %>%
  count(GH_05, GH_05_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add the weighted frequency (sum of weights per code).
# The data frame has no column named "weight" (see names(housing_data)), so
# testing for that name always left this column NA. The weight-like column
# listed for this file is PFWEIGHT.
# NOTE(review): confirm against the survey codebook that PFWEIGHT is the
# intended analysis weight.
if ("PFWEIGHT" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(GH_05) %>%
    summarise(
      Weighted_Frequency = sum(PFWEIGHT, na.rm = TRUE),
      .groups = "drop"
    )
  freq_table <- left_join(freq_table, weighted_freq, by = "GH_05")
} else {
  # Keep the column (as numeric NA) so the table layout below still works.
  freq_table <- freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
freq_table %>%
  gt() %>%
  tab_header(
    title = "General Health (GH_05) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05 = "Code",
    GH_05_recode = "General Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are whole numbers; only percentages and weighted totals get decimals.
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
| General Health (GH_05) Summary | ||||
| Frequency, Weighted Frequency, and Percentage Distribution | ||||
| Code | General Health Category | Count | % of Total | Weighted Frequency |
|---|---|---|---|---|
| 1 | Excellent | 5,428.00 | 13.24 | NA |
| 2 | Very Good | 13,124.00 | 32.02 | NA |
| 3 | Good | 13,955.00 | 34.05 | NA |
| 4 | Fair | 6,208.00 | 15.15 | NA |
| 5 | Poor | 2,068.00 | 5.05 | NA |
| 9 | Not Stated | 205.00 | 0.50 | NA |
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)
# Recode GH_10 integer codes into descriptive labels.
# Any code not listed below falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(GH_10_recode = case_when(
    GH_10 == 1 ~ "Excellent",
    GH_10 == 2 ~ "Very Good",
    GH_10 == 3 ~ "Good",
    GH_10 == 4 ~ "Fair",
    GH_10 == 5 ~ "Poor",
    GH_10 == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Unweighted frequency table: one row per code/label pair, plus each row's
# share of all rows (rounded to 2 decimals).
gh10_freq_table <- housing_data %>%
  count(GH_10, GH_10_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add the weighted frequency (sum of weights per code).
# The data frame has no column named "weight" (see names(housing_data)), so
# testing for that name always left this column NA. The weight-like column
# listed for this file is PFWEIGHT.
# NOTE(review): confirm against the survey codebook that PFWEIGHT is the
# intended analysis weight.
if ("PFWEIGHT" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(GH_10) %>%
    summarise(
      Weighted_Frequency = sum(PFWEIGHT, na.rm = TRUE),
      .groups = "drop"
    )
  gh10_freq_table <- left_join(gh10_freq_table, weighted_freq, by = "GH_10")
} else {
  # Keep the column (as numeric NA) so the table layout below still works.
  gh10_freq_table <- gh10_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
gh10_freq_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health (GH_10) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    GH_10 = "Code",
    GH_10_recode = "Mental Health Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are whole numbers; only percentages and weighted totals get decimals.
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
| Mental Health (GH_10) Summary | ||||
| Frequency, Weighted Frequency, and Percentage Distribution | ||||
| Code | Mental Health Category | Count | % of Total | Weighted Frequency |
|---|---|---|---|---|
| 1 | Excellent | 6,728.00 | 16.41 | NA |
| 2 | Very Good | 12,655.00 | 30.87 | NA |
| 3 | Good | 13,519.00 | 32.98 | NA |
| 4 | Fair | 6,027.00 | 14.70 | NA |
| 5 | Poor | 1,779.00 | 4.34 | NA |
| 9 | Not Stated | 280.00 | 0.68 | NA |
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)
# Recode NES_05A integer codes into descriptive labels.
# Any code not listed below falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(NES_05A_recode = case_when(
    NES_05A == 1 ~ "Increased",
    NES_05A == 2 ~ "Decreased",
    NES_05A == 3 ~ "About the Same",
    NES_05A == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Unweighted frequency table: one row per code/label pair, plus each row's
# share of all rows (rounded to 2 decimals).
nes05a_freq_table <- housing_data %>%
  count(NES_05A, NES_05A_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add the weighted frequency (sum of weights per code).
# The data frame has no column named "weight" (see names(housing_data)), so
# testing for that name always left this column NA. The weight-like column
# listed for this file is PFWEIGHT.
# NOTE(review): confirm against the survey codebook that PFWEIGHT is the
# intended analysis weight.
if ("PFWEIGHT" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(NES_05A) %>%
    summarise(
      Weighted_Frequency = sum(PFWEIGHT, na.rm = TRUE),
      .groups = "drop"
    )
  nes05a_freq_table <- left_join(
    nes05a_freq_table, weighted_freq,
    by = "NES_05A"
  )
} else {
  # Keep the column (as numeric NA) so the table layout below still works.
  nes05a_freq_table <- nes05a_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
nes05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighborhood Satisfaction (NES_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NES_05A = "Code",
    NES_05A_recode = "Satisfaction Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are whole numbers; only percentages and weighted totals get decimals.
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
| Neighborhood Satisfaction (NES_05A) Summary | ||||
| Frequency, Weighted Frequency, and Percentage Distribution | ||||
| Code | Satisfaction Category | Count | % of Total | Weighted Frequency |
|---|---|---|---|---|
| 1 | Increased | 2,412.00 | 5.88 | NA |
| 2 | Decreased | 2,489.00 | 6.07 | NA |
| 3 | About the Same | 36,034.00 | 87.91 | NA |
| 9 | Not Stated | 53.00 | 0.13 | NA |
# Load necessary libraries
library(dplyr)
library(readr)
library(gt)
# Recode NEI_05A integer codes into descriptive labels.
# Any code not listed below falls through to "Unknown".
housing_data <- housing_data %>%
  mutate(NEI_05A_recode = case_when(
    NEI_05A == 1 ~ "A big problem",
    NEI_05A == 2 ~ "A moderate problem",
    NEI_05A == 3 ~ "A small problem",
    NEI_05A == 4 ~ "Not a problem",
    NEI_05A == 9 ~ "Not Stated",
    TRUE ~ "Unknown"
  ))

# Unweighted frequency table: one row per code/label pair, plus each row's
# share of all rows (rounded to 2 decimals).
nei05a_freq_table <- housing_data %>%
  count(NEI_05A, NEI_05A_recode, name = "Frequency") %>%
  mutate(Percentage = round(Frequency / sum(Frequency) * 100, 2))

# Add the weighted frequency (sum of weights per code).
# The data frame has no column named "weight" (see names(housing_data)), so
# testing for that name always left this column NA. The weight-like column
# listed for this file is PFWEIGHT.
# NOTE(review): confirm against the survey codebook that PFWEIGHT is the
# intended analysis weight.
if ("PFWEIGHT" %in% colnames(housing_data)) {
  weighted_freq <- housing_data %>%
    group_by(NEI_05A) %>%
    summarise(
      Weighted_Frequency = sum(PFWEIGHT, na.rm = TRUE),
      .groups = "drop"
    )
  nei05a_freq_table <- left_join(
    nei05a_freq_table, weighted_freq,
    by = "NEI_05A"
  )
} else {
  # Keep the column (as numeric NA) so the table layout below still works.
  nei05a_freq_table <- nei05a_freq_table %>%
    mutate(Weighted_Frequency = NA_real_)
}

# Generate a well-formatted table with gt
nei05a_freq_table %>%
  gt() %>%
  tab_header(
    title = "Neighbourhood Issues (NEI_05A) Summary",
    subtitle = "Frequency, Weighted Frequency, and Percentage Distribution"
  ) %>%
  cols_label(
    NEI_05A = "Code",
    NEI_05A_recode = "Neighbourhood Problem Category",
    Frequency = "Count",
    Percentage = "% of Total",
    Weighted_Frequency = "Weighted Frequency"
  ) %>%
  # Counts are whole numbers; only percentages and weighted totals get decimals.
  fmt_number(columns = Frequency, decimals = 0) %>%
  fmt_number(columns = c(Percentage, Weighted_Frequency), decimals = 2) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.width = pct(80)
  )
| Neighbourhood Issues (NEI_05A) Summary | ||||
| Frequency, Weighted Frequency, and Percentage Distribution | ||||
| Code | Neighbourhood Problem Category | Count | % of Total | Weighted Frequency |
|---|---|---|---|---|
| 1 | A big problem | 1,270.00 | 3.10 | NA |
| 2 | A moderate problem | 3,192.00 | 7.79 | NA |
| 3 | A small problem | 6,862.00 | 16.74 | NA |
| 4 | Not a problem | 29,607.00 | 72.23 | NA |
| 9 | Not Stated | 57.00 | 0.14 | NA |
library(dplyr)
library(ggplot2)
library(gt)
# Cross-tabulate the two recoded health labels: one row per observed
# (GH_05_recode, GH_10_recode) pair with the number of rows in that pair.
relationship_table <- count(
  housing_data,
  GH_05_recode, GH_10_recode,
  name = "Count"
)

# Each pair's share of all counted rows, as a percentage rounded to 2 decimals.
relationship_table$Percentage <- with(
  relationship_table,
  round(Count / sum(Count) * 100, 2)
)

# Render the cross-tabulation with a title and readable column headers.
gt(relationship_table) %>%
  tab_header(
    title = "Relationship Between General Health and Mental Health",
    subtitle = "Frequency and Percentage Distribution"
  ) %>%
  cols_label(
    GH_05_recode = "General Health",
    GH_10_recode = "Mental Health",
    Count = "Number of People",
    Percentage = "% of Total"
  )
| Relationship Between General Health and Mental Health | |||
| Frequency and Percentage Distribution | |||
| General Health | Mental Health | Number of People | % of Total |
|---|---|---|---|
| Excellent | Excellent | 3131 | 7.64 |
| Excellent | Fair | 165 | 0.40 |
| Excellent | Good | 627 | 1.53 |
| Excellent | Not Stated | 19 | 0.05 |
| Excellent | Poor | 29 | 0.07 |
| Excellent | Very Good | 1457 | 3.55 |
| Fair | Excellent | 400 | 0.98 |
| Fair | Fair | 2217 | 5.41 |
| Fair | Good | 2010 | 4.90 |
| Fair | Not Stated | 25 | 0.06 |
| Fair | Poor | 625 | 1.52 |
| Fair | Very Good | 931 | 2.27 |
| Good | Excellent | 1043 | 2.54 |
| Good | Fair | 2137 | 5.21 |
| Good | Good | 7372 | 17.99 |
| Good | Not Stated | 48 | 0.12 |
| Good | Poor | 410 | 1.00 |
| Good | Very Good | 2945 | 7.19 |
| Not Stated | Excellent | 11 | 0.03 |
| Not Stated | Fair | 6 | 0.01 |
| Not Stated | Good | 25 | 0.06 |
| Not Stated | Not Stated | 140 | 0.34 |
| Not Stated | Poor | 4 | 0.01 |
| Not Stated | Very Good | 19 | 0.05 |
| Poor | Excellent | 118 | 0.29 |
| Poor | Fair | 645 | 1.57 |
| Poor | Good | 485 | 1.18 |
| Poor | Not Stated | 12 | 0.03 |
| Poor | Poor | 569 | 1.39 |
| Poor | Very Good | 239 | 0.58 |
| Very Good | Excellent | 2025 | 4.94 |
| Very Good | Fair | 857 | 2.09 |
| Very Good | Good | 3000 | 7.32 |
| Very Good | Not Stated | 36 | 0.09 |
| Very Good | Poor | 142 | 0.35 |
| Very Good | Very Good | 7064 | 17.23 |
library(ggplot2)
# Convert both recoded health labels to factors with an explicit level order
# ("Excellent" first, "Not Stated" last). Any label not listed in `levels`
# becomes NA.
housing_data <- housing_data %>%
  mutate(
    GH_05_recode = factor(
      GH_05_recode,
      levels = c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")
    ),
    GH_10_recode = factor(
      GH_10_recode,
      levels = c("Excellent", "Very Good", "Good", "Fair", "Poor", "Not Stated")
    )
  )

# Stacked bars scaled to proportions (position = "fill"): one bar per GH_05
# label, split by GH_10 label.
ggplot(
  data = housing_data,
  mapping = aes(x = GH_05_recode, fill = GH_10_recode)
) +
  geom_bar(position = "fill") +
  theme_minimal() +
  labs(
    title = "Mental Health by General Health",
    x = "General Health",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  )
# Create frequency table for NEI_05A (Noisy Neighbors) vs GH_10 (Mental Health)
# The integer code is kept next to its label (NEI_05A_recode) so rows stay in
# code order while the table shows a readable level instead of a bare number.
# Code and label map one-to-one, so the counts per row are unaffected.
noisy_neighbors_table <- housing_data %>%
  count(NEI_05A, NEI_05A_recode, GH_10_recode, name = "Count") %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 2))

# Display the table
noisy_neighbors_table %>%
  gt() %>%
  tab_header(
    title = "Mental Health by Noisy Neighbors",
    subtitle = "Exploring the Impact of Noise Disturbances on Mental Health"
  ) %>%
  cols_label(
    NEI_05A = "Code",
    NEI_05A_recode = "Noise Disturbance Level",
    GH_10_recode = "Mental Health Category",
    Count = "Number of Respondents",
    Percentage = "% of Total"
  ) %>%
  # Counts are whole numbers; only the percentage needs decimals.
  fmt_number(columns = Count, decimals = 0) %>%
  fmt_number(columns = Percentage, decimals = 2) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(everything())
  )
| Mental Health by Noisy Neighbors | |||
| Exploring the Impact of Noise Disturbances on Mental Health | |||
| Noise Disturbance Level | Mental Health Category | Number of Respondents | % of Total |
|---|---|---|---|
| 1 | Excellent | 182.00 | 0.44 |
| 1 | Very Good | 247.00 | 0.60 |
| 1 | Good | 379.00 | 0.92 |
| 1 | Fair | 272.00 | 0.66 |
| 1 | Poor | 183.00 | 0.45 |
| 1 | Not Stated | 7.00 | 0.02 |
| 2 | Excellent | 355.00 | 0.87 |
| 2 | Very Good | 774.00 | 1.89 |
| 2 | Good | 1,094.00 | 2.67 |
| 2 | Fair | 704.00 | 1.72 |
| 2 | Poor | 246.00 | 0.60 |
| 2 | Not Stated | 19.00 | 0.05 |
| 3 | Excellent | 792.00 | 1.93 |
| 3 | Very Good | 1,915.00 | 4.67 |
| 3 | Good | 2,413.00 | 5.89 |
| 3 | Fair | 1,310.00 | 3.20 |
| 3 | Poor | 391.00 | 0.95 |
| 3 | Not Stated | 41.00 | 0.10 |
| 4 | Excellent | 5,388.00 | 13.15 |
| 4 | Very Good | 9,703.00 | 23.67 |
| 4 | Good | 9,619.00 | 23.47 |
| 4 | Fair | 3,734.00 | 9.11 |
| 4 | Poor | 958.00 | 2.34 |
| 4 | Not Stated | 205.00 | 0.50 |
| 9 | Excellent | 11.00 | 0.03 |
| 9 | Very Good | 16.00 | 0.04 |
| 9 | Good | 14.00 | 0.03 |
| 9 | Fair | 7.00 | 0.02 |
| 9 | Poor | 1.00 | 0.00 |
| 9 | Not Stated | 8.00 | 0.02 |
library(ggplot2)
# Visualization of Mental Health by Noisy Neighbors
# x uses the labelled recode (NEI_05A_recode), not the integer NEI_05A codes:
# the axis limits below are label strings, so they only match the data when x
# holds those same strings. The last limit is spelled "Not Stated" to match
# the recode exactly.
ggplot(housing_data, aes(x = NEI_05A_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by Noisy Neighbors",
    x = "Noise Disturbance Level",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  # Fix the left-to-right order of the categories (default is alphabetical).
  scale_x_discrete(
    limits = c(
      "A big problem", "A moderate problem", "A small problem",
      "Not a problem", "Not Stated"
    )
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1),
    plot.margin = margin(20, 20, 20, 20)
  )
library(ggplot2)
# Visualization of Mental Health by Neighborhood Satisfaction
# x uses the labelled recode (NES_05A_recode), not the integer NES_05A codes.
# The codes are 1 = Increased, 2 = Decreased, 3 = About the Same, whereas the
# axis order wanted here is Increased / About the Same / Decreased, so string
# limits laid over the raw codes would not correspond to the data. The last
# limit is spelled "Not Stated" to match the recode exactly.
ggplot(housing_data, aes(x = NES_05A_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  labs(
    title = "Mental Health by Neighborhood Satisfaction",
    x = "Neighborhood Satisfaction",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  # Fix the left-to-right order of the categories (default is alphabetical).
  scale_x_discrete(
    limits = c("Increased", "About the Same", "Decreased", "Not Stated")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1)
  )
library(ggplot2)
# Mental-health proportions within each general-health category, drawn as one
# panel per NES_05A category. Faceting on the labelled recode makes each panel
# strip show a readable category name instead of a bare integer code.
ggplot(housing_data, aes(x = GH_05_recode, fill = GH_10_recode)) +
  geom_bar(position = "fill") +
  facet_wrap(~ NES_05A_recode) +
  labs(
    title = "Mental Health by General Health and Neighborhood Satisfaction",
    x = "General Health",
    y = "Proportion of Mental Health",
    fill = "Mental Health"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 30, vjust = 1, hjust = 1)
  )