Neighbourhood Issues and Safety Survey

Initial Setup

# Packages this analysis depends on
packages <- c("tidyverse", "fst", "gt")

# Work out which of them are missing from the local library and install
# only those
new_packages <- setdiff(packages, rownames(installed.packages()))
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package; character.only = TRUE because the names are passed
# as strings rather than bare symbols
lapply(packages, library, character.only = TRUE)

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gt"        "fst"       "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"

Load and Examine Data

# Folder that holds the CHS 2021 PUMF csv files. Keeping it in one place
# means only this line changes when the project moves to another machine.
data_dir <- "C:/Users/kathy/Documents/SOC_3320"

# Read the CHS data csv directly (109 variables). The file is read once;
# reading it a second time only repeated the same work.
chs_data <- read.csv(file.path(data_dir, "CHS2021ECL_PUMF.csv"))

# Read the 1001 variables version -- compressed
chs_data_full <- read.csv(file.path(data_dir, "CHS2021ECL_PUMF_BSW.csv"))

# Basic variable/row summary of the data
dim(chs_data)

## [1] 40988   109

Explore Key Variables

Example of exploration of housing related variables (check the data dictionary PDF!)

# Neighborhood Issues
# Names must match the column names exactly: a leading space (as in
# " NEI_05F") would make the lookup fail to find the column.
nei.issues_vars <- c("NEI_05A", "NEI_05B", "NEI_05C", "NEI_05D", "NEI_05E",
                     "NEI_05F", "NEI_05G")

# Feeling of Safety in Neighborhood
safety_vars <- c("PNSC_15", "PDWS_10G")

# Level of community Satisfaction
satisfaction_vars <- c("COS_10")

Next Steps

Explore your variables of interest with your data dictionary handy. Look closely at what the values mean before summarizing them and doing some exploration.

library(gt)
library(dplyr)

# Select relevant variables
selected_vars <- c("NEI_05A", "NEI_05B", "NEI_05C", "NEI_05D", "NEI_05E",
                   "NEI_05F", "NEI_05G", "PNSC_15", "PDWS_10G")

# Human-readable label for each CHS variable code
variable_labels <- c(
  "NEI_05A" = "Noise Pollution",
  "NEI_05B" = "Loitering",
  "NEI_05C" = "Littering",
  "NEI_05D" = "Vandalism",
  "NEI_05E" = "Discrimination",
  "NEI_05F" = "Drugs",
  "NEI_05G" = "Public Intoxication",
  "PNSC_15" = "Feeling of safety outside",
  "PDWS_10G" = "Feeling of safety within the home"
)

# Code 9 is the "Not stated" response. It is a missing-data marker, not a
# point on the response scale, so it must not enter the mean, SD, max or N.
# NOTE(review): check the data dictionary for other reserved codes and add
# them here if any apply to these variables.
not_stated_code <- 9

# Summary statistics for each variable, computed on stated responses only
summary_list <- lapply(selected_vars, function(var) {
  values <- chs_data[[var]]
  values[values %in% not_stated_code] <- NA

  data.frame(
    Variable = var,
    Statistic = c("Mean", "SD", "Min", "Max", "N"),
    Value = c(
      mean(values, na.rm = TRUE),
      sd(values, na.rm = TRUE),
      min(values, na.rm = TRUE),
      max(values, na.rm = TRUE),
      sum(!is.na(values))  # number of stated (non-missing) responses
    )
  )
})

# Combine all variable summaries into one dataframe and swap the variable
# codes for their readable labels
chs_summary <- bind_rows(summary_list) %>%
  mutate(Variable = recode(Variable, !!!variable_labels))

# Create gt table
chs_summary %>%
  gt() %>%
  tab_header(
    title = "Descriptive Summary of Neighborhood Issues & Safety Variables",
    subtitle = "Summary statistics based on CHS 2021 data"
  ) %>%
  # Two decimals for the statistics, none for N because it is a count
  fmt_number(columns = "Value", rows = Statistic != "N", decimals = 2) %>%
  fmt_number(columns = "Value", rows = Statistic == "N", decimals = 0) %>%
  cols_label(
    Variable = "Variable",
    Statistic = "Statistic",
    Value = "Value"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  )

Variable	Statistic	Value
Descriptive Summary of Neighborhood Issues & Safety Variables
Summary statistics based on CHS 2021 data
Noise Pollution	Mean	3.59
Noise Pollution	SD	0.79
Noise Pollution	Min	1.00
Noise Pollution	Max	9.00
Noise Pollution	N	40,988.00
Loitering	Mean	3.66
Loitering	SD	0.79
Loitering	Min	1.00
Loitering	Max	9.00
Loitering	N	40,988.00
Littering	Mean	3.52
Littering	SD	0.89
Littering	Min	1.00
Littering	Max	9.00
Littering	N	40,988.00
Vandalism	Mean	3.63
Vandalism	SD	0.82
Vandalism	Min	1.00
Vandalism	Max	9.00
Vandalism	N	40,988.00
Discrimination	Mean	3.83
Discrimination	SD	0.68
Discrimination	Min	1.00
Discrimination	Max	9.00
Discrimination	N	40,988.00
Drugs	Mean	3.50
Drugs	SD	0.99
Drugs	Min	1.00
Drugs	Max	9.00
Drugs	N	40,988.00
Public Intoxication	Mean	3.66
Public Intoxication	SD	0.83
Public Intoxication	Min	1.00
Public Intoxication	Max	9.00
Public Intoxication	N	40,988.00
Feeling of safety outside	Mean	2.12
Feeling of safety outside	SD	1.02
Feeling of safety outside	Min	1.00
Feeling of safety outside	Max	9.00
Feeling of safety outside	N	40,988.00
Feeling of safety within the home	Mean	1.81
Feeling of safety within the home	SD	0.86
Feeling of safety within the home	Min	1.00
Feeling of safety within the home	Max	9.00
Feeling of safety within the home	N	40,988.00

Severity of Neighborhood Issues

library(ggplot2)
library(dplyr)

# Rename the neighborhood-issue columns from their CHS codes to readable
# names. Later chunks refer to the columns by these new names.
chs_data <- chs_data %>%
  rename(
    Noise_Pollution = NEI_05A,
    Loitering = NEI_05B,
    Littering = NEI_05C,
    Vandalism = NEI_05D,
    Discrimination = NEI_05E,
    Drugs = NEI_05F,
    Public_Intoxication = NEI_05G
  )

# Create a long-format data frame: one row per respondent per issue.
# pivot_longer() replaces the superseded gather(); the counts plotted below
# do not depend on row order, so the chart is unchanged.
long_data <- chs_data %>%
  select(Noise_Pollution, Loitering, Littering, Vandalism, Discrimination,
         Drugs, Public_Intoxication) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Issue",
    values_to = "Severity"
  )

# Convert severity codes to categorical labels; the level order fixes the
# order of the bars on the x-axis
long_data$Severity <- factor(
  long_data$Severity,
  levels = c(1, 2, 3, 4, 9),
  labels = c("Big", "Moderate", "Small", "Not a Problem", "Not Stated")
)

# Create bar plot: geom_bar() counts the occurrences of each severity level
ggplot(long_data, aes(x = Severity, fill = Issue)) +
  geom_bar(position = "dodge", alpha = 0.8) +
  labs(
    title = "Neighborhood Issues by Severity of Problem",
    x = "Severity of Issue",
    y = "Frequency Count",
    caption = "Data Source: CHS 2021"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 12),
    legend.title = element_blank()
  ) +
  scale_fill_brewer(palette = "Set3")

Feeling of Safety Outside

library(ggplot2)
library(dplyr)

# Recode PNSC_15 using CHS code categories
chs_data <- chs_data %>%
  mutate(
    Safety_Outside_Category = case_when(
      PNSC_15 == 1 ~ "Very safe",
      PNSC_15 == 2 ~ "Reasonably safe",
      PNSC_15 == 3 ~ "Unsafe",
      PNSC_15 == 4 ~ "Do not walk alone",
      PNSC_15 == 9 ~ "Not stated",
      TRUE ~ "Other" # In case of unexpected values
    )
  )

# Order in which the categories should appear on the x-axis. Without this
# the character column is plotted alphabetically, which scrambles the scale.
safety_outside_order <- c("Very safe", "Reasonably safe", "Unsafe",
                          "Do not walk alone", "Not stated", "Other")

ggplot(chs_data, aes(x = Safety_Outside_Category, fill = Safety_Outside_Category)) +
  geom_bar() +
  # Keep only the categories present in the data, in the order defined above
  scale_x_discrete(
    limits = function(present) intersect(safety_outside_order, present)
  ) +
  labs(
    title = "Feeling of Safety When Walking Outside",
    x = "Level of Safety",
    y = "Frequency Count",
    fill = "Safety Level",
    caption = "Data Source: CHS 2021"
  ) +
  scale_fill_manual(values = c(
    "Very safe" = "#1b9e77",
    "Reasonably safe" = "#d95f02",
    "Unsafe" = "#e7298a",
    "Do not walk alone" = "#7570b3",
    "Not stated" = "#999999",
    "Other" = "#666666"  # colour for the fallback category, if it occurs
  )) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 12),
    axis.title = element_text(size = 12),
    legend.position = "none"
  )

Feeling of Safety Inside the Home

# Create Visualization for feeling of safety within the home
# Recode PDWS_10G into labelled satisfaction categories.
# NOTE(review): only codes 1-4 and 9 are mapped here. If the data dictionary
# defines a code 5 for this item, it currently falls into "Other" - confirm
# and add it if so.
chs_data <- chs_data %>%
  mutate(
    Safety_Inside_Category = case_when(
      PDWS_10G == 1 ~ "Very satisfied",
      PDWS_10G == 2 ~ "Satisfied",
      PDWS_10G == 3 ~ "Neither",
      PDWS_10G == 4 ~ "Dissatisfied",
      PDWS_10G == 9 ~ "Not stated",
      TRUE ~ "Other"
    )
  )

# Order in which the categories should appear on the x-axis. Without this
# the character column is plotted alphabetically, which scrambles the scale.
safety_inside_order <- c("Very satisfied", "Satisfied", "Neither",
                         "Dissatisfied", "Not stated", "Other")

ggplot(chs_data, aes(x = Safety_Inside_Category, fill = Safety_Inside_Category)) +
  geom_bar() +
  # Keep only the categories present in the data, in the order defined above
  scale_x_discrete(
    limits = function(present) intersect(safety_inside_order, present)
  ) +
  labs(
    title = "Feeling of Safety Inside the Home",
    x = "Satisfaction Level",
    y = "Frequency Count",
    fill = "Satisfaction Level",
    caption = "Data Source: CHS 2021"
  ) +
  scale_fill_manual(values = c(
    "Very satisfied" = "#1b9e77",
    "Satisfied" = "#d95f02",
    "Neither" = "#e7298a",
    "Dissatisfied" = "#7570b3",
    "Not stated" = "#999999",
    "Other" = "#666666"  # colour for the fallback category, if it occurs
  )) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 12),
    axis.title = element_text(size = 12),
    legend.position = "none"
  )

Compare Neighborhood Issues and Feeling of Safety

library(ggplot2)
library(dplyr)
library(tidyr)

# Prepare Safety Outside data: one row per respondent, tagged with the
# facet it belongs to
safety_outside_data <- chs_data %>%
  mutate(Category = "Safety Outside") %>%
  select(Category, Response = Safety_Outside_Category)

# Prepare Safety Inside data
safety_inside_data <- chs_data %>%
  mutate(Category = "Safety Inside") %>%
  select(Category, Response = Safety_Inside_Category)

# Prepare Neighborhood Issues data: stack the seven issue columns into
# Issue/Response pairs. pivot_longer() replaces the superseded gather(); the
# counts plotted below do not depend on row order.
neighborhood_issues_data <- chs_data %>%
  select(Noise_Pollution, Loitering, Littering, Vandalism, Discrimination,
         Drugs, Public_Intoxication) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Issue",
    values_to = "Response"
  ) %>%
  mutate(Category = "Neighborhood Issues")

# Convert severity levels to categorical labels
neighborhood_issues_data$Response <- factor(
  neighborhood_issues_data$Response,
  levels = c(1, 2, 3, 4, 9),
  labels = c("Big Problem", "Moderate Problem", "Small Problem",
             "Not a Problem", "Not Stated")
)

# Combine all data; the safety data sets have no Issue column, so it is
# filled with NA for those rows
combined_data <- bind_rows(
  safety_outside_data,
  safety_inside_data,
  neighborhood_issues_data
)

# Create Bar Chart
ggplot(combined_data, aes(x = Response, fill = Category)) +
  geom_bar(position = "dodge") +
  facet_wrap(~Category, scales = "free_x") +  # Facets for each category
  labs(
    title = "Comparison of Safety & Neighborhood Issues",
    x = "Response",
    y = "Frequency Count",
    caption = "Data Source: CHS 2021"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c(
    "Safety Outside" = "#1b9e77",
    "Safety Inside" = "#d95f02",
    "Neighborhood Issues" = "#7570b3"
  )) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.title = element_text(size = 12),
    legend.position = "top"
  )

Looking at Specific Neighborhood Issues & Safety

Now that we have all of our data regarding neighborhood issues and feeling of safety inside and outside of the home, we can compare specific variables. We use an example of a line graph as well as a bar graph.

# Create a line graph
library(ggplot2)
library(dplyr)
library(tidyr)

# Select Data: frequency of each response for the two variables compared
safety_outside_data <- chs_data %>%
  count(Safety_Outside_Category) %>%
  rename(Response = Safety_Outside_Category, Count = n) %>%
  mutate(Category = "Safety Outside")

drugs_data <- chs_data %>%
  count(Drugs) %>%
  rename(Response = Drugs, Count = n) %>%
  mutate(Category = "Drugs")

# Convert response levels to categories
# Both data sets share one set of ordered levels so they can sit on the same
# x-axis: the safety responses first, then the problem-severity responses.
# NOTE(review): responses outside these levels ("Not stated", "Other",
# code 9) become NA and appear as an "NA" position on the axis - confirm
# whether they should be filtered out instead.
common_levels <- c("Very safe", "Reasonably safe", "Unsafe", "Do not walk alone",
                   "Big Problem", "Moderate Problem", "Small Problem", "Not a Problem")

# The safety responses are already text, so they map straight onto the
# shared levels
safety_outside_data$Response <- factor(safety_outside_data$Response,
                                       levels = common_levels,
                                       ordered = TRUE)

# The drugs responses are numeric codes: label them first, then place the
# labels on the shared levels
drugs_data$Response <- factor(drugs_data$Response,
                              levels = c(1, 2, 3, 4),
                              labels = c("Big Problem", "Moderate Problem",
                                         "Small Problem", "Not a Problem"))
drugs_data$Response <- factor(drugs_data$Response,
                              levels = common_levels,
                              ordered = TRUE)

# Combine both datasets
comparison_data <- bind_rows(safety_outside_data, drugs_data)

# Create the Line Graph
ggplot(comparison_data, aes(x = Response, y = Count, group = Category, color = Category)) +
  # linewidth replaces size for lines, which is deprecated since ggplot2 3.4.0
  geom_line(linewidth = 1) +
  geom_point(size = 3) +  # Add points for clarity
  labs(
    title = "Comparison: Feeling of Safety Outside vs. Drugs in the Neighborhood",
    x = "Response Category",
    y = "Frequency Count",
    color = "Category",
    caption = "Data Source: CHS 2021"
  ) +
  scale_color_manual(values = c("Safety Outside" = "#1b9e77", "Drugs" = "#d95f02")) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.title = element_text(size = 12),
    legend.position = "top"
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(ggplot2)
library(dplyr)
library(tidyr)

# Shared, ordered response levels: safety responses first, then the
# problem-severity responses
common_levels <- c("Very safe", "Reasonably safe", "Unsafe", "Do not walk alone", 
                   "Big Problem", "Moderate Problem", "Small Problem", "Not a Problem")

# Frequency of each safety-outside response, placed on the shared levels
safety_outside_data <- chs_data %>%
  count(Safety_Outside_Category, name = "Count") %>%
  rename(Response = Safety_Outside_Category) %>%
  mutate(
    Category = "Safety Outside",
    Response = factor(Response, levels = common_levels, ordered = TRUE)
  )

# Frequency of each drugs response: codes 1-4 are labelled first, then the
# labels are placed on the shared levels
drugs_data <- chs_data %>%
  count(Drugs, name = "Count") %>%
  rename(Response = Drugs) %>%
  mutate(
    Category = "Drugs",
    Response = factor(
      Response,
      levels = c(1, 2, 3, 4),
      labels = c("Big Problem", "Moderate Problem", "Small Problem", "Not a Problem")
    ),
    Response = factor(Response, levels = common_levels, ordered = TRUE)
  )

# Stack the two frequency tables into one data set for plotting
comparison_data <- bind_rows(safety_outside_data, drugs_data)

# Shared text and theme settings for the chart
bar_labels <- labs(
  title = "Comparison: Feeling of Safety Outside vs. Drugs in the Neighborhood",
  x = "Response Category",
  y = "Frequency Count",
  fill = "Category",
  caption = "Data Source: CHS 2021"
)
bar_theme <- theme(
  plot.title = element_text(size = 12, face = "bold"),
  axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
  axis.title = element_text(size = 12),
  legend.position = "top"
)

# Create Bar Graph: geom_col() draws bars whose height is the Count value,
# side by side for the two categories
ggplot(comparison_data, aes(x = Response, y = Count, fill = Category)) +
  geom_col(position = "dodge") +
  bar_labels +
  scale_fill_manual(values = c("Safety Outside" = "#1b9e77", "Drugs" = "#d95f02")) +
  theme_minimal() +
  bar_theme

Next Steps for Analysis

For the analysis section of our research project, we will refine the set of variables available to us in the CHS 2021 dataset. We will choose the specific aspects of neighborhood issues and neighborhood safety that we want to focus on. We then want to observe the patterns that emerge and explore the relationship between these two sets of variables (safety and neighborhood issues).