Overview

Data on homelessness in the U.S.

Set up your work environment

Open up a new .Rmd file.

Use {r setup, include=FALSE} as the header of your first code chunk.

# Set the default for all later chunks: show (echo) the source code in the knitted output
knitr::opts_chunk$set(echo = TRUE)

# Load necessary libraries
library(knitr)
library(kableExtra)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readr) 
library(dplyr)
library(tidyr)

State summary data on homelessness

The data below were scraped from an online source, available here.

Generating the table:

# Load necessary libraries
library(readr)
library(knitr)
library(kableExtra)

# Define the CSV formatted data as one multi-line string (header row + one row per state).
# NOTE(review): the literal is not fully clean. Many fields carry stray spaces
# around commas and "%" signs, and the header line ends with a trailing space.
# NOTE(review): every row from Oklahoma through West_Virginia has negative
# values in all numeric columns (including the total count) and no "%" signs.
# Negative counts look like a scraping artefact (possibly a dash separator read
# as a minus sign) - TODO confirm against the original source before relying on
# these rows.
# NOTE(review): the "ptc_" prefix on the last two column names looks like a
# typo for "pct_"; left unchanged because the names are part of the data.
csv_data <- "state,tot_homeless_2023,pct_change_since_2022,pct_under_18,ptc_veterans,ptc_chronic_homelessness 
Alaska,2614,13%,14%,5%,30%
Alabama,3304,-12%,17%,9%,22%
Arkansas,2609,6%,12%,8%,34%
Arizona,14237,5%,11%,7%,22%
California,181399,6%,9%,6%,39%
Colorado,14439,39%,16%,7%,31%
Connecticut,3015,3%,19%,5%,4%
District_of_Columbia,4922,12%,15%,4%,28%
Delaware,1245,-47%,27%,6%,14%
Florida,30756,18%,16%,8%,19%
Georgia,12294,15%,19%,6%,14%
Hawaii,6223,4%,15%,5%,26%
Iowa,2653,10%,19%,5%,20%
Idaho,2298,15%,20%,8%,18%
Illinois,11947,30%,20%,4%,12%
Indiana,6017,10%,20%,8%,12%
Kansas,2636,10%,17%,8%,18%
Kentucky,4766,20%,12%,9%,20%
Louisiana,3169,-57%,12%,8%,14%
Massachusetts,19141,23%,39%,3%,14%
Maryland,5865,10%,20%,5%,18%
Maine,4258,-3%,29%,3%,9%
Michigan,8997,10%,25%,5 %,13%
Minnesota,8393 ,6 %,28 %,4 %,24 %
Missouri ,6708 ,12 %,18 %,8 %,24 %
Mississippi ,982 ,-18 %,13 %,6 %,13 %
Montana ,2178 ,37 %,14 %,10 %,26 %
North_Carolina ,9754 ,4 %,17 %,8 %,17 %
North_Dakota ,784 ,29 %,17 %,3 %,22 %
Nebraska ,2462 ,10 %,17 %,5 %,25 %
New_Hampshire ,2441 ,52 %,18 %,4 %,22 %
New_Jersey ,10264 ,17 %,24 %,4 %,19 %
New_Mexico ,3842 ,50 %,18 %,7 %,44 %
Nevada ,8666 ,14 %,8 %,13 %,28 %
New_York ,103200 ,39 %,28 %,1 %,6 %
Ohio ,11386 ,7 %,18 %,5 %,11 %
Oklahoma ,-4648 ,-24 ,-13 ,-6 ,-30
Oregon ,-20142 ,-12 ,-13 ,-8 ,-34
Pennsylvania ,-12556 ,-1 ,-21 ,-7 ,-16
Rhode_Island ,-1810 ,-15 ,-21 ,-6 ,-35
South_Carolina ,-4053 ,-12 ,-13 ,-10 ,-21
South_Dakota ,-1282 ,-8 ,-16 ,-5 ,-18
Tennessee ,-9215 ,-13 ,-11 ,-8 ,-22
Texas ,-27377 ,-12 ,-15 ,-7 ,-18
Utah ,-3687 ,-4 ,-16 ,-5 ,-27
Virginia ,-6761 ,-4 ,-23 ,-6 ,-16
Vermont,-3295,-19,-20,-4,-8
Washington,-28036,-11,-16,-6,-31
Wisconsin,-4861,-2,-24,-7,-12
West_Virginia,-1416,-3,-9,-6,-18
Wyoming,532,-18,-7,-17,-14"

# Parse the embedded CSV text into a data frame.
# strip.white = TRUE trims blanks around unquoted fields, so state names such
# as "Missouri " are stored without trailing spaces and can be matched exactly.
data <- read.csv(text = csv_data, strip.white = TRUE)

# Percentage columns arrive as text such as "13%" or "6 %": drop the "%" sign
# and any whitespace, then convert to numeric. Column names (including the
# "ptc_" spelling) are used exactly as they appear in the CSV header.
percentage_columns <- c(
  "pct_change_since_2022", "pct_under_18",
  "ptc_veterans", "ptc_chronic_homelessness"
)
data[percentage_columns] <- lapply(
  data[percentage_columns],
  function(values) as.numeric(gsub("[%[:space:]]", "", values))
)

# Counts of people cannot be negative. Flag such rows instead of letting them
# pass silently: they indicate a problem in the source text, and they would
# otherwise always land in the "Low" group below.
negative_totals <- !is.na(data$tot_homeless_2023) & data$tot_homeless_2023 < 0
if (any(negative_totals)) {
  warning(
    "Negative tot_homeless_2023 for: ",
    paste(data$state[negative_totals], collapse = ", "),
    call. = FALSE
  )
}

# Label each state by the size of its 2023 total: "High" above 10,000,
# "Low" otherwise. Missing totals stay NA.
data$new_column <- ifelse(data$tot_homeless_2023 > 10000, "High", "Low")

# Render the data frame as a styled HTML table
kbl(
  data,
  format = "html",
  caption = "Homelessness Data by State (2023)",
  escape = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = TRUE,
    position = "center"
  ) %>%
  column_spec(1:6, width = "auto") %>% # Adjust column widths as needed
  row_spec(
    0, # row 0 is the header row
    bold = TRUE,
    color = "white",
    background = "#007bff"
  )

Homelessness Data by State (2023)
state	tot_homeless_2023	pct_change_since_2022	pct_under_18	ptc_veterans	ptc_chronic_homelessness	new_column
Alaska	2614	13	14	5	30	Low
Alabama	3304	-12	17	9	22	Low
Arkansas	2609	6	12	8	34	Low
Arizona	14237	5	11	7	22	High
California	181399	6	9	6	39	High
Colorado	14439	39	16	7	31	High
Connecticut	3015	3	19	5	4	Low
District_of_Columbia	4922	12	15	4	28	Low
Delaware	1245	-47	27	6	14	Low
Florida	30756	18	16	8	19	High
Georgia	12294	15	19	6	14	High
Hawaii	6223	4	15	5	26	Low
Iowa	2653	10	19	5	20	Low
Idaho	2298	15	20	8	18	Low
Illinois	11947	30	20	4	12	High
Indiana	6017	10	20	8	12	Low
Kansas	2636	10	17	8	18	Low
Kentucky	4766	20	12	9	20	Low
Louisiana	3169	-57	12	8	14	Low
Massachusetts	19141	23	39	3	14	High
Maryland	5865	10	20	5	18	Low
Maine	4258	-3	29	3	9	Low
Michigan	8997	10	25	5	13	Low
Minnesota	8393	6	28	4	24	Low
Missouri	6708	12	18	8	24	Low
Mississippi	982	-18	13	6	13	Low
Montana	2178	37	14	10	26	Low
North_Carolina	9754	4	17	8	17	Low
North_Dakota	784	29	17	3	22	Low
Nebraska	2462	10	17	5	25	Low
New_Hampshire	2441	52	18	4	22	Low
New_Jersey	10264	17	24	4	19	High
New_Mexico	3842	50	18	7	44	Low
Nevada	8666	14	8	13	28	Low
New_York	103200	39	28	1	6	High
Ohio	11386	7	18	5	11	High
Oklahoma	-4648	-24	-13	-6	-30	Low
Oregon	-20142	-12	-13	-8	-34	Low
Pennsylvania	-12556	-1	-21	-7	-16	Low
Rhode_Island	-1810	-15	-21	-6	-35	Low
South_Carolina	-4053	-12	-13	-10	-21	Low
South_Dakota	-1282	-8	-16	-5	-18	Low
Tennessee	-9215	-13	-11	-8	-22	Low
Texas	-27377	-12	-15	-7	-18	Low
Utah	-3687	-4	-16	-5	-27	Low
Virginia	-6761	-4	-23	-6	-16	Low
Vermont	-3295	-19	-20	-4	-8	Low
Washington	-28036	-11	-16	-6	-31	Low
Wisconsin	-4861	-2	-24	-7	-12	Low
West_Virginia	-1416	-3	-9	-6	-18	Low
Wyoming	532	-18	-7	-17	-14	Low

PIT and HIC Data

PIT and HIC data can be found here.

Point-in-Time (PIT)

The Point-in-Time (PIT) Count is an annual survey to estimate the number of individuals experiencing homelessness in the United States on a single night. This count includes both sheltered individuals in emergency shelters and transitional housing, as well as unsheltered individuals living in places not meant for habitation.

# Install the readxlsb package if not already installed
# install.packages("readxlsb")

# Load the readxlsb library
library(readxlsb)

# Location of HUD's 2007-2023 PIT counts by state (an Excel binary workbook)
url <- "https://www.huduser.gov/portal/sites/default/files/xls/2007-2023-PIT-Counts-by-State.xlsb"

# Save the workbook under a name that matches its real format. An .xlsb file
# is a zip container of internal parts, so unzipping it only exposes those
# parts (_rels, docProps, xl, ...) and never yields a CSV to read.
pit_dir <- "homeless_data"
pit_path <- file.path(pit_dir, "2007-2023-PIT-Counts-by-State.xlsb")
dir.create(pit_dir, showWarnings = FALSE, recursive = TRUE)

# mode = "wb" keeps the binary file intact on Windows. Skip the download when
# a copy is already on disk so that re-knitting does not hit the network again.
if (!file.exists(pit_path)) {
  download.file(url, destfile = pit_path, mode = "wb")
}

# Read the workbook directly with readxlsb.
# NOTE(review): sheet = 1 reads the first worksheet - TODO confirm which sheet
# (year) is wanted and pass its name or index here.
pit <- read_xlsb(pit_path, sheet = 1)

# View the first few rows of the PIT data
head(pit)

Housing Inventory Count (HIC)

The 2023 HIC (Raw File) refers to the Housing Inventory Count (HIC) data, a component of the U.S. Department of Housing and Urban Development’s (HUD) efforts to track homelessness. The HIC provides a snapshot of the number of beds available in emergency shelters, transitional housing, and other supportive housing programs at a specific point in time.

This file is provided as a preliminary resource until official data is added to the critstats package. You may also use this code to gather data related to your class project, thesis, or other academic tasks beyond what is provided below. Content in this file comes from a host of different sources, which you should be familiar with prior to accessing and analyzing any data.

# Load necessary library
# install.packages("readr") # Uncomment if 'readr' is not installed
library(readr)

# Pull the 2023 HIC raw file from the critstats repository on GitHub
hic <- readr::read_csv(
  file = "https://raw.githubusercontent.com/professornaite/critstats/main/data-raw/HIC-counts-by-state-2023.csv"
)

## Rows: 28871 Columns: 103
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (26): CocState, CoC, HudNum, Status, Organization Name, HMIS Org ID, use...
## dbl (76): Row #, Coc_ID, year, Organization ID, Project ID, Geo Code, HMIS P...
## lgl  (1): mergedDefunctYear
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Peek at the first six rows of the HIC data
head(hic, n = 6L)

## # A tibble: 6 × 103
##   `Row #` CocState CoC              Coc_ID HudNum Status  year `Organization ID`
##     <dbl> <chr>    <chr>             <dbl> <chr>  <chr>  <dbl>             <dbl>
## 1  616866 OH       Akron, Barberto…   1350 OH-506 Compl…  2023               495
## 2  616875 OH       Akron, Barberto…   1350 OH-506 Compl…  2023               495
## 3  616890 OH       Akron, Barberto…   1350 OH-506 Compl…  2023               495
## 4  616923 OH       Akron, Barberto…   1350 OH-506 Compl…  2023             45277
## 5  703040 OH       Akron, Barberto…   1350 OH-506 Compl…  2023             45277
## 6  703010 OH       Akron, Barberto…   1350 OH-506 Compl…  2023              8321
## # ℹ 95 more variables: `Organization Name` <chr>, `HMIS Org ID` <chr>,
## #   useHmisDb <chr>, `Project ID` <dbl>, `Project Name` <chr>,
## #   `HMIS Project ID` <chr>, `HIC Date` <chr>, `Project Type` <chr>,
## #   `Bed Type` <chr>, `Geo Code` <dbl>, `HMIS Participating` <dbl>,
## #   `Inventory Type` <chr>, beginsOperationsWithinYear <dbl>,
## #   `Target Population` <chr>, mcKinneyVentoEsg <dbl>,
## #   mcKinneyVentoEsgEs <dbl>, mcKinneyVentoEsgRrh <dbl>, …

# Peek at the last six rows of the HIC data
tail(hic, n = 6L)

## # A tibble: 6 × 103
##   `Row #` CocState CoC              Coc_ID HudNum Status  year `Organization ID`
##     <dbl> <chr>    <chr>             <dbl> <chr>  <chr>  <dbl>             <dbl>
## 1  656633 WY       Wyoming Statewi…   1269 WY-500 Compl…  2023             38857
## 2  610893 WY       Wyoming Statewi…   1269 WY-500 Compl…  2023             41520
## 3  610880 WY       Wyoming Statewi…   1269 WY-500 Compl…  2023             18486
## 4  610830 WY       Wyoming Statewi…   1269 WY-500 Compl…  2023             18486
## 5  610881 WY       Wyoming Statewi…   1269 WY-500 Compl…  2023             18486
## 6  656634 WY       Wyoming Statewi…   1269 WY-500 Compl…  2023              7628
## # ℹ 95 more variables: `Organization Name` <chr>, `HMIS Org ID` <chr>,
## #   useHmisDb <chr>, `Project ID` <dbl>, `Project Name` <chr>,
## #   `HMIS Project ID` <chr>, `HIC Date` <chr>, `Project Type` <chr>,
## #   `Bed Type` <chr>, `Geo Code` <dbl>, `HMIS Participating` <dbl>,
## #   `Inventory Type` <chr>, beginsOperationsWithinYear <dbl>,
## #   `Target Population` <chr>, mcKinneyVentoEsg <dbl>,
## #   mcKinneyVentoEsgEs <dbl>, mcKinneyVentoEsgRrh <dbl>, …

Other sources

Data from the homelessdata site can be accessed for summary statistics.

Below are test/pseudo data for model building.

Dataframe 1

Logistic Regression Analysis

In this analysis, we aim to assess the intersectional relationships between race and gender in predicting homelessness status using logistic regression. The following steps outline the process and mathematical functions used in R.

Data Preparation

First, we convert the categorical variables for race and gender into factors to ensure proper handling in the logistic regression model.

# Store race and gender as factors so the model treats them as categorical
df1[c("Race", "Gender")] <- lapply(df1[c("Race", "Gender")], as.factor)

Logistic Regression Model

We fit a logistic regression model using the generalized linear model (GLM) function. The logistic regression model can be mathematically expressed as:

\[ \text{logit}(p) = \ln\left(\frac{p}{1-p}\right) = \beta_0 + \beta_1 \text{Race} + \beta_2 \text{Gender} + \beta_3 (\text{Race} \times \text{Gender}) \]

where:

$p$ is the probability of experiencing homelessness,
$\beta_0$ is the intercept,
$\beta_1, \beta_2,$ and $\beta_3$ are coefficients for race, gender, and their interaction, respectively.

The model is fitted using:

# Logistic regression: main effects of Race and Gender plus their interaction
model1 <- glm(
  formula = Homelessness_Status ~ Race * Gender,
  family = binomial,
  data = df1
)

Model Summary

After fitting the model, we can summarize its results to understand the relationship between race, gender, and homelessness status:

# Coefficient table and fit statistics for the fitted model
summary(object = model1)

## 
## Call:
## glm(formula = Homelessness_Status ~ Race * Gender, family = binomial, 
##     data = df1)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                   -0.62571    0.25833  -2.422   0.0154 *
## RaceBlack                      0.44982    0.37075   1.213   0.2250  
## RaceHispanic                   0.59396    0.36089   1.646   0.0998 .
## RaceOther                      0.65469    0.35315   1.854   0.0638 .
## RaceWhite                      0.62571    0.36837   1.699   0.0894 .
## GenderMale                     0.56687    0.35441   1.599   0.1097  
## GenderNon-Binary               0.73107    0.37028   1.974   0.0483 *
## RaceBlack:GenderMale           0.07903    0.50049   0.158   0.8745  
## RaceHispanic:GenderMale       -0.44550    0.49894  -0.893   0.3719  
## RaceOther:GenderMale          -0.59585    0.49236  -1.210   0.2262  
## RaceWhite:GenderMale          -0.53788    0.50255  -1.070   0.2845  
## RaceBlack:GenderNon-Binary    -0.17381    0.51762  -0.336   0.7370  
## RaceHispanic:GenderNon-Binary -0.87117    0.50811  -1.715   0.0864 .
## RaceOther:GenderNon-Binary    -0.48134    0.50777  -0.948   0.3432  
## RaceWhite:GenderNon-Binary    -0.73107    0.50863  -1.437   0.1506  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1386.1  on 999  degrees of freedom
## Residual deviance: 1370.9  on 985  degrees of freedom
## AIC: 1400.9
## 
## Number of Fisher Scoring iterations: 4

Predicting Probabilities

We then predict the probabilities of homelessness based on race and gender using the fitted model. The predicted probability can be expressed as:

\[ \hat{p} = \frac{e^{(\beta_0 + \beta_1 \text{Race} + \beta_2 \text{Gender} + \beta_3 (\text{Race} \times \text{Gender}))}}{1 + e^{(\beta_0 + \beta_1 \text{Race} + \beta_2 \text{Gender} + \beta_3 (\text{Race} \times \text{Gender}))}} \]

# Peek at the first six rows of df1
head(df1, n = 6L)

##       Race     Gender Homelessness_Status
## 1 Hispanic Non-Binary                   0
## 2 Hispanic     Female                   0
## 3    Black Non-Binary                   1
## 4    Black Non-Binary                   0
## 5 Hispanic     Female                   1
## 6    Other     Female                   0

# Build the logistic regression model used to assess intersectional relationships
# Categorical predictors must be factors before fitting
df1[c("Race", "Gender")] <- lapply(df1[c("Race", "Gender")], as.factor)

# Main effects of Race and Gender plus their interaction, binomial family
model1 <- glm(
  formula = Homelessness_Status ~ Race * Gender,
  family = binomial,
  data = df1
)

# Coefficient table and fit statistics
summary(object = model1)

## 
## Call:
## glm(formula = Homelessness_Status ~ Race * Gender, family = binomial, 
##     data = df1)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                   -0.62571    0.25833  -2.422   0.0154 *
## RaceBlack                      0.44982    0.37075   1.213   0.2250  
## RaceHispanic                   0.59396    0.36089   1.646   0.0998 .
## RaceOther                      0.65469    0.35315   1.854   0.0638 .
## RaceWhite                      0.62571    0.36837   1.699   0.0894 .
## GenderMale                     0.56687    0.35441   1.599   0.1097  
## GenderNon-Binary               0.73107    0.37028   1.974   0.0483 *
## RaceBlack:GenderMale           0.07903    0.50049   0.158   0.8745  
## RaceHispanic:GenderMale       -0.44550    0.49894  -0.893   0.3719  
## RaceOther:GenderMale          -0.59585    0.49236  -1.210   0.2262  
## RaceWhite:GenderMale          -0.53788    0.50255  -1.070   0.2845  
## RaceBlack:GenderNon-Binary    -0.17381    0.51762  -0.336   0.7370  
## RaceHispanic:GenderNon-Binary -0.87117    0.50811  -1.715   0.0864 .
## RaceOther:GenderNon-Binary    -0.48134    0.50777  -0.948   0.3432  
## RaceWhite:GenderNon-Binary    -0.73107    0.50863  -1.437   0.1506  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1386.1  on 999  degrees of freedom
## Residual deviance: 1370.9  on 985  degrees of freedom
## AIC: 1400.9
## 
## Number of Fisher Scoring iterations: 4

# Fitted probability of homelessness for every row (response scale)
df1$Predicted_Probability <- predict(object = model1, type = "response")

# Peek at the first six rows, now including the fitted probabilities
head(df1, n = 6L)

##       Race     Gender Homelessness_Status Predicted_Probability
## 1 Hispanic Non-Binary                   0             0.4571429
## 2 Hispanic     Female                   0             0.4920635
## 3    Black Non-Binary                   1             0.5942029
## 4    Black Non-Binary                   0             0.5942029
## 5 Hispanic     Female                   1             0.4920635
## 6    Other     Female                   0             0.5072464

# Show the data frame's attributes (column names, row names, class)
print(x = attributes(df1))

## $names
## [1] "Race"                  "Gender"                "Homelessness_Status"  
## [4] "Predicted_Probability"
## 
## $row.names
##    [1]    1    2    3    4    5    6    7    8    9   10   11   12   13   14
##   [15]   15   16   17   18   19   20   21   22   23   24   25   26   27   28
##   [29]   29   30   31   32   33   34   35   36   37   38   39   40   41   42
##   [43]   43   44   45   46   47   48   49   50   51   52   53   54   55   56
##   [57]   57   58   59   60   61   62   63   64   65   66   67   68   69   70
##   [71]   71   72   73   74   75   76   77   78   79   80   81   82   83   84
##   [85]   85   86   87   88   89   90   91   92   93   94   95   96   97   98
##   [99]   99  100  101  102  103  104  105  106  107  108  109  110  111  112
##  [113]  113  114  115  116  117  118  119  120  121  122  123  124  125  126
##  [127]  127  128  129  130  131  132  133  134  135  136  137  138  139  140
##  [141]  141  142  143  144  145  146  147  148  149  150  151  152  153  154
##  [155]  155  156  157  158  159  160  161  162  163  164  165  166  167  168
##  [169]  169  170  171  172  173  174  175  176  177  178  179  180  181  182
##  [183]  183  184  185  186  187  188  189  190  191  192  193  194  195  196
##  [197]  197  198  199  200  201  202  203  204  205  206  207  208  209  210
##  [211]  211  212  213  214  215  216  217  218  219  220  221  222  223  224
##  [225]  225  226  227  228  229  230  231  232  233  234  235  236  237  238
##  [239]  239  240  241  242  243  244  245  246  247  248  249  250  251  252
##  [253]  253  254  255  256  257  258  259  260  261  262  263  264  265  266
##  [267]  267  268  269  270  271  272  273  274  275  276  277  278  279  280
##  [281]  281  282  283  284  285  286  287  288  289  290  291  292  293  294
##  [295]  295  296  297  298  299  300  301  302  303  304  305  306  307  308
##  [309]  309  310  311  312  313  314  315  316  317  318  319  320  321  322
##  [323]  323  324  325  326  327  328  329  330  331  332  333  334  335  336
##  [337]  337  338  339  340  341  342  343  344  345  346  347  348  349  350
##  [351]  351  352  353  354  355  356  357  358  359  360  361  362  363  364
##  [365]  365  366  367  368  369  370  371  372  373  374  375  376  377  378
##  [379]  379  380  381  382  383  384  385  386  387  388  389  390  391  392
##  [393]  393  394  395  396  397  398  399  400  401  402  403  404  405  406
##  [407]  407  408  409  410  411  412  413  414  415  416  417  418  419  420
##  [421]  421  422  423  424  425  426  427  428  429  430  431  432  433  434
##  [435]  435  436  437  438  439  440  441  442  443  444  445  446  447  448
##  [449]  449  450  451  452  453  454  455  456  457  458  459  460  461  462
##  [463]  463  464  465  466  467  468  469  470  471  472  473  474  475  476
##  [477]  477  478  479  480  481  482  483  484  485  486  487  488  489  490
##  [491]  491  492  493  494  495  496  497  498  499  500  501  502  503  504
##  [505]  505  506  507  508  509  510  511  512  513  514  515  516  517  518
##  [519]  519  520  521  522  523  524  525  526  527  528  529  530  531  532
##  [533]  533  534  535  536  537  538  539  540  541  542  543  544  545  546
##  [547]  547  548  549  550  551  552  553  554  555  556  557  558  559  560
##  [561]  561  562  563  564  565  566  567  568  569  570  571  572  573  574
##  [575]  575  576  577  578  579  580  581  582  583  584  585  586  587  588
##  [589]  589  590  591  592  593  594  595  596  597  598  599  600  601  602
##  [603]  603  604  605  606  607  608  609  610  611  612  613  614  615  616
##  [617]  617  618  619  620  621  622  623  624  625  626  627  628  629  630
##  [631]  631  632  633  634  635  636  637  638  639  640  641  642  643  644
##  [645]  645  646  647  648  649  650  651  652  653  654  655  656  657  658
##  [659]  659  660  661  662  663  664  665  666  667  668  669  670  671  672
##  [673]  673  674  675  676  677  678  679  680  681  682  683  684  685  686
##  [687]  687  688  689  690  691  692  693  694  695  696  697  698  699  700
##  [701]  701  702  703  704  705  706  707  708  709  710  711  712  713  714
##  [715]  715  716  717  718  719  720  721  722  723  724  725  726  727  728
##  [729]  729  730  731  732  733  734  735  736  737  738  739  740  741  742
##  [743]  743  744  745  746  747  748  749  750  751  752  753  754  755  756
##  [757]  757  758  759  760  761  762  763  764  765  766  767  768  769  770
##  [771]  771  772  773  774  775  776  777  778  779  780  781  782  783  784
##  [785]  785  786  787  788  789  790  791  792  793  794  795  796  797  798
##  [799]  799  800  801  802  803  804  805  806  807  808  809  810  811  812
##  [813]  813  814  815  816  817  818  819  820  821  822  823  824  825  826
##  [827]  827  828  829  830  831  832  833  834  835  836  837  838  839  840
##  [841]  841  842  843  844  845  846  847  848  849  850  851  852  853  854
##  [855]  855  856  857  858  859  860  861  862  863  864  865  866  867  868
##  [869]  869  870  871  872  873  874  875  876  877  878  879  880  881  882
##  [883]  883  884  885  886  887  888  889  890  891  892  893  894  895  896
##  [897]  897  898  899  900  901  902  903  904  905  906  907  908  909  910
##  [911]  911  912  913  914  915  916  917  918  919  920  921  922  923  924
##  [925]  925  926  927  928  929  930  931  932  933  934  935  936  937  938
##  [939]  939  940  941  942  943  944  945  946  947  948  949  950  951  952
##  [953]  953  954  955  956  957  958  959  960  961  962  963  964  965  966
##  [967]  967  968  969  970  971  972  973  974  975  976  977  978  979  980
##  [981]  981  982  983  984  985  986  987  988  989  990  991  992  993  994
##  [995]  995  996  997  998  999 1000
## 
## $class
## [1] "data.frame"

Dataframe 2: An extended analysis

Variable Name	Variable Type	Definition
ID	Integer	Unique identifier for each participant in the study.
Age	Numeric	Age of the participant in years.
Education Level	Categorical	Highest level of education attained (e.g., “High School”, “Some College”, “Bachelor’s”, etc.).
Employment Status	Categorical	Current employment status (e.g., “Employed”, “Unemployed”, “Part-time”, “Student”).
Annual Income	Numeric	Total annual income in USD.
Homeless Status	Categorical	Indicates if respondent experienced homelessness (e.g., “Yes”, “Housing insecure”, “No”).
Duration of Homelessness	Numeric	Length of time (in months) the participant has experienced homelessness, if applicable.
Family Size	Integer	Number of individuals in the participant’s household.
Access to Services	Categorical	Indicates access to social services (e.g., “Yes”, “No”).
Mental Health Status	Categorical	Self-reported mental health status (e.g., “Good”, “Fair”, “Poor”).

To write the mathematical model that corresponds to the code provided, we need to identify the key variables and their relationships as they pertain to the study of homelessness. The code creates a pseudo dataset with various demographic and situational variables. Below is a mathematical representation of the model that can be inferred from the dataset generation process.

Mathematical Model

Variables:

Let $n$ be the total number of individuals in the dataset.
Let $\text{age}_i$ be the age of individual $i$.
Let $\text{education_level}_i$ be the education level of individual $i$.
Let $\text{employment_status}_i$ be the employment status of individual $i$.
Let $\text{annual\_income}_i$ be the annual income of individual $i$.
Let $\text{homeless\_status}_i$ be a categorical variable indicating whether individual $i$ is homeless (“Yes”, “Housing insecure”, “No”).
Let $\text{race}_i$ be the race of individual $i$ (e.g., “Black”, “White”, “Hispanic”, “Asian”, “Other”).
Let $\text{gender}_i$ be the gender of individual $i$ (e.g., “Woman”, “Man”, “Non-binary”).
Let \[ \text{black_woman}_i = \begin{cases} 1 & \text{if } (\text{race}_i = \text{“Black”} \text{ and } \text{gender}_i = \text{“Woman”}) \\ 0 & \text{otherwise} \end{cases} \]

be a binary flag indicating if individual $i$ is a Black woman.

Let $\text{duration_of_homelessness}_i$ represent the duration of homelessness for individual $i$, measured in months.
Let $\text{family_size}_i$ represent the size of individual $i$’s family.
Let $\text{access_to_services}_i$ indicate whether individual $i$ has access to social services (“Yes” or “No”).
Let $\text{mental_health_status}_i$ represent the mental health status of individual $i$.

Homelessness Status Model: The relationship between these variables can be modeled using a logistic regression framework, where we predict the probability of an individual being homeless based on their demographic and situational characteristics:

The logistic regression model can be expressed as:

\[ P(\text{Homelessness}) = P(Y_i = 1 | X_i) = \frac{1}{1 + e^{-(\beta_0 + \beta_1 (\text{age}_i) + \beta_2 (\text{education_level}_i) + \beta_3 (\text{employment_status}_i) + ... + \beta_k (\text{mental_health_status}_i))}} \]

Where:

$Y_i = 1$ indicates that individual $i$ is homeless.
$X_i = (\text{age}_i, ... , \text{mental_health_status}_i)$ represents all independent variables affecting homelessness status.
The coefficients $(\beta_0, \beta_1, \dots, \beta_k)$ represent the effect size of each predictor on the log-odds of homelessness.

Duration of Homelessness:

For individuals who are homeless, we can also model the duration of homelessness as a function of various predictors using a linear regression or survival analysis approach:

If we denote:
\[ D_i = \begin{cases} d_i & \text{if } Y_i = 1 \\ \text{NA} & \text{if } Y_i = 0 \end{cases} \]
Then, we could model:

\[ D_i = f(\beta_0 + \beta_1 (\text{age}_i) + \beta_2 (\text{education_level}_i) + \dots + \beta_k (\text{mental_health_status}_i)) \]

Summary

The dataset generated captures various demographic and situational factors that may influence homelessness among individuals, and specifically Black women. The mathematical models outlined above provide a framework for analyzing these relationships using logistic regression for predicting homelessness status and potentially linear regression or survival analysis for understanding the duration of homelessness. This approach allows researchers to explore how different factors interact and contribute to homelessness, particularly among vulnerable populations such as Black women.

The full data set is presented below.

# Print every row of the dataset (n = Inf lifts the default row limit)
print(df2, n = Inf)

## # A tibble: 150 × 13
##        ID   age education_level employment_status annual_income homeless_status 
##     <int> <int> <chr>           <chr>                     <dbl> <chr>           
##   1     1    48 High School     Unemployed                 1846 Housing insecure
##   2     2    32 Bachelor's      Unemployed                 6517 Housing insecure
##   3     3    31 Graduate        Part-time                  9419 No              
##   4     4    20 Graduate        Unemployed                 2043 No              
##   5     5    59 Bachelor's      Part-time                 12915 No              
##   6     6    60 Bachelor's      Employed                   4037 Housing insecure
##   7     7    54 Graduate        Employed                   1594 No              
##   8     8    31 High School     Student                    9236 No              
##   9     9    42 Graduate        Employed                  10443 No              
##  10    10    43 Some College    Employed                   5243 Housing insecure
##  11    11    44 Some College    Employed                   5397 Yes             
##  12    12    22 Graduate        Student                    4445 No              
##  13    13    44 High School     Part-time                  1960 No              
##  14    14    45 Graduate        Employed                   5385 Housing insecure
##  15    15    26 Graduate        Student                    3136 Housing insecure
##  16    16    46 Some College    Employed                   7433 Yes             
##  17    17    52 Bachelor's      Part-time                  7047 No              
##  18    18    25 Graduate        Part-time                 12340 No              
##  19    19    43 Graduate        Student                    5096 No              
##  20    20    24 Graduate        Unemployed                 6577 Housing insecure
##  21    21    59 Bachelor's      Employed                   1990 Housing insecure
##  22    22    26 High School     Employed                  11321 No              
##  23    23    36 Graduate        Employed                   5748 Housing insecure
##  24    24    53 Graduate        Unemployed                 9831 No              
##  25    25    31 Graduate        Employed                   3061 No              
##  26    26    34 Some College    Unemployed                 6457 Yes             
##  27    27    60 High School     Employed                  10242 No              
##  28    28    56 Some College    Student                    1752 No              
##  29    29    29 Bachelor's      Employed                  10781 No              
##  30    30    32 Graduate        Part-time                  4614 No              
##  31    31    49 Bachelor's      Part-time                  5376 No              
##  32    32    59 High School     Unemployed                 4745 No              
##  33    33    62 Bachelor's      Unemployed                 1448 No              
##  34    34    24 Graduate        Employed                   7226 No              
##  35    35    26 Some College    Part-time                  9148 No              
##  36    36    58 High School     Student                   11839 Housing insecure
##  37    37    27 High School     Employed                   1306 No              
##  38    38    40 Some College    Employed                  12869 No              
##  39    39    44 Bachelor's      Unemployed                 4635 No              
##  40    40    24 High School     Employed                  12270 Housing insecure
##  41    41    44 Some College    Unemployed                 9251 Housing insecure
##  42    42    49 Some College    Unemployed                 6364 Housing insecure
##  43    43    55 High School     Unemployed                10798 Housing insecure
##  44    44    42 Graduate        Unemployed                 1474 Yes             
##  45    45    51 Graduate        Student                    9868 No              
##  46    46    46 Graduate        Student                    5185 No              
##  47    47    22 Some College    Student                   10951 No              
##  48    48    25 Graduate        Student                    7426 No              
##  49    49    29 High School     Part-time                  4295 No              
##  50    50    30 High School     Student                   10611 No              
##  51    51    35 Some College    Part-time                  2100 No              
##  52    52    50 High School     Part-time                 10985 No              
##  53    53    44 Graduate        Part-time                  4322 No              
##  54    54    42 Bachelor's      Employed                  10037 No              
##  55    55    55 High School     Part-time                 12570 No              
##  56    56    38 High School     Part-time                  1978 No              
##  57    57    32 Graduate        Part-time                 11252 No              
##  58    58    58 Graduate        Student                   10627 No              
##  59    59    64 Some College    Unemployed                 5622 No              
##  60    60    43 High School     Part-time                  4931 Housing insecure
##  61    61    48 Graduate        Part-time                  3459 No              
##  62    62    33 Some College    Part-time                  7833 No              
##  63    63    47 High School     Employed                  11657 Housing insecure
##  64    64    23 High School     Employed                   7357 No              
##  65    65    60 High School     Part-time                  8044 No              
##  66    66    25 Bachelor's      Unemployed                 8989 No              
##  67    67    39 Bachelor's      Part-time                  7359 No              
##  68    68    39 Some College    Unemployed                 7118 No              
##  69    69    56 High School     Employed                   1194 No              
##  70    70    48 Graduate        Student                    1573 No              
##  71    71    65 Some College    Unemployed                12152 No              
##  72    72    34 Graduate        Unemployed                10231 Housing insecure
##  73    73    51 Bachelor's      Unemployed                 3413 Housing insecure
##  74    74    21 Some College    Student                    8803 No              
##  75    75    30 Bachelor's      Employed                   8845 No              
##  76    76    22 Some College    Student                    5743 No              
##  77    77    42 Bachelor's      Employed                  10748 No              
##  78    78    39 High School     Employed                   7564 Yes             
##  79    79    42 High School     Part-time                 11622 No              
##  80    80    49 High School     Part-time                  7640 No              
##  81    81    63 High School     Employed                  11873 Housing insecure
##  82    82    42 Bachelor's      Employed                   8050 Housing insecure
##  83    83    40 Some College    Part-time                  6082 Housing insecure
##  84    84    52 High School     Employed                  12395 No              
##  85    85    57 Some College    Part-time                  9508 Housing insecure
##  86    86    65 High School     Student                    5960 Housing insecure
##  87    87    47 Bachelor's      Unemployed                 1220 No              
##  88    88    29 Bachelor's      Employed                   7801 Housing insecure
##  89    89    48 High School     Employed                   6881 No              
##  90    90    63 Graduate        Part-time                 11544 No              
##  91    91    47 Some College    Student                   10754 Housing insecure
##  92    92    52 Bachelor's      Unemployed                11249 No              
##  93    93    31 High School     Part-time                  5415 Yes             
##  94    94    46 Bachelor's      Unemployed                11487 No              
##  95    95    49 Some College    Employed                   2816 No              
##  96    96    24 Graduate        Unemployed                 4382 Housing insecure
##  97    97    20 Graduate        Part-time                  9000 Yes             
##  98    98    40 High School     Unemployed                12729 No              
##  99    99    32 Graduate        Part-time                  7993 No              
## 100   100    38 High School     Part-time                  7319 Yes             
## 101   101    54 High School     Employed                   1729 No              
## 102   102    25 High School     Unemployed                12628 No              
## 103   103    27 High School     Student                    2443 No              
## 104   104    59 Bachelor's      Employed                   2060 No              
## 105   105    61 Some College    Unemployed                11569 No              
## 106   106    51 Some College    Unemployed                 7100 Housing insecure
## 107   107    27 Some College    Student                    5050 No              
## 108   108    39 Bachelor's      Unemployed                11732 Housing insecure
## 109   109    29 High School     Student                    1384 Yes             
## 110   110    37 Some College    Unemployed                 3847 No              
## 111   111    63 Some College    Employed                   9238 No              
## 112   112    34 High School     Student                    3710 No              
## 113   113    63 Some College    Unemployed                 4822 No              
## 114   114    52 High School     Student                    3088 No              
## 115   115    57 High School     Employed                  10617 No              
## 116   116    63 Some College    Student                    2755 No              
## 117   117    47 Bachelor's      Unemployed                10873 Housing insecure
## 118   118    32 High School     Employed                   4972 No              
## 119   119    41 Bachelor's      Student                    5490 No              
## 120   120    40 Graduate        Student                    8557 No              
## 121   121    60 High School     Part-time                  2160 No              
## 122   122    24 Some College    Unemployed                 1264 No              
## 123   123    46 High School     Employed                  12917 No              
## 124   124    32 Graduate        Part-time                  8007 No              
## 125   125    40 Some College    Unemployed                10382 No              
## 126   126    43 Some College    Student                   11703 No              
## 127   127    55 Some College    Part-time                 10058 No              
## 128   128    63 Bachelor's      Unemployed                12750 Housing insecure
## 129   129    49 High School     Part-time                  1530 Housing insecure
## 130   130    24 High School     Part-time                 11841 Yes             
## 131   131    44 Graduate        Employed                  11386 Housing insecure
## 132   132    59 High School     Student                   10305 Housing insecure
## 133   133    22 High School     Unemployed                 5522 Housing insecure
## 134   134    23 Graduate        Part-time                  1505 No              
## 135   135    33 Bachelor's      Employed                   5373 No              
## 136   136    41 High School     Employed                   4285 No              
## 137   137    49 Some College    Part-time                 11206 No              
## 138   138    38 High School     Unemployed                 5349 No              
## 139   139    28 Some College    Part-time                  4654 Housing insecure
## 140   140    53 Graduate        Unemployed                10110 No              
## 141   141    61 Some College    Unemployed                11138 Housing insecure
## 142   142    63 High School     Unemployed                 6495 No              
## 143   143    36 High School     Student                    9756 Housing insecure
## 144   144    42 High School     Employed                   2249 Housing insecure
## 145   145    56 High School     Student                    3640 No              
## 146   146    43 Graduate        Unemployed                12447 No              
## 147   147    26 Bachelor's      Employed                  10024 No              
## 148   148    24 High School     Student                   10827 No              
## 149   149    51 High School     Student                    6013 Yes             
## 150   150    65 High School     Employed                   8126 Yes             
## # ℹ 7 more variables: race <chr>, gender <chr>, black_woman <dbl>,
## #   duration_of_homelessness <int>, family_size <int>,
## #   access_to_services <chr>, mental_health_status <chr>

# Show every attribute stored on df2 (class, row names, column names)
df2 |>
  attributes() |>
  print()

## $class
## [1] "tbl_df"     "tbl"        "data.frame"
## 
## $row.names
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150
## 
## $names
##  [1] "ID"                       "age"                     
##  [3] "education_level"          "employment_status"       
##  [5] "annual_income"            "homeless_status"         
##  [7] "race"                     "gender"                  
##  [9] "black_woman"              "duration_of_homelessness"
## [11] "family_size"              "access_to_services"      
## [13] "mental_health_status"

# Save the dataset as a CSV file (currently disabled; uncomment the line below to write it)
# write.csv(data, "homelessness_test_data.csv", row.names = FALSE)