PUMS-estimates.knit

#function to get around proxy server for tidycensus
library(curl)

## Using libcurl 7.64.1 with Schannel

companyproxy <- curl::ie_proxy_info()$Proxy
Sys.setenv(http_proxy=companyproxy)
Sys.setenv(https_proxy=companyproxy)
#load packages
library(tidycensus)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks stats::lag()
## x readr::parse_date() masks curl::parse_date()

library(srvyr)

## 
## Attaching package: 'srvyr'

## The following object is masked from 'package:stats':
## 
##     filter

#PUMAs that make up Philadelphia
phillypuma <- c("03201","03202",
                "03203","03204", 
                "03205","03206", 
                "03207","03208", 
                "03209","03210", 
                "03211")

#variables of interest
pvariable <- c("ADJINC", #income adjustment factor
               "HINCP", #Household income
               "RNTP", #Rent
               "GRNTP", #Gross Rent
               "ADJHSG", #household cost adjustment factor
               "TEN", #Tenure
               "PWGTP", #Person weight
               "WGTP", #Household weight
               "NP", #Number of people in the Household
               "BDSP", #number of bedrooms
               "HISP", #Hispanic/non-Hispanic
               "RAC1P", #Race
               "RELP", #Relation to the Reference person
               "HHT", #Household/family type
               "HUPAC") #presence of children in the household

View(pums_variables)

#function to get PUMS data 
phl_pums <- get_pums(
  variables = pvariable,
  state = "PA",
  puma = phillypuma,
  survey = "acs5",
  year = 2018,
  rep_weights = "both",
  recode = TRUE,
  key = "82bbcc6711ff4a4923edffb104d47daaa8d191cd"
)

## Getting data from the 2014-2018 5-year ACS Public Use Microdata Sample

#write.csv(phl_pums, "C:/Users/sean.finnegan/Documents/GAIN Act PUMS Analysis/phl_pums18.csv")

#if tidycensus data download doesn't work use this downloaded dataset
#phl_pums <- read.csv("C:/Users/sean.finnegan/Documents/GAIN Act PUMS Analysis/phl_pums18.csv")

#put the adjusted income variable into the data frame
phl_pums$adjusted_inc <- as.double(phl_pums$HINCP) * as.double(phl_pums$ADJINC)

#create a column that tells you weather the household is below 80 AMI based on size of HH
phl_pums$AMI80 <- ifelse(phl_pums$NP == 1 & phl_pums$adjusted_inc <= 48950, "Below80AMI",
                  ifelse(phl_pums$NP == 2 & phl_pums$adjusted_inc <= 55950, "Below80AMI",
                  ifelse(phl_pums$NP == 3 & phl_pums$adjusted_inc <= 62950, "Below80AMI",
                  ifelse(phl_pums$NP == 4 & phl_pums$adjusted_inc <= 69900, "Below80AMI",
                  ifelse(phl_pums$NP == 5 & phl_pums$adjusted_inc <= 75500, "Below80AMI",
                  ifelse(phl_pums$NP == 6 & phl_pums$adjusted_inc <= 81100, "Below80AMI",
                  ifelse(phl_pums$NP == 7 & phl_pums$adjusted_inc <= 86700, "Below80AMI",
                  ifelse(phl_pums$NP == 8 & phl_pums$adjusted_inc <= 92300, "Below80AMI", 
                  ifelse(phl_pums$NP == 9 & phl_pums$adjusted_inc <= 97900 , "Below80AMI",
                  ifelse(phl_pums$NP == 10 & phl_pums$adjusted_inc <= 103500, "Below80AMI",
                  ifelse(phl_pums$NP == 11 & phl_pums$adjusted_inc <= 109100, "Below80AMI",
                  ifelse(phl_pums$NP == 12 & phl_pums$adjusted_inc <= 114700, "Below80AMI", "Above80AMI"))))))))))))
#count number of observations above and below 80% AMI
phl_pums %>% group_by(AMI80) %>% dplyr::count()

## # A tibble: 2 x 2
## # Groups:   AMI80 [2]
##   AMI80          n
##   <chr>      <int>
## 1 Above80AMI 19726
## 2 Below80AMI 27419

#create a column that tells you weather the household is below 50 AMI based on size of HH
phl_pums$AMI50 <- ifelse(phl_pums$NP == 1 & phl_pums$adjusted_inc <= 30600, "Below50AMI",
                  ifelse(phl_pums$NP == 2 & phl_pums$adjusted_inc <= 35000, "Below50AMI",
                  ifelse(phl_pums$NP == 3 & phl_pums$adjusted_inc <= 39350, "Below50AMI",
                  ifelse(phl_pums$NP == 4 & phl_pums$adjusted_inc <= 43700, "Below50AMI",
                  ifelse(phl_pums$NP == 5 & phl_pums$adjusted_inc <= 47200, "Below50AMI",
                  ifelse(phl_pums$NP == 6 & phl_pums$adjusted_inc <= 50700, "Below50AMI",
                  ifelse(phl_pums$NP == 7 & phl_pums$adjusted_inc <= 54200, "Below50AMI",
                  ifelse(phl_pums$NP == 8 & phl_pums$adjusted_inc <= 57700, "Below50AMI", 
                  ifelse(phl_pums$NP == 9 & phl_pums$adjusted_inc <= 61200, "Below50AMI",
                  ifelse(phl_pums$NP == 10 & phl_pums$adjusted_inc <= 64700, "Below50AMI",
                  ifelse(phl_pums$NP == 11 & phl_pums$adjusted_inc <= 68200, "Below50AMI",
                  ifelse(phl_pums$NP == 12 & phl_pums$adjusted_inc <= 71700, "Below50AMI", "Above50AMI"))))))))))))

#count number of observations above and below 50% AMI
phl_pums %>% group_by(AMI50) %>% dplyr::count()

## # A tibble: 2 x 2
## # Groups:   AMI50 [2]
##   AMI50          n
##   <chr>      <int>
## 1 Above50AMI 27404
## 2 Below50AMI 19741

#create a column that tells you weather the household is below 30 AMI based on size of HH
phl_pums$AMI30 <- ifelse(phl_pums$NP == 1 & phl_pums$adjusted_inc <= 18350, "Below30AMI",
                  ifelse(phl_pums$NP == 2 & phl_pums$adjusted_inc <= 21000, "Below30AMI",
                  ifelse(phl_pums$NP == 3 & phl_pums$adjusted_inc <= 23600, "Below30AMI",
                  ifelse(phl_pums$NP == 4 & phl_pums$adjusted_inc <= 26200, "Below30AMI",
                  ifelse(phl_pums$NP == 5 & phl_pums$adjusted_inc <= 29420, "Below30AMI",
                  ifelse(phl_pums$NP == 6 & phl_pums$adjusted_inc <= 33740, "Below30AMI",
                  ifelse(phl_pums$NP == 7 & phl_pums$adjusted_inc <= 38060, "Below30AMI",
                  ifelse(phl_pums$NP == 8 & phl_pums$adjusted_inc <= 42380, "Below30AMI", 
                  ifelse(phl_pums$NP == 9 & phl_pums$adjusted_inc <= 46700, "Below30AMI",
                  ifelse(phl_pums$NP == 10 & phl_pums$adjusted_inc <= 51020, "Below30AMI",
                  ifelse(phl_pums$NP == 11 & phl_pums$adjusted_inc <= 55340, "Below30AMI",
                  ifelse(phl_pums$NP == 12 & phl_pums$adjusted_inc <= 59660, "Below30AMI", "Above30AMI"))))))))))))


#count number of observations above and below 30% AMI
phl_pums %>% group_by(AMI30) %>% dplyr::count()

## # A tibble: 2 x 2
## # Groups:   AMI30 [2]
##   AMI30          n
##   <chr>      <int>
## 1 Above30AMI 33467
## 2 Below30AMI 13678

#create buckets so there is no overlap between below80AMI, below50AMI, and below30AMI
phl_pums$buckets <- ifelse(phl_pums$AMI80 == "Below80AMI" &
                             phl_pums$AMI50 == "Above50AMI" &
                             phl_pums$AMI30 == "Above30AMI", "50-80", 
                           ifelse(phl_pums$AMI50 == "Below50AMI" &
                                    phl_pums$AMI30 == "Above30AMI", "30-50", 
                                  ifelse(phl_pums$AMI30 == "Below30AMI", "0-30", ">80")))
#count the number of observations in each bucket
phl_pums %>% group_by(buckets) %>% dplyr::count()

## # A tibble: 4 x 2
## # Groups:   buckets [4]
##   buckets     n
##   <chr>   <int>
## 1 >80     19726
## 2 0-30    13678
## 3 30-50    6063
## 4 50-80    7678

#change the data to a survey object
phlsurvey <- to_survey(phl_pums, type = "housing")

## Warning: You have duplicate values in the SERIALNO column of your input data,
## are you sure you wish to proceed?

#count of people in Philadelphia

#count and median for 5 PUMAs with least amount of cb renters under 80 AMI
phlsurvey %>% 
  mutate(
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_gap = adjusted_gr_rent - ((adjusted_hhinc/12) * 0.3),
    monthlyinc = (adjusted_hhinc/12)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & PUMA %in% c("03201",
                                                                    "03202",
                                                                    "03203",
                                                                    "03206",
                                                                    "03211")) %>% 
  group_by(buckets) %>%
  summarize(
    rent_burdened = survey_total(vartype = "ci"),
    mediangap = survey_median(rent_gap, vartype = "ci"),
    medianmonthlyinc = survey_median(monthlyinc, vartype = "ci"),
    median_gr_rent = survey_median(adjusted_gr_rent, vartype = "ci")
  )

## # A tibble: 4 x 13
##   buckets rent_burdened rent_burdened_low rent_burdened_upp mediangap
##   <chr>           <dbl>             <dbl>             <dbl>     <dbl>
## 1 >80              1570             1043.             2097.      216.
## 2 0-30            32232            30006.            34458.      634.
## 3 30-50           14948            13417.            16479.      324.
## 4 50-80            8345             7075.             9615.      211.
## # ... with 8 more variables: mediangap_low <dbl>, mediangap_upp <dbl>,
## #   medianmonthlyinc <dbl>, medianmonthlyinc_low <dbl>,
## #   medianmonthlyinc_upp <dbl>, median_gr_rent <dbl>, median_gr_rent_low <dbl>,
## #   median_gr_rent_upp <dbl>

#count of all cost burdened renters
phlsurvey %>% 
  mutate(
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3) %>% 
  summarize(
    rent_burden = survey_total(vartype = "ci")
  )

## # A tibble: 1 x 3
##   rent_burden rent_burden_low rent_burden_upp
##         <dbl>           <dbl>           <dbl>
## 1      155723         151463.         159983.

#count of all severely cost burdened renters
phlsurvey %>% 
  mutate(
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12)) %>% 
  filter(rent_pinc > 0.50 & SPORDER == 1 & TEN == 3) %>% 
  summarize(
    rent_burden = survey_total(vartype = "ci")
  )

## # A tibble: 1 x 3
##   rent_burden rent_burden_low rent_burden_upp
##         <dbl>           <dbl>           <dbl>
## 1       82144          78666.          85622.

####################
# number of cb renters by puma and bucket
cb_puma_bucket <- phlsurvey %>% 
  mutate( #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & buckets != ">80") %>% 
  group_by(PUMA, buckets) %>%
  summarize(
    rent_burdened = survey_total(vartype = "ci")
  )
cb_puma_bucket

## # A tibble: 33 x 5
## # Groups:   PUMA [11]
##    PUMA  buckets rent_burdened rent_burdened_low rent_burdened_upp
##    <chr> <chr>           <dbl>             <dbl>             <dbl>
##  1 03201 0-30             5655             4667.             6643.
##  2 03201 30-50            3297             2590.             4004.
##  3 03201 50-80            2316             1628.             3004.
##  4 03202 0-30             7454             6344.             8564.
##  5 03202 30-50            3523             2645.             4401.
##  6 03202 50-80            1409              832.             1986.
##  7 03203 0-30             6587             5399.             7775.
##  8 03203 30-50            2582             1861.             3303.
##  9 03203 50-80            1248              608.             1888.
## 10 03204 0-30            10257             9173.            11341.
## # ... with 23 more rows

# median rent gap per puma
cbgap_median <- phlsurvey %>% 
  mutate( #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12),
    rent_gap = adjusted_gr_rent - ((adjusted_hhinc/12) * 0.3)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & buckets != ">80") %>% 
  group_by(PUMA) %>%
  summarize(
    rentgap = survey_median(rent_gap, vartype = "ci")
  )
cbgap_median

## # A tibble: 11 x 4
##    PUMA  rentgap rentgap_low rentgap_upp
##    <chr>   <dbl>       <dbl>       <dbl>
##  1 03201    472.        394.        496.
##  2 03202    470.        397.        537.
##  3 03203    438.        395.        536.
##  4 03204    540.        498.        576.
##  5 03205    489.        439.        543.
##  6 03206    528.        451.        590.
##  7 03207    513.        446.        584.
##  8 03208    498.        451.        575.
##  9 03209    770.        655.        851.
## 10 03210    553.        507.        606.
## 11 03211    533.        452.        595.

# median monthly income per puma
cbinc_median <- phlsurvey %>% 
  mutate( #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    monthly_hhinc = (adjusted_hhinc/12)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & buckets != ">80") %>% 
  group_by(PUMA) %>%
  summarize(
    med_inc = survey_median(monthly_hhinc, vartype = "ci")
  )
cbinc_median

## # A tibble: 11 x 4
##    PUMA  med_inc med_inc_low med_inc_upp
##    <chr>   <dbl>       <dbl>       <dbl>
##  1 03201   1771.       1596.       2068.
##  2 03202   1606.       1397.       1839.
##  3 03203   1537.       1318.       1748.
##  4 03204   1044.        819.       1394.
##  5 03205   1013.        879.       1160.
##  6 03206   1538.       1265.       1726.
##  7 03207    814.        769.        981.
##  8 03208   1036.        837.       1243.
##  9 03209   1769.       1554.       2072.
## 10 03210   1199.       1044.       1327.
## 11 03211   1571.       1295.       1784.

# median gross rent per puma
cb_grrent_median <- phlsurvey %>% 
  mutate( #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & buckets != ">80") %>% 
  group_by(PUMA) %>%
  summarize(
    med_gr_rent = survey_median(adjusted_gr_rent, vartype = "ci")
  )
cb_grrent_median

## # A tibble: 11 x 4
##    PUMA  med_gr_rent med_gr_rent_low med_gr_rent_upp
##    <chr>       <dbl>           <dbl>           <dbl>
##  1 03201       1050             994.           1104.
##  2 03202       1023.            975.           1062.
##  3 03203       1001.            940            1065.
##  4 03204        922.            900             984.
##  5 03205        869.            840.            891.
##  6 03206       1060.            994.           1134.
##  7 03207        881.            830.            932.
##  8 03208        942.            900.            987.
##  9 03209       1350            1242.           1423.
## 10 03210        980             942.           1032.
## 11 03211        966.            901.           1035.

# number of cb renters by bucket
cb_renters_bucket <- phlsurvey %>% 
  mutate(
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12),
    rent_gap = adjusted_gr_rent - ((adjusted_hhinc/12) * 0.3)) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3) %>% 
  group_by(buckets) %>%
  summarize(
    rent_burdened = survey_total(vartype = "ci"),
    median_rentgap = survey_median(rent_gap, vartype = "ci")
  )
cb_renters_bucket

## # A tibble: 4 x 7
##   buckets rent_burdened rent_burdened_low rent_burdened_upp median_rentgap
##   <chr>           <dbl>             <dbl>             <dbl>          <dbl>
## 1 >80              7227             6210.             8244.           240.
## 2 0-30            95281            91338.            99224.           634.
## 3 30-50           34715            32324.            37106.           321.
## 4 50-80           18500            16743.            20257.           268.
## # ... with 2 more variables: median_rentgap_low <dbl>, median_rentgap_upp <dbl>

# number of scb renters by bucket
scb_renters_bucket <- phlsurvey %>% 
  mutate(
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12),
    rent_gap = adjusted_gr_rent - ((adjusted_hhinc/12) * 0.3)) %>% 
  filter(gr_rent_pinc > 0.50 & SPORDER == 1 & TEN == 3) %>% 
  group_by(buckets) %>%
  summarize(
    rent_burdened = survey_total(vartype = "ci"),
    median_rentgap = survey_median(rent_gap, vartype = "ci")
  )
scb_renters_bucket

## # A tibble: 4 x 7
##   buckets rent_burdened rent_burdened_low rent_burdened_upp median_rentgap
##   <chr>           <dbl>             <dbl>             <dbl>          <dbl>
## 1 >80               521              287.              755.          1492.
## 2 0-30            83267            79699.            86835.           694.
## 3 30-50           12315            11127.            13503.           654.
## 4 50-80            2693             2011.             3375.           996.
## # ... with 2 more variables: median_rentgap_low <dbl>, median_rentgap_upp <dbl>

# number of cb nonwhite households by PUMA
nonwhite_count1 <- phlsurvey %>% 
  mutate( 
    nonwhite = ifelse(RAC1P == "1" & HISP == "01", 0, 1),
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12),
    rent_gap = adjusted_gr_rent - ((adjusted_hhinc/12) * 0.3),
    monthly_hhinc = (adjusted_hhinc/12)
    ) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & buckets != ">80" & nonwhite == 1 & adjusted_hhinc > 0) %>% 
  group_by(PUMA) %>%
  summarize(
    nonwhite_burd = survey_total(vartype = "ci"),
    med_inc = survey_median(monthly_hhinc, vartype = "ci"),
    med_rentgap = survey_median(rent_gap, vartype = "ci")
  )
#add total column
nonwhite_count1$totalcost <- nonwhite_count1$nonwhite_burd * nonwhite_count1$med_rentgap
nonwhite_count1

## # A tibble: 11 x 11
##    PUMA  nonwhite_burd nonwhite_burd_low nonwhite_burd_upp med_inc med_inc_low
##    <chr>         <dbl>             <dbl>             <dbl>   <dbl>       <dbl>
##  1 03201          3762             2872.             4652.   2186.       1688.
##  2 03202          7466             6238.             8694.   1742.       1362.
##  3 03203          6580             5407.             7753.   1617.       1381.
##  4 03204         11595            10182.            13008.   1461.       1098.
##  5 03205         11942            10432.            13452.   1160.       1028.
##  6 03206          8664             7557.             9771.   1353.       1195.
##  7 03207         10708             9595.            11821.   1124.        984.
##  8 03208         12061            10939.            13183.   1222.       1036.
##  9 03209          5636             4651.             6621.   1624.       1365.
## 10 03210         12027            10783.            13271.   1318.       1151.
## 11 03211          4854             3962.             5746.   1486.        973.
## # ... with 5 more variables: med_inc_upp <dbl>, med_rentgap <dbl>,
## #   med_rentgap_low <dbl>, med_rentgap_upp <dbl>, totalcost <dbl>

#write.csv(nonwhite_count1, "C:/Users/sean.finnegan/Documents/GAIN Act PUMS Analysis/Nonwhite rent burden 0inc removed.csv")

# number of cb single female hoh women of color by PUMA
singlemother1 <- phlsurvey %>% 
  mutate( 
    singlef = ifelse(HHT == 3, 1, 0),
    nonwhite = ifelse(RAC1P == "1" & HISP == "01", 0, 1),
    #Create income, rent, and gross rent variables that are inflation adjusted  
    adjusted_hhinc = as.double(HINCP) * as.double(ADJINC),
    adjusted_rent = as.double(RNTP) * as.double(ADJHSG),
    adjusted_gr_rent = as.double(GRNTP) * as.double(ADJHSG),
    #create rent as percentage of monthly household income
    gr_rent_pinc = adjusted_gr_rent / (adjusted_hhinc/12),
    rent_pinc = adjusted_rent / (adjusted_hhinc/12),
    monthly_hhinc = (adjusted_hhinc/12),
    rent_gap = adjusted_gr_rent - ((adjusted_hhinc/12) * 0.3)
  ) %>% 
  filter(gr_rent_pinc > 0.30 & SPORDER == 1 & TEN == 3 & buckets != ">80" & singlef == 1 & HUPAC %in% c(1, 2, 3) & adjusted_hhinc > 0 & nonwhite == 1) %>% 
  group_by(PUMA) %>%
  summarize(
    female_burdened = survey_total(vartype = "ci"),
    med_rentgap = survey_median(rent_gap, vartype = "ci"),
    med_inc = survey_median(monthly_hhinc, vartype = "ci")
  )

#add total column
singlemother1$totalcost <- singlemother1$female_burdened * singlemother1$med_rentgap
singlemother1

## # A tibble: 11 x 11
##    PUMA  female_burdened female_burdened_low female_burdened_upp med_rentgap
##    <chr>           <dbl>               <dbl>               <dbl>       <dbl>
##  1 03201             625                243.               1007.        306.
##  2 03202            2406               1686.               3126.        560.
##  3 03203            1811               1196.               2426.        413.
##  4 03204            3218               2387.               4049.        574.
##  5 03205            4091               3244.               4938.        434.
##  6 03206            1842               1233.               2451.        634.
##  7 03207            2884               2211.               3557.        430.
##  8 03208            3174               2458.               3890.        429.
##  9 03209             726                367.               1085.        241.
## 10 03210            3238               2485.               3991.        502.
## 11 03211             975                417.               1533.        592.
## # ... with 6 more variables: med_rentgap_low <dbl>, med_rentgap_upp <dbl>,
## #   med_inc <dbl>, med_inc_low <dbl>, med_inc_upp <dbl>, totalcost <dbl>

#write.csv(singlemother1, "C:/Users/sean.finnegan/Documents/GAIN Act PUMS Analysis/Single Female HOH cost burden.csv")