Read in the Proposal Data

#read in the proposal data
proposedPhasingSchedule <- read.csv2(
  file = "ProposedPhasingSchedule.csv",
  header = TRUE,
  sep = ","
)

head(proposedPhasingSchedule)

Read in the Attrition Data, Clean It, and Get the Regression of the Attrition Rate

require(dplyr)
require(reshape2)

#read in attrition rate
attritionRates2009raw <- read.csv2(
  file = "LIPHAttritionRate2009Cohort.csv",
  header = TRUE,
  sep = ",",
  na.strings = "N/A"
)

#get attriton rate percentage each year
attritionRates2009 <- attritionRates2009raw %>% 
  melt(id.vars = c("Stat")) %>%
  rename(Year = variable) %>%
  dcast(Year ~ Stat) %>%
  filter(!is.na((AttritionRate))) %>%
  mutate(Year = substring(Year, 2)) %>%
  mutate(Year = as.numeric(Year)) %>%
  mutate(AttritionRate = as.numeric(AttritionRate)) %>%
  mutate(AttritionRatePercentage = round(AttritionRate * 100)) %>% 
  mutate(YearSinceVoucher = Year - 2009) %>%
  mutate(DataSet = "2009Study")

head(attritionRates2009)
##   Year Attrition AttritionRate StillLivingInLIPH AttritionRatePercentage
## 1 2010        30        0.0850               323                       8
## 2 2011        52        0.1610               271                      16
## 3 2012        20        0.0738               251                       7
## 4 2013        25        0.0996               226                      10
## 5 2014        27        0.1195               199                      12
## 6 2015        28        0.1410               171                      14
##   YearSinceVoucher   DataSet
## 1                1 2009Study
## 2                2 2009Study
## 3                3 2009Study
## 4                4 2009Study
## 5                5 2009Study
## 6                6 2009Study
#linear regression of the 2009 attrition rates
attritionRates2009lm <- lm(AttritionRatePercentage ~ YearSinceVoucher, attritionRates2009)

#data frame of the attrition rates each year
averagedAttritionRates2009 <- tibble(
  YearSinceVoucher = attritionRates2009$YearSinceVoucher,
  AttritionRatePercentage = predict(attritionRates2009lm, attritionRates2009),
  DataSet = "2009Study",
  DataType = "Averaged"
)

head(averagedAttritionRates2009)
## # A tibble: 6 x 4
##   YearSinceVoucher AttritionRatePercentage   DataSet DataType
##              <dbl>                   <dbl>     <chr>    <chr>
## 1                1                9.666667 2009Study Averaged
## 2                2               10.266667 2009Study Averaged
## 3                3               10.866667 2009Study Averaged
## 4                4               11.466667 2009Study Averaged
## 5                5               12.066667 2009Study Averaged
## 6                6               12.666667 2009Study Averaged

Clean the Proposal Data for Tenants and Attrition By Year

#Clean Up the Proposal Data
totalsAndAttrition <- proposedPhasingSchedule  %>%
  rbind(c("Tidewater", 618, 0, 0, 0, 0, 618, 2018))  %>%
  rbind(c("Young", 746, 0, 0, 0, 0, 746, 2018))  %>%
  rbind(c("Calvert", 310, 0, 0, 0, 0, 310, 2018)) %>%
  mutate(Attrition = as.numeric(Attrition)) %>%
  mutate(Remain = as.numeric(Remain)) %>%
  mutate(Year = as.numeric(Year)) %>%
  mutate(YearSinceVoucher = Year - 2018) %>%
  arrange(YearSinceVoucher)

head(totalsAndAttrition)
##     Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater   618   0            0           0         0    618 2018
## 2     Young   746   0            0           0         0    746 2018
## 3   Calvert   310   0            0           0         0    310 2018
## 4 Tidewater   618 173           83           0        36    327 2019
## 5     Young   746   0            0          60        60    746 2019
## 6   Calvert   310  36           17          39        22    274 2019
##   YearSinceVoucher
## 1                0
## 2                0
## 3                0
## 4                1
## 5                1
## 6                1
#Get Statistics Across All Housing Areas
totalsAndAttrition <- totalsAndAttrition %>%
  group_by(YearSinceVoucher) %>%
    mutate(Tenants = sum(Remain)) %>%
    mutate(TotalAttritionThatYear = sum(Attrition)) %>% 
  ungroup() %>%
  select(YearSinceVoucher, Tenants, TotalAttritionThatYear) %>%
  distinct(YearSinceVoucher, Tenants, TotalAttritionThatYear)

head(totalsAndAttrition)
## # A tibble: 6 x 3
##   YearSinceVoucher Tenants TotalAttritionThatYear
##              <dbl>   <dbl>                  <dbl>
## 1                0    1674                      0
## 2                1    1347                    118
## 3                2    1129                    102
## 4                3     852                     70
## 5                4     600                     49
## 6                5     386                     29

Calculate Attritioned per Year Using 2009 Regression Attrition Rate

#this library has a shift method that's helpful for checking the row before it
require(data.table)

#get the total tenants in the first year
totalFirstYear <- filter(totalsAndAttrition, YearSinceVoucher == 0)$Tenants[1]

#print out the total the first year
totalFirstYear
## [1] 1674
#get a the predict attrition rate using the regression from the 2009 study
totalsAndAttrition <- totalsAndAttrition %>% 
  mutate(TenantsLastYear = shift(
      totalsAndAttrition,
      fill = totalFirstYear, 
      give.names = TRUE
    )$Tenants_lag_1 #use shift to get the Tenants values from the previous row
  ) %>%
  mutate(
    ProposedAttritionRatePercentage = round(
      TotalAttritionThatYear/TenantsLastYear * 100
    )
  ) %>% #calculate the attrition rate from the proposal
  mutate(
    PredictedAttritionRatePercentage = predict(
      attritionRates2009lm, 
      totalsAndAttrition
    )
  ) %>% #calculated the attrition rate from the 2009 regression data
  mutate(
    PredictedAttritionedThatYear = round(
      TenantsLastYear * PredictedAttritionRatePercentage / 100
    )
  ) %>% #convert the predict attrition rate to actual people attritions
  mutate(
    PredictedAttritionedThatYear = ifelse(
      YearSinceVoucher < 0.5, 
      0, 
      PredictedAttritionedThatYear
    )
  ) %>% #set the predicted attrition rate to 0 for the first year when attrition hasn't started
  rename(ProposedAttritionedThatYear = TotalAttritionThatYear) %>% #rename a column
  mutate(Year = YearSinceVoucher + 2018) %>% #put back into tearm of actual years
  arrange(Year) %>% #sort by year
  select(Year, Tenants, ProposedAttritionedThatYear, PredictedAttritionedThatYear) #get columns

head(totalsAndAttrition)
## # A tibble: 6 x 4
##    Year Tenants ProposedAttritionedThatYear PredictedAttritionedThatYear
##   <dbl>   <dbl>                       <dbl>                        <dbl>
## 1  2018    1674                           0                            0
## 2  2019    1347                         118                          162
## 3  2020    1129                         102                          138
## 4  2021     852                          70                          123
## 5  2022     600                          49                           98
## 6  2023     386                          29                           72

Calculate Cumulative Attritioned People For Each Year

#use within to calculate the cumulative attritioned people proposed
totalsAndAttrition <- within(
  totalsAndAttrition, 
  ProposedAttritioned <- cumsum(ProposedAttritionedThatYear)
)

#use within to calculare the cumulative attritioned people predicted from 2009 data
totalsAndAttrition <- within(
  totalsAndAttrition, 
  PredictedAttritioned <- cumsum(PredictedAttritionedThatYear)
)

#do a select to have only the year and the cumulative columns
totalsAndAttrition <- totalsAndAttrition %>%
  select(Year, Tenants, ProposedAttritioned, PredictedAttritioned)

head(totalsAndAttrition)
## # A tibble: 6 x 4
##    Year Tenants ProposedAttritioned PredictedAttritioned
##   <dbl>   <dbl>               <dbl>                <dbl>
## 1  2018    1674                   0                    0
## 2  2019    1347                 118                  162
## 3  2020    1129                 220                  300
## 4  2021     852                 290                  423
## 5  2022     600                 339                  521
## 6  2023     386                 368                  593

Prepare Attrition Date to be Graphs By Tenants vs Attritioned, Proposal vs Predicted

#have attrition and tenants be in one column and another column to distinguish type
totalsVsAttrition <- totalsAndAttrition %>% 
  melt(id.vars = c("Year")) %>%
  rename(Situation = variable) %>%
  rename(People = value)

head(totalsVsAttrition)
##   Year Situation People
## 1 2018   Tenants   1674
## 2 2019   Tenants   1347
## 3 2020   Tenants   1129
## 4 2021   Tenants    852
## 5 2022   Tenants    600
## 6 2023   Tenants    386
#have a column for tenant vs attritioned another for which type of attrition
totalsVsAttrition <- totalsVsAttrition %>%
  mutate(IsTenant = Situation == "Tenants") %>% #column for tenants vs attritioned
  mutate(Situation = factor(
      Situation, 
      levels = c("Tenants", "ProposedAttritioned", "PredictedAttritioned")
    )
  ) %>% #change the ordering of the types
  arrange(Year) #sort by year

head(totalsVsAttrition)
##   Year            Situation People IsTenant
## 1 2018              Tenants   1674     TRUE
## 2 2018  ProposedAttritioned      0    FALSE
## 3 2018 PredictedAttritioned      0    FALSE
## 4 2019              Tenants   1347     TRUE
## 5 2019  ProposedAttritioned    118    FALSE
## 6 2019 PredictedAttritioned    162    FALSE

Plot Tenants, Proposal Total Attritioned, and Predicted Total Attritioned

require(ggplot2)

#plot tenants each year, proposal attritioned each year, and predicted attritioned each year
ggplot(
    totalsVsAttrition, 
    aes(x = Year, y = People, color = IsTenant, linetype = Situation)
  ) + 
  geom_line(size = 2) + 
  labs(title = "Total Tenants vs Total People Attritioned, Proposal vs Predicted", color = "", linetype = "") + 
  scale_color_manual(labels = c("Attritioned", "Tenants"), values = c("red", "blue")) + 
  scale_linetype_manual(labels = c("Tenants", "Proposal Attritioned", "Predicted Attritioned"), values = c(1, 2, 3))

#get and print the total number attritioned according to the proposal
proposedTotalAttritioned <- filter(totalsAndAttrition, Year == 2025)$ProposedAttritioned[1]
proposedTotalAttritioned
## [1] 375
#print the fraction of the total tenants that will be attritioned according to the proposal
proposedTotalAttritioned / totalFirstYear
## [1] 0.2240143
#get and print the total number attritioned according to the prediction
predictedTotalAttritioned <- filter(totalsAndAttrition, Year == 2025)$PredictedAttritioned[1]
predictedTotalAttritioned
## [1] 658
#print the fraction of the total tenants that will be attritioned according to the prefiction
predictedTotalAttritioned / totalFirstYear
## [1] 0.3930705