Read in the Proposal Data
#read in the proposal data
proposedPhasingSchedule <- read.csv2(
file = "ProposedPhasingSchedule.csv",
header = TRUE,
sep = ","
)
head(proposedPhasingSchedule)
Read in the Attrition Data, Clean It, and Get the Regression of the Attrition Rate
require(dplyr)
require(reshape2)
#read in attrition rate
attritionRates2009raw <- read.csv2(
file = "LIPHAttritionRate2009Cohort.csv",
header = TRUE,
sep = ",",
na.strings = "N/A"
)
#get attriton rate percentage each year
attritionRates2009 <- attritionRates2009raw %>%
melt(id.vars = c("Stat")) %>%
rename(Year = variable) %>%
dcast(Year ~ Stat) %>%
filter(!is.na((AttritionRate))) %>%
mutate(Year = substring(Year, 2)) %>%
mutate(Year = as.numeric(Year)) %>%
mutate(AttritionRate = as.numeric(AttritionRate)) %>%
mutate(AttritionRatePercentage = round(AttritionRate * 100)) %>%
mutate(YearSinceVoucher = Year - 2009) %>%
mutate(DataSet = "2009Study")
head(attritionRates2009)
## Year Attrition AttritionRate StillLivingInLIPH AttritionRatePercentage
## 1 2010 30 0.0850 323 8
## 2 2011 52 0.1610 271 16
## 3 2012 20 0.0738 251 7
## 4 2013 25 0.0996 226 10
## 5 2014 27 0.1195 199 12
## 6 2015 28 0.1410 171 14
## YearSinceVoucher DataSet
## 1 1 2009Study
## 2 2 2009Study
## 3 3 2009Study
## 4 4 2009Study
## 5 5 2009Study
## 6 6 2009Study
#linear regression of the 2009 attrition rates
attritionRates2009lm <- lm(AttritionRatePercentage ~ YearSinceVoucher, attritionRates2009)
#data frame of the attrition rates each year
averagedAttritionRates2009 <- tibble(
YearSinceVoucher = attritionRates2009$YearSinceVoucher,
AttritionRatePercentage = predict(attritionRates2009lm, attritionRates2009),
DataSet = "2009Study",
DataType = "Averaged"
)
head(averagedAttritionRates2009)
## # A tibble: 6 x 4
## YearSinceVoucher AttritionRatePercentage DataSet DataType
## <dbl> <dbl> <chr> <chr>
## 1 1 9.666667 2009Study Averaged
## 2 2 10.266667 2009Study Averaged
## 3 3 10.866667 2009Study Averaged
## 4 4 11.466667 2009Study Averaged
## 5 5 12.066667 2009Study Averaged
## 6 6 12.666667 2009Study Averaged
Clean the Proposal Data for Tenants and Attrition By Year
#Clean Up the Proposal Data
totalsAndAttrition <- proposedPhasingSchedule %>%
rbind(c("Tidewater", 618, 0, 0, 0, 0, 618, 2018)) %>%
rbind(c("Young", 746, 0, 0, 0, 0, 746, 2018)) %>%
rbind(c("Calvert", 310, 0, 0, 0, 0, 310, 2018)) %>%
mutate(Attrition = as.numeric(Attrition)) %>%
mutate(Remain = as.numeric(Remain)) %>%
mutate(Year = as.numeric(Year)) %>%
mutate(YearSinceVoucher = Year - 2018) %>%
arrange(YearSinceVoucher)
head(totalsAndAttrition)
## Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater 618 0 0 0 0 618 2018
## 2 Young 746 0 0 0 0 746 2018
## 3 Calvert 310 0 0 0 0 310 2018
## 4 Tidewater 618 173 83 0 36 327 2019
## 5 Young 746 0 0 60 60 746 2019
## 6 Calvert 310 36 17 39 22 274 2019
## YearSinceVoucher
## 1 0
## 2 0
## 3 0
## 4 1
## 5 1
## 6 1
#Get Statistics Across All Housing Areas
totalsAndAttrition <- totalsAndAttrition %>%
group_by(YearSinceVoucher) %>%
mutate(Tenants = sum(Remain)) %>%
mutate(TotalAttritionThatYear = sum(Attrition)) %>%
ungroup() %>%
select(YearSinceVoucher, Tenants, TotalAttritionThatYear) %>%
distinct(YearSinceVoucher, Tenants, TotalAttritionThatYear)
head(totalsAndAttrition)
## # A tibble: 6 x 3
## YearSinceVoucher Tenants TotalAttritionThatYear
## <dbl> <dbl> <dbl>
## 1 0 1674 0
## 2 1 1347 118
## 3 2 1129 102
## 4 3 852 70
## 5 4 600 49
## 6 5 386 29
Calculate Attritioned per Year Using 2009 Regression Attrition Rate
#this library has a shift method that's helpful for checking the row before it
require(data.table)
#get the total tenants in the first year
totalFirstYear <- filter(totalsAndAttrition, YearSinceVoucher == 0)$Tenants[1]
#print out the total the first year
totalFirstYear
## [1] 1674
#get a the predict attrition rate using the regression from the 2009 study
totalsAndAttrition <- totalsAndAttrition %>%
mutate(TenantsLastYear = shift(
totalsAndAttrition,
fill = totalFirstYear,
give.names = TRUE
)$Tenants_lag_1 #use shift to get the Tenants values from the previous row
) %>%
mutate(
ProposedAttritionRatePercentage = round(
TotalAttritionThatYear/TenantsLastYear * 100
)
) %>% #calculate the attrition rate from the proposal
mutate(
PredictedAttritionRatePercentage = predict(
attritionRates2009lm,
totalsAndAttrition
)
) %>% #calculated the attrition rate from the 2009 regression data
mutate(
PredictedAttritionedThatYear = round(
TenantsLastYear * PredictedAttritionRatePercentage / 100
)
) %>% #convert the predict attrition rate to actual people attritions
mutate(
PredictedAttritionedThatYear = ifelse(
YearSinceVoucher < 0.5,
0,
PredictedAttritionedThatYear
)
) %>% #set the predicted attrition rate to 0 for the first year when attrition hasn't started
rename(ProposedAttritionedThatYear = TotalAttritionThatYear) %>% #rename a column
mutate(Year = YearSinceVoucher + 2018) %>% #put back into tearm of actual years
arrange(Year) %>% #sort by year
select(Year, Tenants, ProposedAttritionedThatYear, PredictedAttritionedThatYear) #get columns
head(totalsAndAttrition)
## # A tibble: 6 x 4
## Year Tenants ProposedAttritionedThatYear PredictedAttritionedThatYear
## <dbl> <dbl> <dbl> <dbl>
## 1 2018 1674 0 0
## 2 2019 1347 118 162
## 3 2020 1129 102 138
## 4 2021 852 70 123
## 5 2022 600 49 98
## 6 2023 386 29 72
Calculate Cumulative Attritioned People For Each Year
#use within to calculate the cumulative attritioned people proposed
totalsAndAttrition <- within(
totalsAndAttrition,
ProposedAttritioned <- cumsum(ProposedAttritionedThatYear)
)
#use within to calculare the cumulative attritioned people predicted from 2009 data
totalsAndAttrition <- within(
totalsAndAttrition,
PredictedAttritioned <- cumsum(PredictedAttritionedThatYear)
)
#do a select to have only the year and the cumulative columns
totalsAndAttrition <- totalsAndAttrition %>%
select(Year, Tenants, ProposedAttritioned, PredictedAttritioned)
head(totalsAndAttrition)
## # A tibble: 6 x 4
## Year Tenants ProposedAttritioned PredictedAttritioned
## <dbl> <dbl> <dbl> <dbl>
## 1 2018 1674 0 0
## 2 2019 1347 118 162
## 3 2020 1129 220 300
## 4 2021 852 290 423
## 5 2022 600 339 521
## 6 2023 386 368 593
Prepare Attrition Date to be Graphs By Tenants vs Attritioned, Proposal vs Predicted
#have attrition and tenants be in one column and another column to distinguish type
totalsVsAttrition <- totalsAndAttrition %>%
melt(id.vars = c("Year")) %>%
rename(Situation = variable) %>%
rename(People = value)
head(totalsVsAttrition)
## Year Situation People
## 1 2018 Tenants 1674
## 2 2019 Tenants 1347
## 3 2020 Tenants 1129
## 4 2021 Tenants 852
## 5 2022 Tenants 600
## 6 2023 Tenants 386
#have a column for tenant vs attritioned another for which type of attrition
totalsVsAttrition <- totalsVsAttrition %>%
mutate(IsTenant = Situation == "Tenants") %>% #column for tenants vs attritioned
mutate(Situation = factor(
Situation,
levels = c("Tenants", "ProposedAttritioned", "PredictedAttritioned")
)
) %>% #change the ordering of the types
arrange(Year) #sort by year
head(totalsVsAttrition)
## Year Situation People IsTenant
## 1 2018 Tenants 1674 TRUE
## 2 2018 ProposedAttritioned 0 FALSE
## 3 2018 PredictedAttritioned 0 FALSE
## 4 2019 Tenants 1347 TRUE
## 5 2019 ProposedAttritioned 118 FALSE
## 6 2019 PredictedAttritioned 162 FALSE
Plot Tenants, Proposal Total Attritioned, and Predicted Total Attritioned
require(ggplot2)
#plot tenants each year, proposal attritioned each year, and predicted attritioned each year
ggplot(
totalsVsAttrition,
aes(x = Year, y = People, color = IsTenant, linetype = Situation)
) +
geom_line(size = 2) +
labs(title = "Total Tenants vs Total People Attritioned, Proposal vs Predicted", color = "", linetype = "") +
scale_color_manual(labels = c("Attritioned", "Tenants"), values = c("red", "blue")) +
scale_linetype_manual(labels = c("Tenants", "Proposal Attritioned", "Predicted Attritioned"), values = c(1, 2, 3))

#get and print the total number attritioned according to the proposal
proposedTotalAttritioned <- filter(totalsAndAttrition, Year == 2025)$ProposedAttritioned[1]
proposedTotalAttritioned
## [1] 375
#print the fraction of the total tenants that will be attritioned according to the proposal
proposedTotalAttritioned / totalFirstYear
## [1] 0.2240143
#get and print the total number attritioned according to the prediction
predictedTotalAttritioned <- filter(totalsAndAttrition, Year == 2025)$PredictedAttritioned[1]
predictedTotalAttritioned
## [1] 658
#print the fraction of the total tenants that will be attritioned according to the prefiction
predictedTotalAttritioned / totalFirstYear
## [1] 0.3930705