Read in the Proposal Data
#Read in the Proposed Phasing Schedule
proposedPhasingSchedule <- read.csv2(
file = "ProposedPhasingSchedule.csv",
header = TRUE,
sep = ","
)
head(proposedPhasingSchedule)
## Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater 618 173 83 0 36 327 2019
## 2 Young 746 0 0 60 60 746 2019
## 3 Calvert 310 36 17 39 22 274 2019
## 4 Tidewater 327 124 75 0 20 109 2020
## 5 Young 746 0 0 60 60 746 2020
## 6 Calvert 274 0 0 22 22 274 2020
Read in the 2009 Study Data
#Read in the 2009 Attrition Study
attritionRates2009raw <- read.csv2(
file = "LIPHAttritionRate2009Cohort.csv",
header = TRUE,
sep = ",",
na.strings = "N/A"
)
head(attritionRates2009raw)
## Stat X2009 X2010 X2011 X2012 X2013 X2014 X2015
## 1 StillLivingInLIPH 353 323 271 251 226 199 171
## 2 Attrition NA 30 52 20 25 27 28
## 3 AttritionRate NA 0.0850 0.1610 0.0738 0.0996 0.1195 0.1410
Clean the Proposal Data for Yearly Attrition Rate
#library for manipulating data frames
require(dplyr)
#clean the data to get the attrition rate each year
projectedAttritionRates = proposedPhasingSchedule %>%
group_by(Year) %>% #group for statistics across all housing
mutate(TotalAll = sum(Total)) %>% #get total across all housing
mutate(AttritionAll = sum(Attrition)) #get attrition across all housing
head(projectedAttritionRates)
## # A tibble: 6 x 10
## # Groups: Year [2]
## Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## <fctr> <int> <int> <int> <int> <int> <int> <int>
## 1 Tidewater 618 173 83 0 36 327 2019
## 2 Young 746 0 0 60 60 746 2019
## 3 Calvert 310 36 17 39 22 274 2019
## 4 Tidewater 327 124 75 0 20 109 2020
## 5 Young 746 0 0 60 60 746 2020
## 6 Calvert 274 0 0 22 22 274 2020
## # ... with 2 more variables: TotalAll <int>, AttritionAll <int>
projectedAttritionRates <- projectedAttritionRates %>%
distinct(Year, TotalAll, AttritionAll) %>% #remove duplicates
ungroup() %>% #we have the statistics we need
mutate(AttritionRatePercentage = round(AttritionAll/TotalAll * 100)) %>% #calc attrition rate
select(Year, TotalAll, AttritionAll, AttritionRatePercentage) #we only need these columns
head(projectedAttritionRates)
## # A tibble: 6 x 4
## Year TotalAll AttritionAll AttritionRatePercentage
## <int> <int> <int> <dbl>
## 1 2019 1674 118 7
## 2 2020 1347 102 8
## 3 2021 1129 70 6
## 4 2022 852 49 6
## 5 2023 600 29 5
## 6 2024 386 5 1
Plot the Proposal Yearly Attrition Rate
#library for plotting data
require(ggplot2)
#plot the attrition rate each year
ggplot(projectedAttritionRates, aes(x = Year, y = AttritionRatePercentage)) +
geom_line(size = 2) +
labs(title = "Attrition Rate, Year-to-Year ")

Clean the 2009 Study Data for Yearly Attrition Rate
#library for changing the shape of the data
require(reshape2)
#go from column per year to one column for year
attritionRates2009 <- attritionRates2009raw %>%
melt(id.vars = c("Stat")) %>% #combine all columns except stat
rename(Year = variable) #change the variable column to year
head(attritionRates2009)
## Stat Year value
## 1 StillLivingInLIPH X2009 353
## 2 Attrition X2009 <NA>
## 3 AttritionRate X2009 <NA>
## 4 StillLivingInLIPH X2010 323
## 5 Attrition X2010 30
## 6 AttritionRate X2010 0.0850
#have one column for each measurement each year
attritionRates2009 <- attritionRates2009 %>%
dcast(Year ~ Stat) #make a column for each type of Stat that drives Year
head(attritionRates2009)
## Year Attrition AttritionRate StillLivingInLIPH
## 1 X2009 <NA> <NA> 353
## 2 X2010 30 0.0850 323
## 3 X2011 52 0.1610 271
## 4 X2012 20 0.0738 251
## 5 X2013 25 0.0996 226
## 6 X2014 27 0.1195 199
#clean to get the attrition rate each year
attritionRates2009 <- attritionRates2009 %>%
filter(!is.na((AttritionRate))) %>% #remove the column there was no attrition
mutate(Year = substring(Year, 2)) %>% #get rid of the X in front of the year
mutate(Year = as.numeric(Year)) %>% #convert Year to a number
mutate(AttritionRate = as.numeric(AttritionRate)) %>% #convert attrition rate to a number
select(Year, AttritionRate) #get only year and attrition rate
head(attritionRates2009)
## Year AttritionRate
## 1 2010 0.0850
## 2 2011 0.1610
## 3 2012 0.0738
## 4 2013 0.0996
## 5 2014 0.1195
## 6 2015 0.1410
Combine Proposal Data and 2009 Study Data
#standardize the 2009 study columns so it can be combined
attritionRates2009 <- attritionRates2009 %>%
mutate(AttritionRatePercentage = round(AttritionRate * 100)) %>% #round the attrition rate
mutate(YearSinceVoucher = Year - 2009) %>% #just count years instead of the specific year
mutate(DataSet = "2009Study") %>% #tag these findings as coming from the 2009 study
select(YearSinceVoucher, AttritionRatePercentage, DataSet) #only use these columns
#standardize the proposal columns so it can be combined
projectedAttritionRates <- projectedAttritionRates %>%
mutate(YearSinceVoucher = Year - 2018) %>% #just count years instead of the specific year
mutate(DataSet = "ProjectedAttritionRate") %>% #tag this as coming from the proposal
select(YearSinceVoucher, AttritionRatePercentage, DataSet) #only use these columns
head(attritionRates2009)
## YearSinceVoucher AttritionRatePercentage DataSet
## 1 1 8 2009Study
## 2 2 16 2009Study
## 3 3 7 2009Study
## 4 4 10 2009Study
## 5 5 12 2009Study
## 6 6 14 2009Study
head(projectedAttritionRates)
## # A tibble: 6 x 3
## YearSinceVoucher AttritionRatePercentage DataSet
## <dbl> <dbl> <chr>
## 1 1 7 ProjectedAttritionRate
## 2 2 8 ProjectedAttritionRate
## 3 3 6 ProjectedAttritionRate
## 4 4 6 ProjectedAttritionRate
## 5 5 5 ProjectedAttritionRate
## 6 6 1 ProjectedAttritionRate
#combined the dataframes into one
attritionRatesCompared = rbind(attritionRates2009, projectedAttritionRates)
head(attritionRatesCompared)
## YearSinceVoucher AttritionRatePercentage DataSet
## 1 1 8 2009Study
## 2 2 16 2009Study
## 3 3 7 2009Study
## 4 4 10 2009Study
## 5 5 12 2009Study
## 6 6 14 2009Study
Plot Proposal and 2009 Study Yearly Attrition Rate
#plot the attrition rates each year, using color for different data sets
ggplot(
attritionRatesCompared,
aes(x = YearSinceVoucher, y = AttritionRatePercentage, color = DataSet)
) +
geom_line(size = 2) +
labs(title = "Attrition Rates Compared Year-to-Year ")

Get a Linear Regression of 2009 Study Yearly Attrition Rate
#create a linear regression of the 2009 attrition rates
attritionRates2009lm <- lm(AttritionRatePercentage ~ YearSinceVoucher, attritionRates2009)
summary(attritionRates2009lm)
##
## Call:
## lm(formula = AttritionRatePercentage ~ YearSinceVoucher, data = attritionRates2009)
##
## Residuals:
## 1 2 3 4 5 6
## -1.66667 5.73333 -3.86667 -1.46667 -0.06667 1.33333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.0667 3.4374 2.638 0.0577 .
## YearSinceVoucher 0.6000 0.8826 0.680 0.5339
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.692 on 4 degrees of freedom
## Multiple R-squared: 0.1036, Adjusted R-squared: -0.1205
## F-statistic: 0.4621 on 1 and 4 DF, p-value: 0.5339
Add 2009 Study Regression Data
# library for explicitly creating data frames
require(tibble)
#create a data frame with the predicted values from the linear regression
averagedAttritionRates2009 <- tibble(
YearSinceVoucher = attritionRates2009$YearSinceVoucher, #year, from the 2009 data
AttritionRatePercentage = predict(attritionRates2009lm, attritionRates2009), #predicted value
DataSet = "2009Study", #tag as coming from the 2009 study
DataType = "Averaged" #tag as a regression, not the actual data
)
#add the regression values to our dataframe
attritionRatesCompared <- attritionRatesCompared %>%
mutate(DataType = "Literal") %>% #tag all data not from the regression as Literal
rbind(averagedAttritionRates2009) %>% #add the regression data
mutate(DataType = factor(DataType, levels = c("Literal", "Averaged"))) #reverse label order
head(attritionRatesCompared)
## YearSinceVoucher AttritionRatePercentage DataSet DataType
## 1 1 8 2009Study Literal
## 2 2 16 2009Study Literal
## 3 3 7 2009Study Literal
## 4 4 10 2009Study Literal
## 5 5 12 2009Study Literal
## 6 6 14 2009Study Literal
Plot Proposal Attrition Rate, 2009 Study Attrition Rate, and 2009 Study Regression Attrition Rate
#plot the attrition rates from the proposal and the study and the regression
ggplot(
attritionRatesCompared,
aes(
x = YearSinceVoucher,
y = AttritionRatePercentage,
color = DataSet,
linetype = DataType
)
) +
geom_line(size = 2) +
labs(
title = "Attrition Rates Compared",
x = "Years with Voucher",
y = "Attrition Rate (Percentage)"
) +
scale_color_manual(labels = c("2009 Study", "Proposal Estimation"), values = c("red", "blue")) +
scale_linetype_discrete(guide = FALSE)
