Read in the proposed phasing schedule as a csv.
#load the contents of the file into a data frame
proposedPhasingSchedule <- read.csv2(
file = "ProposedPhasingSchedule.csv",
header = TRUE,
sep = ","
)
#print the top part of the data frame
head(proposedPhasingSchedule)
## Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater 618 173 83 0 36 327 2019
## 2 Young 746 0 0 60 60 746 2019
## 3 Calvert 310 36 17 39 22 274 2019
## 4 Tidewater 327 124 75 0 20 109 2020
## 5 Young 746 0 0 60 60 746 2020
## 6 Calvert 274 0 0 22 22 274 2020
Clean the data to where we can display tenants per year.
#load a library for manipulating data frames
require(dplyr)
#clean up the data into a variable totalsByScheduleAndYear
totalsByScheduleAndYear <- proposedPhasingSchedule %>%
select(Housing, Total, Year) %>% #we only need these rows
rbind(c("Tidewater", 0, 2026)) %>% #add a row for the year before attrition starts
rbind(c("Young", 0, 2026)) %>% #add a row for the year before attrition starts
rbind(c("Calvert", 0, 2026)) %>% #add a row for the year before attrition starts
mutate(Total = as.numeric(Total)) %>% #convert the Total column data to numbers
mutate(Year = as.numeric(Year)) %>% #convert the Year column data to numbers
mutate(Year = Year - 1) #subtract a year because we want to track what happens by the end of the year
#print the last few rows of the data frame
tail(totalsByScheduleAndYear)
## Housing Total Year
## 19 Tidewater 0 2024
## 20 Young 61 2024
## 21 Calvert 57 2024
## 22 Tidewater 0 2025
## 23 Young 0 2025
## 24 Calvert 0 2025
Plot The Tenants Each Year, distinguishing by housing area.
#load a library for plotting data
require(ggplot2)
#display a stacked line plot that shows how the tenant totals decline each year
ggplot(totalsByScheduleAndYear, aes(x = Year, y = Total, fill = Housing)) +
geom_area(position = 'stack') +
labs(title = "Decline in Tenants Year-to-Year, by Housing Area")

Clean the data for cumulative attrition each year.
#clean up the data into a totalsAndAttrition variable for tracking totals vs attrition
totalsAndAttrition <- proposedPhasingSchedule %>%
rbind(c("Tidewater", 618, 0, 0, 0, 0, 618, 2018)) %>% #add the first year
rbind(c("Young", 746, 0, 0, 0, 0, 746, 2018)) %>% #add the first year
rbind(c("Calvert", 310, 0, 0, 0, 0, 310, 2018)) %>% #add the first year
mutate(Attrition = as.numeric(Attrition)) %>% #convert column to numbers
mutate(Remain = as.numeric(Remain)) %>% #convert column to numbers
mutate(Year = as.numeric(Year)) %>% #convert column to numbers
arrange(Year) #sort by year ascending
head(totalsAndAttrition)
## Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater 618 0 0 0 0 618 2018
## 2 Young 746 0 0 0 0 746 2018
## 3 Calvert 310 0 0 0 0 310 2018
## 4 Tidewater 618 173 83 0 36 327 2019
## 5 Young 746 0 0 60 60 746 2019
## 6 Calvert 310 36 17 39 22 274 2019
#get the total tenants across all housing areas
#get the cumulative attrition across all the years
totalsAndAttrition <- totalsAndAttrition %>%
group_by(Year) %>% #group by year so we can get the sum for all areas that year
mutate(Tenants = sum(Remain)) %>% #get the sum of all remaining people that year
mutate(TotalAttritionThatYear = sum(Attrition)) %>% #sum of all attritioned that year
group_by(Housing) %>% #group by housing so we can total the attrition year to year
mutate(Attritioned = cumsum(TotalAttritionThatYear)) %>% #all attritioned up to that year
ungroup() %>% #ungroup because we're done with the group based operations
select(Tenants, Attritioned, Year) #we only need these columns
head(totalsAndAttrition)
## # A tibble: 6 x 3
## Tenants Attritioned Year
## <dbl> <dbl> <dbl>
## 1 1674 0 2018
## 2 1674 0 2018
## 3 1674 0 2018
## 4 1347 118 2019
## 5 1347 118 2019
## 6 1347 118 2019
Plot the total tenants vs total attrition
#load a library for change the shape of the data frame
require(reshape2)
#convert to one column for tenant count and attrition count and a column to distinguish
totalsVsAttrition <- melt(totalsAndAttrition, id.vars = c("Year")) %>%
rename(Situation = variable) %>%
rename(People = value) %>%
arrange(Year)
head(totalsVsAttrition)
## Year Situation People
## 1 2018 Tenants 1674
## 2 2018 Tenants 1674
## 3 2018 Tenants 1674
## 4 2018 Attritioned 0
## 5 2018 Attritioned 0
## 6 2018 Attritioned 0
tail(totalsVsAttrition)
## Year Situation People
## 43 2025 Tenants 0
## 44 2025 Tenants 0
## 45 2025 Tenants 0
## 46 2025 Attritioned 375
## 47 2025 Attritioned 375
## 48 2025 Attritioned 375
#plot the people each year, one color for tenants and one color for attritioned
ggplot(totalsVsAttrition, aes(x = Year, y = People, color = Situation)) +
geom_line(size = 2) +
labs(title = "Total Tenants vs Total People Attritioned, Year-to-Year ")
