Read in the proposed phasing schedule as a csv.

#load the contents of the file into a data frame
proposedPhasingSchedule <- read.csv2(
  file = "ProposedPhasingSchedule.csv",
  header = TRUE,
  sep = ","
)

#print the top part of the data frame
head(proposedPhasingSchedule)
##     Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater   618 173           83           0        36    327 2019
## 2     Young   746   0            0          60        60    746 2019
## 3   Calvert   310  36           17          39        22    274 2019
## 4 Tidewater   327 124           75           0        20    109 2020
## 5     Young   746   0            0          60        60    746 2020
## 6   Calvert   274   0            0          22        22    274 2020

Clean the data to where we can display tenants per year.

#load a library for manipulating data frames
require(dplyr)

#clean up the data into a variable totalsByScheduleAndYear
totalsByScheduleAndYear <- proposedPhasingSchedule  %>%
  select(Housing, Total, Year) %>% #we only need these rows
  rbind(c("Tidewater", 0, 2026)) %>% #add a row for the year before attrition starts
  rbind(c("Young", 0, 2026)) %>% #add a row for the year before attrition starts
  rbind(c("Calvert", 0, 2026)) %>% #add a row for the year before attrition starts
  mutate(Total = as.numeric(Total)) %>% #convert the Total column data to numbers
  mutate(Year = as.numeric(Year)) %>% #convert the Year column data to numbers
  mutate(Year = Year - 1) #subtract a year because we want to track what happens by the end of the year

#print the last few rows of the data frame
tail(totalsByScheduleAndYear)
##      Housing Total Year
## 19 Tidewater     0 2024
## 20     Young    61 2024
## 21   Calvert    57 2024
## 22 Tidewater     0 2025
## 23     Young     0 2025
## 24   Calvert     0 2025

Plot The Tenants Each Year, distinguishing by housing area.

#load a library for plotting data
require(ggplot2)

#display a stacked line plot that shows how the tenant totals decline each year
ggplot(totalsByScheduleAndYear, aes(x = Year, y = Total, fill = Housing)) + 
  geom_area(position = 'stack') + 
  labs(title = "Decline in Tenants Year-to-Year, by Housing Area")

Clean the data for cumulative attrition each year.

#clean up the data into a totalsAndAttrition variable for tracking totals vs attrition
totalsAndAttrition <- proposedPhasingSchedule %>%
  rbind(c("Tidewater", 618, 0, 0, 0, 0, 618, 2018)) %>% #add the first year
  rbind(c("Young", 746, 0, 0, 0, 0, 746, 2018)) %>% #add the first year
  rbind(c("Calvert", 310, 0, 0, 0, 0, 310, 2018)) %>% #add the first year
  mutate(Attrition = as.numeric(Attrition)) %>% #convert column to numbers
  mutate(Remain = as.numeric(Remain)) %>% #convert column to numbers
  mutate(Year = as.numeric(Year)) %>% #convert column to numbers
  arrange(Year) #sort by year ascending

head(totalsAndAttrition)
##     Housing Total TPV Transfer.out Transfer.in Attrition Remain Year
## 1 Tidewater   618   0            0           0         0    618 2018
## 2     Young   746   0            0           0         0    746 2018
## 3   Calvert   310   0            0           0         0    310 2018
## 4 Tidewater   618 173           83           0        36    327 2019
## 5     Young   746   0            0          60        60    746 2019
## 6   Calvert   310  36           17          39        22    274 2019
#get the total tenants across all housing areas
#get the cumulative attrition across all the years
totalsAndAttrition <- totalsAndAttrition %>%
  group_by(Year) %>% #group by year so we can get the sum for all areas that year
    mutate(Tenants = sum(Remain)) %>% #get the sum of all remaining people that year
    mutate(TotalAttritionThatYear = sum(Attrition)) %>% #sum of all attritioned that year
    group_by(Housing) %>% #group by housing so we can total the attrition year to year
      mutate(Attritioned = cumsum(TotalAttritionThatYear)) %>% #all attritioned up to that year
  ungroup() %>% #ungroup because we're done with the group based operations
  select(Tenants, Attritioned, Year) #we only need these columns

head(totalsAndAttrition)
## # A tibble: 6 x 3
##   Tenants Attritioned  Year
##     <dbl>       <dbl> <dbl>
## 1    1674           0  2018
## 2    1674           0  2018
## 3    1674           0  2018
## 4    1347         118  2019
## 5    1347         118  2019
## 6    1347         118  2019

Plot the total tenants vs total attrition

#load a library for change the shape of the data frame
require(reshape2)

#convert to one column for tenant count and attrition count and a column to distinguish
totalsVsAttrition <- melt(totalsAndAttrition, id.vars = c("Year")) %>%
  rename(Situation = variable) %>%
  rename(People = value) %>%
  arrange(Year)

head(totalsVsAttrition)
##   Year   Situation People
## 1 2018     Tenants   1674
## 2 2018     Tenants   1674
## 3 2018     Tenants   1674
## 4 2018 Attritioned      0
## 5 2018 Attritioned      0
## 6 2018 Attritioned      0
tail(totalsVsAttrition)
##    Year   Situation People
## 43 2025     Tenants      0
## 44 2025     Tenants      0
## 45 2025     Tenants      0
## 46 2025 Attritioned    375
## 47 2025 Attritioned    375
## 48 2025 Attritioned    375
#plot the people each year, one color for tenants and one color for attritioned
ggplot(totalsVsAttrition, aes(x = Year, y = People, color = Situation)) + 
  geom_line(size = 2) + 
  labs(title = "Total Tenants vs Total People Attritioned, Year-to-Year ")