Read excel

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(readxl)

path <- "data/rentals"
xlsx_files <- list.files(path)
xlsx_files

##  [1] "abs-report-august-2016.xlsx"                   
##  [2] "abs-report-july-2016.xlsx"                     
##  [3] "abs-report-june-2016.xlsx"                     
##  [4] "abs-report-may2016.xlsx"                       
##  [5] "igate-absreportoct2016-ep.xlsx"                
##  [6] "rda-data-report-april-2017.xlsx"               
##  [7] "rda-data-report-feb-2017-.xlsx"                
##  [8] "rda-data-report-jan2017-ep.xlsx"               
##  [9] "rda-data-report-july-2017-.xlsx"               
## [10] "rda-data-report-june-2017-.xlsx"               
## [11] "rda-data-report-mar-2017-.xlsx"                
## [12] "rda-data-report-may-2017.xlsx"                 
## [13] "rda-datareportdec2016-erik.xlsx"               
## [14] "rental-bond-and-rental-data-november-2016.xlsx"

rentals <- read_xlsx(str_c(path,"/", xlsx_files[1]))

rentals <- map_dfr(str_c(path, "/", xlsx_files), read_xlsx)
rentals

## # A tibble: 18,263 x 12
##    `Bond Status` Suburb     State Postcode `Bond Amount` `Weekly Rent`
##    <chr>         <chr>      <chr>    <dbl>         <dbl>         <dbl>
##  1 Active        SMITHTON   TAS       7330          1000           250
##  2 Active        SMITHTON   TAS       7330           800           200
##  3 Active        SMITHTON   TAS       7330          1240           310
##  4 Active        STANLEY    TAS       7331           480           140
##  5 Active        SMITHTON   TAS       7330          1120           280
##  6 Active        LILEAH     TAS       7330           960           240
##  7 Active        STANLEY    TAS       7331           840           210
##  8 Active        PENGUIN    TAS       7316          1200           300
##  9 Active        ST HELENS  TAS       7216           980           245
## 10 Active        SCOTTSDALE TAS       7260           880           220
## # ... with 18,253 more rows, and 6 more variables:
## #   Bond Lodgement date (DD/MM/YYYY) <dttm>,
## #   Bond Activation date (DD/MM/YYYY) <dttm>, No of Bedrooms <dbl>,
## #   Dwelling/Premises Type <chr>, Length of Tenancy (In Months) <dbl>,
## #   Street Name <chr>

rentals <- rename(rentals, 
                  bond_lodgement_date = `Bond Lodgement date (DD/MM/YYYY)`)

Wrangling task 2: Create new columns Use mutate() to create new columns to represent the month and year of bond lodgement (Hint: use the lubridate package).

rentals <- rentals %>% 
  mutate(month = lubridate::month(bond_lodgement_date),
         year = lubridate::year(bond_lodgement_date))

Wrangling task 3: Remove observations Use filter() to remove observations that are missing the number of bedrooms, or have bedrooms that are not between 1 and 5.

rentals <- rentals %>% 
  filter(!is.na(`No of Bedrooms`), `No of Bedrooms` > 0, `No of Bedrooms` < 6)

Wrangling task 4: Compute average weekly rent Compute the average weekly rent by postcode, month, year and number of bedrooms and create a new column called time on your summarised dataset that represents the first day of each month.

summary <- rentals %>% 
  group_by(Postcode, month, year, `No of Bedrooms`) %>% 
  summarise(rent = mean(`Weekly Rent`)) %>% 
  mutate(time = lubridate::dmy(paste("01", month, year, sep = "-")))

## `summarise()` has grouped output by 'Postcode', 'month', 'year'. You can override using the `.groups` argument.

library(ggplot2)

ggplot(data = summary, aes(x = time, y = rent, group = Postcode)) +
  geom_line()

Then, create a line plot grouped by postcode of the average weekly rents over time. Your plot should appear similar to the following:

library(ggplot2)

## 
bedroom.labs <- c("One bedroom", "Two bedroom", "Three bedroom", "Four bedroom", "Five bedroom")
names(bedroom.labs) <- c(1:5)

ggplot(data = summary, aes(x = time, y = rent, group = Postcode)) +
  geom_line(color = "darkgrey", alpha = 0.3) +
  geom_line(data = filter(summary, Postcode == 7000), color = "red", show.legend = TRUE) + 
  facet_wrap(ncol = 1, "`No of Bedrooms`", labeller = labeller(`No of Bedrooms`= bedroom.labs)) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Type of house and average weekly rent by postcode 2016-2017", subtitle = "Hobart (7000) shown in red", x = "Month", y = "Average weekly rent in $")

Read excel

Emily Kothe

8/31/2021