library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
path <- "data/rentals"
xlsx_files <- list.files(path)
xlsx_files
## [1] "abs-report-august-2016.xlsx"
## [2] "abs-report-july-2016.xlsx"
## [3] "abs-report-june-2016.xlsx"
## [4] "abs-report-may2016.xlsx"
## [5] "igate-absreportoct2016-ep.xlsx"
## [6] "rda-data-report-april-2017.xlsx"
## [7] "rda-data-report-feb-2017-.xlsx"
## [8] "rda-data-report-jan2017-ep.xlsx"
## [9] "rda-data-report-july-2017-.xlsx"
## [10] "rda-data-report-june-2017-.xlsx"
## [11] "rda-data-report-mar-2017-.xlsx"
## [12] "rda-data-report-may-2017.xlsx"
## [13] "rda-datareportdec2016-erik.xlsx"
## [14] "rental-bond-and-rental-data-november-2016.xlsx"
rentals <- read_xlsx(str_c(path,"/", xlsx_files[1]))
rentals <- map_dfr(str_c(path, "/", xlsx_files), read_xlsx)
rentals
## # A tibble: 18,263 x 12
## `Bond Status` Suburb State Postcode `Bond Amount` `Weekly Rent`
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Active SMITHTON TAS 7330 1000 250
## 2 Active SMITHTON TAS 7330 800 200
## 3 Active SMITHTON TAS 7330 1240 310
## 4 Active STANLEY TAS 7331 480 140
## 5 Active SMITHTON TAS 7330 1120 280
## 6 Active LILEAH TAS 7330 960 240
## 7 Active STANLEY TAS 7331 840 210
## 8 Active PENGUIN TAS 7316 1200 300
## 9 Active ST HELENS TAS 7216 980 245
## 10 Active SCOTTSDALE TAS 7260 880 220
## # ... with 18,253 more rows, and 6 more variables:
## # Bond Lodgement date (DD/MM/YYYY) <dttm>,
## # Bond Activation date (DD/MM/YYYY) <dttm>, No of Bedrooms <dbl>,
## # Dwelling/Premises Type <chr>, Length of Tenancy (In Months) <dbl>,
## # Street Name <chr>
rentals <- rename(rentals,
bond_lodgement_date = `Bond Lodgement date (DD/MM/YYYY)`)
Wrangling task 2: Create new columns Use mutate() to create new columns to represent the month and year of bond lodgement (Hint: use the lubridate package).
rentals <- rentals %>%
mutate(month = lubridate::month(bond_lodgement_date),
year = lubridate::year(bond_lodgement_date))
Wrangling task 3: Remove observations Use filter() to remove observations that are missing the number of bedrooms, or have bedrooms that are not between 1 and 5.
rentals <- rentals %>%
filter(!is.na(`No of Bedrooms`), `No of Bedrooms` > 0, `No of Bedrooms` < 6)
Wrangling task 4: Compute average weekly rent Compute the average weekly rent by postcode, month, year and number of bedrooms and create a new column called time on your summarised dataset that represents the first day of each month.
summary <- rentals %>%
group_by(Postcode, month, year, `No of Bedrooms`) %>%
summarise(rent = mean(`Weekly Rent`)) %>%
mutate(time = lubridate::dmy(paste("01", month, year, sep = "-")))
## `summarise()` has grouped output by 'Postcode', 'month', 'year'. You can override using the `.groups` argument.
library(ggplot2)
ggplot(data = summary, aes(x = time, y = rent, group = Postcode)) +
geom_line()
Then, create a line plot grouped by postcode of the average weekly rents over time. Your plot should appear similar to the following:
library(ggplot2)
##
bedroom.labs <- c("One bedroom", "Two bedroom", "Three bedroom", "Four bedroom", "Five bedroom")
names(bedroom.labs) <- c(1:5)
ggplot(data = summary, aes(x = time, y = rent, group = Postcode)) +
geom_line(color = "darkgrey", alpha = 0.3) +
geom_line(data = filter(summary, Postcode == 7000), color = "red", show.legend = TRUE) +
facet_wrap(ncol = 1, "`No of Bedrooms`", labeller = labeller(`No of Bedrooms`= bedroom.labs)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Type of house and average weekly rent by postcode 2016-2017", subtitle = "Hobart (7000) shown in red", x = "Month", y = "Average weekly rent in $")