We import the libraries and data we need first.
library(tidyverse)
library(here)
sales <- read_csv(here::here("data/hdb_resale_2015_onwards.csv"))
Let’s take a look at the data
sales %>% glimpse()
## Observations: 79,100
## Variables: 11
## $ month <chr> "2015-01", "2015-01", "2015-01", "2015-01", "2015…
## $ town <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG MO…
## $ flat_type <chr> "3 ROOM", "3 ROOM", "3 ROOM", "3 ROOM", "3 ROOM",…
## $ block <chr> "174", "541", "163", "446", "557", "603", "709", …
## $ street_name <chr> "ANG MO KIO AVE 4", "ANG MO KIO AVE 10", "ANG MO …
## $ storey_range <chr> "07 TO 09", "01 TO 03", "01 TO 03", "01 TO 03", "…
## $ floor_area_sqm <dbl> 60, 68, 69, 68, 68, 67, 68, 68, 67, 68, 67, 68, 6…
## $ flat_model <chr> "Improved", "New Generation", "New Generation", "…
## $ lease_commence_date <dbl> 1986, 1981, 1980, 1979, 1980, 1980, 1980, 1981, 1…
## $ remaining_lease <dbl> 70, 65, 64, 63, 64, 64, 64, 65, 62, 69, 60, 64, 6…
## $ resale_price <dbl> 255000, 275000, 285000, 290000, 290000, 290000, 2…
Load up more libraries
library(lubridate)
library(forcats)
Clean up the data
sales <- sales %>%
mutate(month = ymd(month, truncated = 1),
flat_type = as_factor(flat_type),
storey_range = as_factor(storey_range),
flat_model = as_factor(flat_model))
Earlier lease commencement date.
sales %>%
filter(lease_commence_date == min(lease_commence_date))
## # A tibble: 9 x 11
## month town flat_type block street_name storey_range floor_area_sqm
## <date> <chr> <fct> <chr> <chr> <fct> <dbl>
## 1 2015-03-01 JURO… 3 ROOM 39 TEBAN GDNS… 01 TO 03 67
## 2 2015-08-01 JURO… 3 ROOM 37 TEBAN GDNS… 04 TO 06 67
## 3 2016-11-01 JURO… 3 ROOM 37 TEBAN GDNS… 04 TO 06 67
## 4 2017-01-01 JURO… 3 ROOM 37 TEBAN GDNS… 04 TO 06 67
## 5 2017-10-01 JURO… 3 ROOM 37 TEBAN GDNS… 01 TO 03 67
## 6 2017-11-01 JURO… 3 ROOM 37 TEBAN GDNS… 01 TO 03 67
## 7 2018-01-01 JURO… 3 ROOM 38 TEBAN GDNS… 01 TO 03 67
## 8 2018-06-01 JURO… 3 ROOM 37 TEBAN GDNS… 01 TO 03 67
## 9 2018-09-01 JURO… 3 ROOM 37 TEBAN GDNS… 01 TO 03 67
## # … with 4 more variables: flat_model <fct>, lease_commence_date <dbl>,
## # remaining_lease <dbl>, resale_price <dbl>
Where are the earliest commencement flats?
sales %>%
filter(lease_commence_date == min(lease_commence_date)) %>%
select(town) %>%
distinct()
## # A tibble: 1 x 1
## town
## <chr>
## 1 JURONG EAST
What are the largest hdb flats?
sales %>%
arrange(desc(floor_area_sqm)) %>%
top_n(5)
## Selecting by resale_price
## # A tibble: 5 x 11
## month town flat_type block street_name storey_range floor_area_sqm
## <date> <chr> <fct> <chr> <chr> <fct> <dbl>
## 1 2018-09-01 KALL… 3 ROOM 41 JLN BAHAGIA 01 TO 03 237
## 2 2017-07-01 QUEE… EXECUTIVE 148 MEI LING ST 19 TO 21 156
## 3 2017-02-01 BISH… 5 ROOM 273B BISHAN ST … 40 TO 42 120
## 4 2018-08-01 TOA … 5 ROOM 139A LOR 1A TOA… 40 TO 42 117
## 5 2018-10-01 CENT… 5 ROOM 1D CANTONMENT… 40 TO 42 106
## # … with 4 more variables: flat_model <fct>, lease_commence_date <dbl>,
## # remaining_lease <dbl>, resale_price <dbl>
What is the resale price of these big flats?
sales %>%
arrange(desc(floor_area_sqm)) %>%
top_n(5) %>%
select(resale_price)
## Selecting by resale_price
## # A tibble: 5 x 1
## resale_price
## <dbl>
## 1 1185000
## 2 1160000
## 3 1180000
## 4 1160888
## 5 1168000
What is the most expensive flat in Punggol?
sales %>%
filter(town == "PUNGGOL") %>%
filter(resale_price == max(resale_price))
## # A tibble: 1 x 11
## month town flat_type block street_name storey_range floor_area_sqm
## <date> <chr> <fct> <chr> <chr> <fct> <dbl>
## 1 2018-11-01 PUNG… 5 ROOM 305B PUNGGOL RD 16 TO 18 147
## # … with 4 more variables: flat_model <fct>, lease_commence_date <dbl>,
## # remaining_lease <dbl>, resale_price <dbl>
Which town has on average the largest flats?
sales %>%
group_by(town) %>%
summarise(avg_size = mean(floor_area_sqm)) %>%
arrange(desc(avg_size))
## # A tibble: 26 x 2
## town avg_size
## <chr> <dbl>
## 1 PASIR RIS 123.
## 2 CHOA CHU KANG 111.
## 3 BUKIT TIMAH 110.
## 4 WOODLANDS 107.
## 5 SEMBAWANG 107.
## 6 BISHAN 107.
## 7 TAMPINES 105.
## 8 BUKIT PANJANG 105.
## 9 JURONG WEST 102.
## 10 HOUGANG 102.
## # … with 16 more rows
Which town has on average the cheapest flats per square meter?
sales %>%
mutate(price_per_sqm = resale_price / floor_area_sqm) %>%
group_by(town) %>%
summarise(avg_price_per_sqm = mean(price_per_sqm)) %>%
arrange(avg_price_per_sqm)
## # A tibble: 26 x 2
## town avg_price_per_sqm
## <chr> <dbl>
## 1 CHOA CHU KANG 3529.
## 2 WOODLANDS 3625.
## 3 SEMBAWANG 3729.
## 4 YISHUN 3907.
## 5 PASIR RIS 3920.
## 6 JURONG WEST 3928.
## 7 BUKIT PANJANG 4016.
## 8 BUKIT BATOK 4126.
## 9 HOUGANG 4182.
## 10 SENGKANG 4341.
## # … with 16 more rows