We import the libraries and data we need first.

library(tidyverse)
library(here)

sales <- read_csv(here::here("data/hdb_resale_2015_onwards.csv"))

Let’s take a look at the data

sales %>% glimpse()
## Observations: 79,100
## Variables: 11
## $ month               <chr> "2015-01", "2015-01", "2015-01", "2015-01", "2015…
## $ town                <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG MO…
## $ flat_type           <chr> "3 ROOM", "3 ROOM", "3 ROOM", "3 ROOM", "3 ROOM",…
## $ block               <chr> "174", "541", "163", "446", "557", "603", "709", …
## $ street_name         <chr> "ANG MO KIO AVE 4", "ANG MO KIO AVE 10", "ANG MO …
## $ storey_range        <chr> "07 TO 09", "01 TO 03", "01 TO 03", "01 TO 03", "…
## $ floor_area_sqm      <dbl> 60, 68, 69, 68, 68, 67, 68, 68, 67, 68, 67, 68, 6…
## $ flat_model          <chr> "Improved", "New Generation", "New Generation", "…
## $ lease_commence_date <dbl> 1986, 1981, 1980, 1979, 1980, 1980, 1980, 1981, 1…
## $ remaining_lease     <dbl> 70, 65, 64, 63, 64, 64, 64, 65, 62, 69, 60, 64, 6…
## $ resale_price        <dbl> 255000, 275000, 285000, 290000, 290000, 290000, 2…

Load up more libraries

library(lubridate)
library(forcats)

Clean up the data

sales <- sales %>%
  mutate(month = ymd(month, truncated = 1),
         flat_type = as_factor(flat_type),
         storey_range = as_factor(storey_range),
         flat_model = as_factor(flat_model))

Earlier lease commencement date.

sales %>%
  filter(lease_commence_date == min(lease_commence_date))
## # A tibble: 9 x 11
##   month      town  flat_type block street_name storey_range floor_area_sqm
##   <date>     <chr> <fct>     <chr> <chr>       <fct>                 <dbl>
## 1 2015-03-01 JURO… 3 ROOM    39    TEBAN GDNS… 01 TO 03                 67
## 2 2015-08-01 JURO… 3 ROOM    37    TEBAN GDNS… 04 TO 06                 67
## 3 2016-11-01 JURO… 3 ROOM    37    TEBAN GDNS… 04 TO 06                 67
## 4 2017-01-01 JURO… 3 ROOM    37    TEBAN GDNS… 04 TO 06                 67
## 5 2017-10-01 JURO… 3 ROOM    37    TEBAN GDNS… 01 TO 03                 67
## 6 2017-11-01 JURO… 3 ROOM    37    TEBAN GDNS… 01 TO 03                 67
## 7 2018-01-01 JURO… 3 ROOM    38    TEBAN GDNS… 01 TO 03                 67
## 8 2018-06-01 JURO… 3 ROOM    37    TEBAN GDNS… 01 TO 03                 67
## 9 2018-09-01 JURO… 3 ROOM    37    TEBAN GDNS… 01 TO 03                 67
## # … with 4 more variables: flat_model <fct>, lease_commence_date <dbl>,
## #   remaining_lease <dbl>, resale_price <dbl>

Where are the earliest commencement flats?

sales %>%
  filter(lease_commence_date == min(lease_commence_date)) %>%
  select(town) %>%
  distinct()
## # A tibble: 1 x 1
##   town       
##   <chr>      
## 1 JURONG EAST

What are the largest hdb flats?

sales %>%
  arrange(desc(floor_area_sqm)) %>%
  top_n(5)
## Selecting by resale_price
## # A tibble: 5 x 11
##   month      town  flat_type block street_name storey_range floor_area_sqm
##   <date>     <chr> <fct>     <chr> <chr>       <fct>                 <dbl>
## 1 2018-09-01 KALL… 3 ROOM    41    JLN BAHAGIA 01 TO 03                237
## 2 2017-07-01 QUEE… EXECUTIVE 148   MEI LING ST 19 TO 21                156
## 3 2017-02-01 BISH… 5 ROOM    273B  BISHAN ST … 40 TO 42                120
## 4 2018-08-01 TOA … 5 ROOM    139A  LOR 1A TOA… 40 TO 42                117
## 5 2018-10-01 CENT… 5 ROOM    1D    CANTONMENT… 40 TO 42                106
## # … with 4 more variables: flat_model <fct>, lease_commence_date <dbl>,
## #   remaining_lease <dbl>, resale_price <dbl>

What is the resale price of these big flats?

sales %>%
  arrange(desc(floor_area_sqm)) %>%
  top_n(5) %>%
  select(resale_price)
## Selecting by resale_price
## # A tibble: 5 x 1
##   resale_price
##          <dbl>
## 1      1185000
## 2      1160000
## 3      1180000
## 4      1160888
## 5      1168000

What is the most expensive flat in Punggol?

sales %>%
  filter(town == "PUNGGOL") %>%
  filter(resale_price == max(resale_price))
## # A tibble: 1 x 11
##   month      town  flat_type block street_name storey_range floor_area_sqm
##   <date>     <chr> <fct>     <chr> <chr>       <fct>                 <dbl>
## 1 2018-11-01 PUNG… 5 ROOM    305B  PUNGGOL RD  16 TO 18                147
## # … with 4 more variables: flat_model <fct>, lease_commence_date <dbl>,
## #   remaining_lease <dbl>, resale_price <dbl>

Which town has on average the largest flats?

sales %>%
  group_by(town) %>%
  summarise(avg_size = mean(floor_area_sqm)) %>%
  arrange(desc(avg_size))
## # A tibble: 26 x 2
##    town          avg_size
##    <chr>            <dbl>
##  1 PASIR RIS         123.
##  2 CHOA CHU KANG     111.
##  3 BUKIT TIMAH       110.
##  4 WOODLANDS         107.
##  5 SEMBAWANG         107.
##  6 BISHAN            107.
##  7 TAMPINES          105.
##  8 BUKIT PANJANG     105.
##  9 JURONG WEST       102.
## 10 HOUGANG           102.
## # … with 16 more rows

Which town has on average the cheapest flats per square meter?

sales %>%
  mutate(price_per_sqm = resale_price / floor_area_sqm) %>%
  group_by(town) %>%
  summarise(avg_price_per_sqm = mean(price_per_sqm)) %>%
  arrange(avg_price_per_sqm)
## # A tibble: 26 x 2
##    town          avg_price_per_sqm
##    <chr>                     <dbl>
##  1 CHOA CHU KANG             3529.
##  2 WOODLANDS                 3625.
##  3 SEMBAWANG                 3729.
##  4 YISHUN                    3907.
##  5 PASIR RIS                 3920.
##  6 JURONG WEST               3928.
##  7 BUKIT PANJANG             4016.
##  8 BUKIT BATOK               4126.
##  9 HOUGANG                   4182.
## 10 SENGKANG                  4341.
## # … with 16 more rows