Data on residential house prices is obtained from the residential property price register. This data contains date of sale and price, location and brief description of residential property sold across Ireland.

1. Reading in the data

2. Change the data to a tibble and Inspect the data frame

res_prop_df <-as_tibble(res_prop_data)
colnames(res_prop_df)
## [1] "Date of Sale (dd/mm/yyyy)" "Address"                  
## [3] "County"                    "Eircode"                  
## [5] "Price (€)"                 "Not Full Market Price"    
## [7] "VAT Exclusive"             "Description of Property"  
## [9] "Property Size Description"

Inspect data frame structure & locate missing values

head(res_prop_df)%>%slice(1)
res_prop_df %>% summarise_all(~sum(is.na(.)))

Missing values located in Eircode column (introduced in July 2015) and Property size description.

3. Date and Price columns are characters:

Rename date column and change to type date and numeric. Remove euro sign from price column and remove the “property size desc” column

res_prop_df <- res_prop_df %>%
  rename(date = "Date of Sale (dd/mm/yyyy)", county = "County", price = "Price (€)")

# Remove the euro sign                                                                                   
res_prop_df$price <- as.numeric(gsub("[^0-9.]", "", res_prop_df$price))


# Change the date column to type date from char
res_prop_df <- res_prop_df %>%
  mutate(date = as.Date(date, format = "%d/%m/%Y")) 
 res_prop_df %>% filter("Not Full Market Price" == "Yes")
res_prop_df <- res_prop_df %>%select(-"Property Size Description")
tail(tail(res_prop_df))

4. Find the number of properties where Not Full Market price is “Yes”

Presuming “No” in this column implies full market price, filter the VAT Exclusive columns for “Yes”. No “Yes” entries in these columns, filtering not required as the data is consistent.

res_prop_df %>% filter("Not Full Market Price" == "Yes")
res_prop_df %>% filter("VAT Exculsive" == "Yes")

5. Summarise the number of residential sales by weeks, grouping by county, setting weeks with no sales to zero.

sales_summary_tbl <- res_prop_df %>%select("date","county")%>%
  
group_by(county) %>% 
  
  summarise_by_time(
    .date_var = date,
    .by="weeks",
    value =n()
  ) %>%
  
  pad_by_time(
    .date_var = date,
    .by="weeks",
    .pad_value = 0
  ) %>%
  ungroup() 
sales_summary_tbl%>%slice(1:3)

6. Investigate time series of weekly sales by a subset of counties:

sales_summary_tbl %>% filter(county %in% c("Dublin","Longford"))%>% plot_time_series(
                  .date_var = date, 
                    .value = value, 
                   #.color_var = week(date),
                   .facet_vars = county,
                   .smooth = T,
                   .trelliscope = F,
                   .facet_ncol = 6,
                   .interactive = T,
                   .facet_scales = "free_y",
                   .plotly_slider = F
                   )

7. Filter county Dublin plot time series of weekly sales.

sales_summary_tbl %>% filter(county %in% c("Dublin")) %>% 
  plot_time_series(
                  .date_var = date, 
                    .value = value, 
                   .facet_vars = county,
                   .smooth = T,
                   .trelliscope = F,
                   .facet_ncol = 5,
                   .interactive = T,
                   .facet_scales = "free_y",
                   #.smooth_period =
                   .plotly_slider = F,
                   .title = "Weekly Residential Property Sales 2010 - 2023",

                   )

8. Yearly summary data of residential sales for Dublin

sales_summary_tbl %>% filter(county %in% c("Dublin")) %>%  
  
  
  plot_time_series_boxplot(
  date, value,
  .period      = "1 year",
  .smooth_func =  median, 
  .facet_ncol  = 2,
  .interactive = T,
  .plotly_slider = T,
  .smooth_message = T,
  .smooth_span = 2,
  .title = "Yearly Summary Residential Property Sales  ",
  )

9. Inspect trend and seasonality

Filter the df for county dublin.The trend graph clearly marks the effects of the 2008 financial crisis and the covid lockdowns.

dublin_res_sales <- sales_summary_tbl %>% filter(county %in% c("Dublin"))

dublin_res_sales %>%
  
  plot_stl_diagnostics(.date_var = date,
                       .value = log1p(value),
                       .frequency = "12 month",
                       .trend = "1 year")

10. Identify possible outliers in Sales data

dublin_res_sales%>%# mutate(value = log1p(value)) %>% group_by(county)%>%
  plot_anomaly_diagnostics(.message = F,
  
  .date_var = date,
  .value = value,
  .alpha = 0.15,
  .max_anomalies = 0.15,
  .ribbon_alpha = 0.45,
  .frequency = "1 month",
  .title ="Anomaly Detection"
)

11. Visualize cleaned time series:

Removing weeks with zero sales, and inspect the cleaned time series.

dublin_res_sales%>%select(-county)%>%
  mutate(value_na = ifelse(value == 0 , NA, value)) %>%
  mutate(value_cleaned = ts_clean_vec(lambda = "auto",value_na, period = 30)) %>%
  
  pivot_longer(-date)%>%
  plot_time_series(date,value,name,
                   .smooth = F,
                   .title = "Cleaned and Original time series")

13. Visualise Residential Sales Moving Averages (Mean) 2010 - 2023

n<- dublin_res_sales %>%
  tk_augment_slidify(
    .align = "center",
    .value = value,
    .period = c(30,90,180,365),
    .f = mean,
    .partial = TRUE,
    .names = stringr::str_c("MA_", c(30,90,180,365))
  ) %>%
  ggplot(aes(date, value,text = paste0("MA:","<br>Date: ", date, "<br>Sales: ", scales::dollar(value,suffix="",prefix="")))) +
  #geom_line(aes(y = value,text = paste0("Monthly MA","<br>Date: ", date, "<br>Price: ", scales::dollar(value,suffix="",prefix="€"))), group = 1) +
  geom_line(aes(y = MA_30,text = paste0("30 Day","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_30,suffix="",prefix=""))), group = 1)+
  geom_line(aes(y = MA_90, text = paste0("90 Day","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_90,suffix="",prefix=""))), group = 1)+
  geom_line(aes(y = MA_180, text = paste0("180 Day MA","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_180,suffix="",prefix=""))), group = 1)+
  geom_line(aes(y = MA_365,text = paste0("365 Day","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_365,suffix="",prefix=""))), group = 1)+
  theme_minimal() + 
  scale_y_continuous("Residential Sales",
                     breaks = scales::breaks_extended(15),
                     labels = scales::label_dollar(suffix="000",prefix=""))+
  
  scale_x_date(NULL,
               breaks = scales::breaks_width("9 months"), 
               labels = scales::label_date_short()
  ) + 
  ggtitle(" Dublin Sales Moving Average (Mean)")+
  xlab("")

# Modify the appearance in plotly
ggplotly(n, tooltip = "text") %>%
 style(
    line = list(
      color = c("orange", "green", "red", "blue")
      
    )
  )

Residential Price Analysis 2010 - 2023

14. Aggregate the National price data by day

mean_price_data_daily_national <- res_prop_df %>% select(date,price) %>% rename(value = "price")  %>%
  summarise_by_time(
    .date_var = date,
    .by="days",
    across(value:value, .fns = mean),
  ) %>%
  
  pad_by_time(
    .date_var = date,
    .by="days",
    .pad_value = 0
  )

15. Create the 30,90,180 and 365 day moving averages & visualise the time series plots of the moving averages

n <-  mean_price_data_daily_national %>%
  tk_augment_slidify(
    .align = "center",
    .value = value,
    .period = c(30,90,180, 365),
    .f = mean,
    .partial = TRUE,
    .names = stringr::str_c("MA_", c(30,90,180,365))    
  ) %>%
  
  
  ggplot(aes(date, value,text = paste0("MA:","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(value,suffix="",prefix="")))) +
  #geom_line(aes(y = value,text = paste0("Monthly MA","<br>Date: ", date, "<br>Price: ", scales::dollar(value,suffix="",prefix="€"))), group = 1) +
  geom_line(aes(y = MA_30,text = paste0("30 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_30,suffix="",prefix="€"))), group = 1, color = "blue")+
  geom_line(aes(y = MA_90, text = paste0("90 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_90,suffix="",prefix="€"))), group = 1, color = "red")+
  geom_line(aes(y = MA_180, text = paste0("180 Day MA","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_180,suffix="",prefix="€"))), group = 1, color = "green")+
  geom_line(aes(y = MA_365,text = paste0("365 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_365,suffix="",prefix="€"))), group = 1, color = "orange")+
  theme_minimal() + 
  scale_y_continuous("",
                     breaks = scales::breaks_extended(15),
                     labels = scales::label_dollar(suffix="",prefix="€"))+
  
  scale_x_date(NULL,
               breaks = scales::breaks_width("9 months"), 
               labels = scales::label_date_short()
  ) + 
  ggtitle("National Price Moving Averages")+
  xlab("")+
  theme(legend.position = "right")


ggplotly(n, tooltip = "text") 
  • Post Financial Crisis 2008 downward trends to a low late 2012, Covid 19 in February 2019, with 30 and 90 day plots below the 180 and 365 moving averages in late 2023 indicating the possibility of a downturn in mean residential property prices in Dublin.

16. Covid 19 time period 2019 - 2023

 h<-mean_price_data_daily_national %>%
  filter_by_time(.start_date = "2022") %>%
  tk_augment_slidify(
    .align = "center",
    .value = value,
    .period = c(30,90,180, 365),
    .f = mean,
    .partial = TRUE,
    .names = stringr::str_c("MA_", c(30,90,180,365))    
  ) %>%
  
  ggplot(aes(date, value,text = paste0("MA:","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(value,suffix="",prefix="")))) +
  #geom_line(aes(y = value,text = paste0("Monthly MA","<br>Date: ", date, "<br>Price: ", scales::dollar(value,suffix="",prefix="€"))), group = 1) +
  geom_line(aes(y = MA_30,text = paste0("30 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_30,suffix="",prefix="€"))), group = 1, color = "blue")+
  geom_line(aes(y = MA_90, text = paste0("90 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_90,suffix="",prefix="€"))), group = 1, color = "red")+
  geom_line(aes(y = MA_180, text = paste0("180 Day MA","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_180,suffix="",prefix="€"))), group = 1, color = "green")+
  geom_line(aes(y = MA_365,text = paste0("365 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_365,suffix="",prefix="€"))), group = 1, color = "orange")+
  theme_minimal() + 
  scale_y_continuous("",
                     breaks = scales::breaks_extended(15),
                     labels = scales::label_dollar(suffix="",prefix="€"))+
  
  scale_x_date(NULL,
               breaks = scales::breaks_width("9 months"), 
               labels = scales::label_date_short()
  ) + 
  ggtitle("National Price Moving Averages ")+
  xlab("")+
  theme_minimal()


ggplotly(h, tooltip = "text") 
  • 90 day moving average moves with the 30 day moving average.
  • The current trends in Q3 of 2023 sees the 30 and 90 day moving averages move below the 180 and 356 day averages.
  • This crossover is suggesting a potential shift in the trend, indicating that the prices is on a downward trend.

17. Clean the mean daily price of zeros and visualise the cleaned and original

mean_price_data_daily_national%>%
  filter_by_time(.start_date = "2022") %>%
  mutate(value_na = ifelse(value == 0, NA,value)) %>%
  mutate(value_cleaned = ts_clean_vec(lambda = "auto",value_na, period = 7)) %>%
  
  pivot_longer(-date)%>%
  plot_time_series(date,value,name,
                   .smooth = F,
                   .title = "Cleaned and Original TS")

18. Aggregate by year the mean house price by county

all_county_data <- res_prop_df %>%select("date","county" ,"price") %>%select("date","county","price") %>% rename(value = "price") %>%
  
group_by(county) %>% 
  
  summarise_by_time(
    .date_var = date,
    .by="year",
    across(value:value, .fns = mean)
  ) %>%
  
  pad_by_time(
    .date_var = date,
    .by="year",
    .pad_value = 0
  ) %>%
  ungroup() 

19. Create a heat map of Mean house price by location over time.

heat_map<- ggplot(all_county_data, aes(y = reorder(county, value), x = reorder(year(date),date), fill = value,
                                       text = paste0("County: ",county, "<br>Mean Price: €", round(value,digits=0))))+
  geom_tile() +
  geom_text(aes(label = floor(value)), size = 2.5, color = "white") +
  scale_fill_viridis(discrete = FALSE, option = "turbo",labels = scales::number_format(scale = 0.001, suffix = "K", accuracy = 1)) +
  theme(legend.position = "right") +
  ggtitle("") +
  xlab("") +
  ylab("") +
  labs(fill = "Mean Price")

ggplotly(heat_map, tooltip = "text") 
  • The fallout from 2008 can be identified as the dark blue are in the heat map, pushing up into the top five most expensive counties.
  • As time passes the plot changes in colour, with light blues greens and yellows dominating as the mean price rises.
  • By 2023 darker oranges and reds begin to appear, with mean values in the region of 500K appearing in Dublin & Wicklow

20. National Heat Map by Median Yearly price

all_county_median_data <- res_prop_df %>%select("date","county" ,"price") %>%select("date","county","price") %>% rename(value = "price") %>%
  
group_by(county) %>% 
  
  summarise_by_time(
    .date_var = date,
    .by="year",
    across(value:value, .fns = median)
  ) %>%
  
  pad_by_time(
    .date_var = date,
    .by="year",
    .pad_value = 0
  ) %>%
  ungroup() 
heat_map_median<- ggplot(all_county_median_data, aes(y = reorder(county, value), x = reorder(year(date),date), fill = value,
                                       text = paste0("County: ",county, "<br>Median Price: €", round(value,digits=1))))+
  geom_tile() +
  geom_text(aes(label = floor(value)), size = 2, color = "white") +
  scale_fill_viridis(discrete = FALSE, option = "turbo",labels = scales::number_format(scale = 0.001, suffix = "K", accuracy = 1)) +
  #theme(legend.position = "right") +
  ggtitle("") +
  xlab("") +
  ylab("") +
  labs(fill = "Median Price")

ggplotly(heat_map_median, tooltip = "text") 

Median house prices illustrate the value with 50% of the prices above and below this value. The median is less influenced by larger variance in house prices when comparing price by location. The median house price in Longford is €160000 with the mean price €183452, where as in Dublin the mean is €583957 with the median €410000.