res_prop_df <-as_tibble(res_prop_data)
colnames(res_prop_df)
## [1] "Date of Sale (dd/mm/yyyy)" "Address"
## [3] "County" "Eircode"
## [5] "Price (€)" "Not Full Market Price"
## [7] "VAT Exclusive" "Description of Property"
## [9] "Property Size Description"
head(res_prop_df)%>%slice(1)
res_prop_df %>% summarise_all(~sum(is.na(.)))
Missing values located in Eircode column (introduced in July 2015) and Property size description.
Rename date column and change to type date and numeric. Remove euro sign from price column and remove the “property size desc” column
res_prop_df <- res_prop_df %>%
rename(date = "Date of Sale (dd/mm/yyyy)", county = "County", price = "Price (€)")
# Remove the euro sign
res_prop_df$price <- as.numeric(gsub("[^0-9.]", "", res_prop_df$price))
# Change the date column to type date from char
res_prop_df <- res_prop_df %>%
mutate(date = as.Date(date, format = "%d/%m/%Y"))
res_prop_df %>% filter("Not Full Market Price" == "Yes")
res_prop_df <- res_prop_df %>%select(-"Property Size Description")
tail(tail(res_prop_df))
Presuming “No” in this column implies full market price, filter the VAT Exclusive columns for “Yes”. No “Yes” entries in these columns, filtering not required as the data is consistent.
res_prop_df %>% filter("Not Full Market Price" == "Yes")
res_prop_df %>% filter("VAT Exculsive" == "Yes")
sales_summary_tbl <- res_prop_df %>%select("date","county")%>%
group_by(county) %>%
summarise_by_time(
.date_var = date,
.by="weeks",
value =n()
) %>%
pad_by_time(
.date_var = date,
.by="weeks",
.pad_value = 0
) %>%
ungroup()
sales_summary_tbl%>%slice(1:3)
sales_summary_tbl %>% filter(county %in% c("Dublin","Longford"))%>% plot_time_series(
.date_var = date,
.value = value,
#.color_var = week(date),
.facet_vars = county,
.smooth = T,
.trelliscope = F,
.facet_ncol = 6,
.interactive = T,
.facet_scales = "free_y",
.plotly_slider = F
)
sales_summary_tbl %>% filter(county %in% c("Dublin")) %>%
plot_time_series(
.date_var = date,
.value = value,
.facet_vars = county,
.smooth = T,
.trelliscope = F,
.facet_ncol = 5,
.interactive = T,
.facet_scales = "free_y",
#.smooth_period =
.plotly_slider = F,
.title = "Weekly Residential Property Sales 2010 - 2023",
)
sales_summary_tbl %>% filter(county %in% c("Dublin")) %>%
plot_time_series_boxplot(
date, value,
.period = "1 year",
.smooth_func = median,
.facet_ncol = 2,
.interactive = T,
.plotly_slider = T,
.smooth_message = T,
.smooth_span = 2,
.title = "Yearly Summary Residential Property Sales ",
)
Filter the df for county dublin.The trend graph clearly marks the effects of the 2008 financial crisis and the covid lockdowns.
dublin_res_sales <- sales_summary_tbl %>% filter(county %in% c("Dublin"))
dublin_res_sales %>%
plot_stl_diagnostics(.date_var = date,
.value = log1p(value),
.frequency = "12 month",
.trend = "1 year")
dublin_res_sales%>%# mutate(value = log1p(value)) %>% group_by(county)%>%
plot_anomaly_diagnostics(.message = F,
.date_var = date,
.value = value,
.alpha = 0.15,
.max_anomalies = 0.15,
.ribbon_alpha = 0.45,
.frequency = "1 month",
.title ="Anomaly Detection"
)
Removing weeks with zero sales, and inspect the cleaned time series.
dublin_res_sales%>%select(-county)%>%
mutate(value_na = ifelse(value == 0 , NA, value)) %>%
mutate(value_cleaned = ts_clean_vec(lambda = "auto",value_na, period = 30)) %>%
pivot_longer(-date)%>%
plot_time_series(date,value,name,
.smooth = F,
.title = "Cleaned and Original time series")
n<- dublin_res_sales %>%
tk_augment_slidify(
.align = "center",
.value = value,
.period = c(30,90,180,365),
.f = mean,
.partial = TRUE,
.names = stringr::str_c("MA_", c(30,90,180,365))
) %>%
ggplot(aes(date, value,text = paste0("MA:","<br>Date: ", date, "<br>Sales: ", scales::dollar(value,suffix="",prefix="")))) +
#geom_line(aes(y = value,text = paste0("Monthly MA","<br>Date: ", date, "<br>Price: ", scales::dollar(value,suffix="",prefix="€"))), group = 1) +
geom_line(aes(y = MA_30,text = paste0("30 Day","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_30,suffix="",prefix=""))), group = 1)+
geom_line(aes(y = MA_90, text = paste0("90 Day","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_90,suffix="",prefix=""))), group = 1)+
geom_line(aes(y = MA_180, text = paste0("180 Day MA","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_180,suffix="",prefix=""))), group = 1)+
geom_line(aes(y = MA_365,text = paste0("365 Day","<br>Date: ", date, "<br>Sales: ", scales::dollar(MA_365,suffix="",prefix=""))), group = 1)+
theme_minimal() +
scale_y_continuous("Residential Sales",
breaks = scales::breaks_extended(15),
labels = scales::label_dollar(suffix="000",prefix=""))+
scale_x_date(NULL,
breaks = scales::breaks_width("9 months"),
labels = scales::label_date_short()
) +
ggtitle(" Dublin Sales Moving Average (Mean)")+
xlab("")
# Modify the appearance in plotly
ggplotly(n, tooltip = "text") %>%
style(
line = list(
color = c("orange", "green", "red", "blue")
)
)
mean_price_data_daily_national <- res_prop_df %>% select(date,price) %>% rename(value = "price") %>%
summarise_by_time(
.date_var = date,
.by="days",
across(value:value, .fns = mean),
) %>%
pad_by_time(
.date_var = date,
.by="days",
.pad_value = 0
)
n <- mean_price_data_daily_national %>%
tk_augment_slidify(
.align = "center",
.value = value,
.period = c(30,90,180, 365),
.f = mean,
.partial = TRUE,
.names = stringr::str_c("MA_", c(30,90,180,365))
) %>%
ggplot(aes(date, value,text = paste0("MA:","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(value,suffix="",prefix="")))) +
#geom_line(aes(y = value,text = paste0("Monthly MA","<br>Date: ", date, "<br>Price: ", scales::dollar(value,suffix="",prefix="€"))), group = 1) +
geom_line(aes(y = MA_30,text = paste0("30 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_30,suffix="",prefix="€"))), group = 1, color = "blue")+
geom_line(aes(y = MA_90, text = paste0("90 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_90,suffix="",prefix="€"))), group = 1, color = "red")+
geom_line(aes(y = MA_180, text = paste0("180 Day MA","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_180,suffix="",prefix="€"))), group = 1, color = "green")+
geom_line(aes(y = MA_365,text = paste0("365 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_365,suffix="",prefix="€"))), group = 1, color = "orange")+
theme_minimal() +
scale_y_continuous("",
breaks = scales::breaks_extended(15),
labels = scales::label_dollar(suffix="",prefix="€"))+
scale_x_date(NULL,
breaks = scales::breaks_width("9 months"),
labels = scales::label_date_short()
) +
ggtitle("National Price Moving Averages")+
xlab("")+
theme(legend.position = "right")
ggplotly(n, tooltip = "text")
h<-mean_price_data_daily_national %>%
filter_by_time(.start_date = "2022") %>%
tk_augment_slidify(
.align = "center",
.value = value,
.period = c(30,90,180, 365),
.f = mean,
.partial = TRUE,
.names = stringr::str_c("MA_", c(30,90,180,365))
) %>%
ggplot(aes(date, value,text = paste0("MA:","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(value,suffix="",prefix="")))) +
#geom_line(aes(y = value,text = paste0("Monthly MA","<br>Date: ", date, "<br>Price: ", scales::dollar(value,suffix="",prefix="€"))), group = 1) +
geom_line(aes(y = MA_30,text = paste0("30 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_30,suffix="",prefix="€"))), group = 1, color = "blue")+
geom_line(aes(y = MA_90, text = paste0("90 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_90,suffix="",prefix="€"))), group = 1, color = "red")+
geom_line(aes(y = MA_180, text = paste0("180 Day MA","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_180,suffix="",prefix="€"))), group = 1, color = "green")+
geom_line(aes(y = MA_365,text = paste0("365 Day","<br>Date: ", date, "<br>Mean Price: ", scales::dollar(MA_365,suffix="",prefix="€"))), group = 1, color = "orange")+
theme_minimal() +
scale_y_continuous("",
breaks = scales::breaks_extended(15),
labels = scales::label_dollar(suffix="",prefix="€"))+
scale_x_date(NULL,
breaks = scales::breaks_width("9 months"),
labels = scales::label_date_short()
) +
ggtitle("National Price Moving Averages ")+
xlab("")+
theme_minimal()
ggplotly(h, tooltip = "text")
mean_price_data_daily_national%>%
filter_by_time(.start_date = "2022") %>%
mutate(value_na = ifelse(value == 0, NA,value)) %>%
mutate(value_cleaned = ts_clean_vec(lambda = "auto",value_na, period = 7)) %>%
pivot_longer(-date)%>%
plot_time_series(date,value,name,
.smooth = F,
.title = "Cleaned and Original TS")
all_county_data <- res_prop_df %>%select("date","county" ,"price") %>%select("date","county","price") %>% rename(value = "price") %>%
group_by(county) %>%
summarise_by_time(
.date_var = date,
.by="year",
across(value:value, .fns = mean)
) %>%
pad_by_time(
.date_var = date,
.by="year",
.pad_value = 0
) %>%
ungroup()
heat_map<- ggplot(all_county_data, aes(y = reorder(county, value), x = reorder(year(date),date), fill = value,
text = paste0("County: ",county, "<br>Mean Price: €", round(value,digits=0))))+
geom_tile() +
geom_text(aes(label = floor(value)), size = 2.5, color = "white") +
scale_fill_viridis(discrete = FALSE, option = "turbo",labels = scales::number_format(scale = 0.001, suffix = "K", accuracy = 1)) +
theme(legend.position = "right") +
ggtitle("") +
xlab("") +
ylab("") +
labs(fill = "Mean Price")
ggplotly(heat_map, tooltip = "text")
all_county_median_data <- res_prop_df %>%select("date","county" ,"price") %>%select("date","county","price") %>% rename(value = "price") %>%
group_by(county) %>%
summarise_by_time(
.date_var = date,
.by="year",
across(value:value, .fns = median)
) %>%
pad_by_time(
.date_var = date,
.by="year",
.pad_value = 0
) %>%
ungroup()
heat_map_median<- ggplot(all_county_median_data, aes(y = reorder(county, value), x = reorder(year(date),date), fill = value,
text = paste0("County: ",county, "<br>Median Price: €", round(value,digits=1))))+
geom_tile() +
geom_text(aes(label = floor(value)), size = 2, color = "white") +
scale_fill_viridis(discrete = FALSE, option = "turbo",labels = scales::number_format(scale = 0.001, suffix = "K", accuracy = 1)) +
#theme(legend.position = "right") +
ggtitle("") +
xlab("") +
ylab("") +
labs(fill = "Median Price")
ggplotly(heat_map_median, tooltip = "text")
Median house prices illustrate the value with 50% of the prices above and below this value. The median is less influenced by larger variance in house prices when comparing price by location. The median house price in Longford is €160000 with the mean price €183452, where as in Dublin the mean is €583957 with the median €410000.