An analysis of global housing market trends

Focusing specifically on Australia, Canada, France, Singapore, UK and USA

Erik Yuan Rui Woon, s3905260

Oct 14, 2025

Introduction

Introduction cont.

Problem Statement

Data

Data cont.

# Load the raw purchase records and encode the categorical columns.
ds <- read.csv("global_house_purchase_dataset.csv")
ds$decision <- factor(ds$decision, levels = c(0, 1), labels = c("No", "Yes"))
ds$furnishing_status <- factor(
  ds$furnishing_status,
  levels = c("Unfurnished", "Semi-Furnished", "Fully-Furnished"),
  ordered = TRUE
)

# Local currency of each country in scope for this analysis.
country_currency <- c(
  "Australia" = "AUD", "Canada" = "CAD", "France" = "EUR",
  "Singapore" = "SGD", "UK" = "GBP", "USA" = "USD"
)

# Keep only the countries we have a currency for. Filtering by inclusion
# (rather than listing the countries to drop) guarantees that no row can reach
# the conversion step without a known currency.
ds_filtered <- subset(ds, country %in% names(country_currency))

# Yahoo Finance symbols and the currency code each one quotes against USD.
currency_pairs <- c("AUDUSD=X", "CADUSD=X", "EURUSD=X", "SGDUSD=X", "GBPUSD=X")
currency_codes <- c("AUD", "CAD", "EUR", "SGD", "GBP")

# NOTE(review): these are live quotes, so every derived USD figure changes
# from run to run; consider pinning the rates if the results must reproduce.
exchange_data <- getQuote(currency_pairs)

# Look each quote up by its symbol (row name) instead of relying on the rows
# coming back in request order, then fail loudly if any quote is missing.
exchange_rates <- exchange_data[currency_pairs, "Last"]
names(exchange_rates) <- currency_codes
if (anyNA(exchange_rates)) {
  stop(
    "Missing exchange rate for: ",
    paste(currency_codes[is.na(exchange_rates)], collapse = ", "),
    call. = FALSE
  )
}

exchange_rates <- c("USD" = 1, exchange_rates)

# as.character() keeps the lookup name-based even if `country` was read in as
# a factor; unname() stops lookup names leaking into the data frame columns.
ds_filtered$currency <- unname(
  country_currency[as.character(ds_filtered$country)]
)
usd_rate <- unname(exchange_rates[ds_filtered$currency])
stopifnot(!anyNA(usd_rate))

# Convert every monetary column from local currency to USD.
ds_filtered$price_usd <- ds_filtered$price * usd_rate
ds_filtered$salary_usd <- ds_filtered$customer_salary * usd_rate
ds_filtered$loan_amt_usd <- ds_filtered$loan_amount * usd_rate
ds_filtered$monthly_exp_usd <- ds_filtered$monthly_expenses * usd_rate

Descriptive Statistics and Visualisations

# Per-country mean and median property price plus mean salary, all in USD.
# Missing values are dropped before each statistic is computed.
country_summary <- summarise(
  group_by(ds_filtered, country),
  avg_price_usd = mean(price_usd, na.rm = TRUE),
  median_price_usd = median(price_usd, na.rm = TRUE),
  avg_salary_usd = mean(salary_usd, na.rm = TRUE)
)
knitr::kable(country_summary)
country avg_price_usd median_price_usd avg_salary_usd
Australia 669704.4 670908.3 35981.73
Canada 792069.5 791423.9 39300.16
France 1555208.8 1560141.4 63666.54
Singapore 1724636.6 1714613.8 42269.96
UK 1709818.0 1713280.0 73538.80
USA 1603145.7 1600616.0 54950.75

Descriptive Statistics and Visualisations cont.

# Boxplot of USD property prices, one box per country, drawn as white boxes
# with black outlines and a dollar-formatted y axis.
ggplot(ds_filtered, aes(x = country, y = price_usd, fill = country)) +
  geom_boxplot(fill = "white", color = "black") +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Boxplot of Property Prices by Country",
    x = "Country",
    y = "Property Price (USD)"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, size = 10),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    plot.title = element_text(size = 16)
  )

Descriptive Statistics and Visualisations cont.

# Side-by-side bars counting "Yes" and "No" purchase decisions per country.
ggplot(ds_filtered, aes(x = country, fill = decision)) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Decision to Buy by Country",
    x = "Country",
    y = "Count of Decisions",
    fill = "Decision"
  )

Descriptive Statistics and Visualisations cont.

# Number of properties per country, construction year and property type.
# `.groups = "drop"` returns an ungrouped tibble and avoids the regrouping
# message that summarise() prints for multi-column groupings.
property_counts <- ds_filtered %>%
  group_by(country, constructed_year, property_type) %>%
  summarise(count = n(), .groups = "drop")

# Only the two property types compared in the plot below.
filtered_counts <- property_counts %>%
  filter(property_type %in% c("Apartment", "Independent House"))

# One panel per country, one line per property type. The title names the same
# two property types that the filter above selects.
ggplot(
  filtered_counts,
  aes(x = constructed_year, y = count, colour = property_type)
) +
  # `linewidth` replaces the deprecated `size` argument for line geoms.
  geom_line(linewidth = 0.75) +
  facet_wrap(~ country) +
  labs(
    title = "Number of apartments and independent houses built by year in each country",
    x = "Constructed Year",
    y = "Number of Properties",
    color = "Property Type"
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 9),
    legend.position = "bottom"
  )

Hypothesis testing: Welch 2-sample t-test

# USD prices split by purchase decision; both vectors feed the t-test below.
yes_prices <- ds_filtered$price_usd[ds_filtered$decision == "Yes"]
no_prices <- ds_filtered$price_usd[ds_filtered$decision == "No"]

# Normal Q-Q plots side by side. Each panel is titled so the two groups can be
# told apart, and qqline() adds the reference line needed to judge departures
# from normality. The previous graphics settings are restored afterwards so
# the two-panel layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 2))
qqnorm(yes_prices, main = "Normal Q-Q Plot: Decision = Yes")
qqline(yes_prices)
qqnorm(no_prices, main = "Normal Q-Q Plot: Decision = No")
qqline(no_prices)
par(old_par)

Welch 2-sample t-test cont.

# Levene's test for equal USD price variance between the two decision groups.
leveneTest(
  price_usd ~ decision,
  data = ds_filtered
)
## Levene's Test for Homogeneity of Variance (center = median)
##          Df F value    Pr(>F)    
## group     1  196.24 < 2.2e-16 ***
##       92441                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-sample t-test of mean USD price, "Yes" vs "No"; var.equal = FALSE is the
# default and selects the Welch (unequal variances) form.
t.test(x = yes_prices, y = no_prices, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  yes_prices and no_prices
## t = -21.082, df = 37695, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -144894.7 -120244.3
## sample estimates:
## mean of x mean of y 
##   1240507   1373076

Regression analysis: Linear regression

# Pearson correlation between property price and loan amount (both in USD).
cor.test(
  x = ds_filtered$price_usd,
  y = ds_filtered$loan_amt_usd,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  ds_filtered$price_usd and ds_filtered$loan_amt_usd
## t = 771.39, df = 92441, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9294711 0.9312047
## sample estimates:
##       cor 
## 0.9303431

Linear regression cont.

# Simple linear regression of USD price on USD loan amount, followed by the
# coefficient table and fit statistics.
lm_model <- lm(
  formula = price_usd ~ loan_amt_usd,
  data = ds_filtered
)
summary(lm_model)
## 
## Call:
## lm(formula = price_usd ~ loan_amt_usd, data = ds_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -741484 -196411  -71887  144676 1236356 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.802e+05  1.811e+03   99.48   <2e-16 ***
## loan_amt_usd 1.385e+00  1.796e-03  771.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 305800 on 92441 degrees of freedom
## Multiple R-squared:  0.8655, Adjusted R-squared:  0.8655 
## F-statistic: 5.95e+05 on 1 and 92441 DF,  p-value: < 2.2e-16

Linear regression cont.

# Draw the four default lm diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm_model, which = c(1, 2, 3, 5))

Categorical association

# Chi-squared test of association between furnishing status and the purchase
# decision, based on their contingency table.
furnishing_decision_table <- table(
  ds_filtered[["furnishing_status"]],
  ds_filtered[["decision"]]
)
chi_result <- chisq.test(furnishing_decision_table)
print(chi_result)
## 
##  Pearson's Chi-squared test
## 
## data:  furnishing_decision_table
## X-squared = 4.81, df = 2, p-value = 0.09026

Categorical association cont.

# Chi-squared test of association between country and the purchase decision,
# based on their contingency table.
country_decision_table <- table(
  ds_filtered[["country"]],
  ds_filtered[["decision"]]
)
chi_result <- chisq.test(country_decision_table)
print(chi_result)
## 
##  Pearson's Chi-squared test
## 
## data:  country_decision_table
## X-squared = 487.55, df = 5, p-value < 2.2e-16

Categorical association cont.

# Chi-squared test of association between property type and furnishing status,
# based on their contingency table.
property_furnishing_table <- table(
  ds_filtered[["property_type"]],
  ds_filtered[["furnishing_status"]]
)
chi_result <- chisq.test(property_furnishing_table)
print(chi_result)
## 
##  Pearson's Chi-squared test
## 
## data:  property_furnishing_table
## X-squared = 7.8341, df = 10, p-value = 0.645

Discussion

References