Erik Yuan Rui Woon, s3905260
Oct 14, 2025
Publishing is optional: if you like, you can publish your presentation to RPubs (see here) and add the link to your presentation here.
RPubs link: https://rpubs.com/s3905260/1354300
# ---- Load and prepare the house-purchase data ----
# Reads the raw CSV, recodes two columns as factors, drops a fixed list of
# countries, and adds USD versions of the price, salary and loan columns using
# quotes fetched at run time.
ds <- read.csv("global_house_purchase_dataset.csv")

# Purchase decision: levels 0/1 are relabelled "No"/"Yes".
ds$decision <- factor(ds$decision, levels = c(0, 1), labels = c("No", "Yes"))

# Furnishing status is stored as an ordered factor
# (Unfurnished < Semi-Furnished < Fully-Furnished).
ds$furnishing_status <- factor(
  ds$furnishing_status,
  levels = c("Unfurnished", "Semi-Furnished", "Fully-Furnished"),
  ordered = TRUE
)

# Countries excluded from the analysis.
excluded_countries <- c(
  "Brazil", "China", "India", "Germany", "Japan", "South Africa", "UAE"
)
ds_filtered <- subset(ds, !country %in% excluded_countries)

# ---- Exchange rates to USD ----
# The quote symbols are "<CODE>USD=X"; build them from the currency codes so
# the two vectors cannot drift out of step.
currency_codes <- c("AUD", "CAD", "EUR", "SGD", "GBP")
currency_pairs <- paste0(currency_codes, "USD=X")

# NOTE(review): getQuote() is presumably quantmod's and returns live quotes, so
# results change from run to run - confirm, and consider pinning the rates.
exchange_data <- getQuote(currency_pairs)
exchange_rates <- exchange_data$Last

# Fail fast: a missing or NA quote would otherwise silently turn every
# converted amount for that currency into NA.
stopifnot(
  is.numeric(exchange_rates),
  length(exchange_rates) == length(currency_codes),
  !anyNA(exchange_rates)
)
names(exchange_rates) <- currency_codes
exchange_rates <- c("USD" = 1, exchange_rates)

# ---- Convert monetary columns to USD ----
country_currency <- c(
  "Australia" = "AUD", "Canada" = "CAD", "France" = "EUR",
  "Singapore" = "SGD", "UK" = "GBP", "USA" = "USD"
)

# Look up by character, never by factor: a factor index would be interpreted
# as integer codes and could select the wrong currency. unname() keeps the new
# columns as plain vectors instead of name-carrying ones.
ds_filtered$currency <- unname(
  country_currency[as.character(ds_filtered$country)]
)

# Per-row USD rate, looked up once and reused for every converted column.
usd_rate <- unname(exchange_rates[ds_filtered$currency])
ds_filtered$price_usd <- ds_filtered$price * usd_rate
ds_filtered$salary_usd <- ds_filtered$customer_salary * usd_rate
ds_filtered$loan_amt_usd <- ds_filtered$loan_amount * usd_rate
# Monthly expenses converted to USD with the per-row currency rate.
# (This assignment and the summary below were fused on one line, which does
# not parse; they are separate statements.)
ds_filtered$monthly_exp_usd <-
  ds_filtered$monthly_expenses * unname(exchange_rates[ds_filtered$currency])

# Per-country mean and median property price and mean salary, all in USD.
# Missing values are ignored in each aggregate.
country_summary <- ds_filtered %>%
  group_by(country) %>%
  summarise(
    avg_price_usd = mean(price_usd, na.rm = TRUE),
    median_price_usd = median(price_usd, na.rm = TRUE),
    avg_salary_usd = mean(salary_usd, na.rm = TRUE)
  )
knitr::kable(country_summary)

| country | avg_price_usd | median_price_usd | avg_salary_usd |
|---|---|---|---|
| Australia | 669704.4 | 670908.3 | 35981.73 |
| Canada | 792069.5 | 791423.9 | 39300.16 |
| France | 1555208.8 | 1560141.4 | 63666.54 |
| Singapore | 1724636.6 | 1714613.8 | 42269.96 |
| UK | 1709818.0 | 1713280.0 | 73538.80 |
| USA | 1603145.7 | 1600616.0 | 54950.75 |
# Boxplot: Price comparison by Country
# Boxes are drawn white with black outlines, so no fill aesthetic is mapped
# (a mapped fill would be overridden by the constant fill anyway).
ggplot(ds_filtered, aes(x = country, y = price_usd)) +
  geom_boxplot(fill = "white", color = "black") +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Boxplot of Property Prices by Country",
    x = "Country",
    y = "Property Price (USD)"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, size = 10),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    plot.title = element_text(size = 16)
  )

# Bar chart: count of each purchase decision per country, bars side by side.
ggplot(ds_filtered, aes(x = country, fill = decision)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Decision to Buy by Country",
    x = "Country",
    y = "Count of Decisions",
    fill = "Decision"
  ) +
  theme_minimal()

# Number of properties per country, construction year and property type.
# .groups = "drop" returns an ungrouped table, so no leftover grouping (or
# regrouping message) leaks into later steps.
property_counts <- ds_filtered %>%
  group_by(country, constructed_year, property_type) %>%
  summarise(count = n(), .groups = "drop")
# Keep only the two property types compared in the line chart.
filtered_counts <- property_counts %>%
  filter(property_type %in% c("Apartment", "Independent House"))

# Line chart: properties built per year, one panel per country, one line per
# property type. The title names the types actually selected by the filter
# above (it previously said "townhouses"). linewidth replaces the deprecated
# size argument for line thickness.
ggplot(
  filtered_counts,
  aes(x = constructed_year, y = count, colour = property_type)
) +
  geom_line(linewidth = 0.75) +
  facet_wrap(~ country) +
  labs(
    title = paste(
      "Number of apartments and independent houses",
      "built by year in each country"
    ),
    x = "Constructed Year",
    y = "Number of Properties",
    colour = "Property Type"
  ) +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 9),
    legend.position = "bottom"
  )

# USD prices of the properties with a "Yes" decision. %in% never yields NA,
# so rows with a missing decision are dropped instead of becoming NA prices.
yes_prices <- ds_filtered$price_usd[ds_filtered$decision %in% "Yes"]
# USD prices of the properties with a "No" decision. %in% never yields NA,
# so rows with a missing decision are dropped instead of becoming NA prices.
no_prices <- ds_filtered$price_usd[ds_filtered$decision %in% "No"]

# Side-by-side normal Q-Q plots for the two price groups. Each panel gets its
# own title (the default title is identical for both) and a reference line.
# The previous graphics layout is restored afterwards.
old_par <- par(mfrow = c(1, 2))
qqnorm(yes_prices, main = "Normal Q-Q Plot: Decision = Yes")
qqline(yes_prices)
qqnorm(no_prices, main = "Normal Q-Q Plot: Decision = No")
qqline(no_prices)
par(old_par)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 196.24 < 2.2e-16 ***
## 92441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Welch Two Sample t-test
##
## data: yes_prices and no_prices
## t = -21.082, df = 37695, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -144894.7 -120244.3
## sample estimates:
## mean of x mean of y
## 1240507 1373076
##
## Pearson's product-moment correlation
##
## data: ds_filtered$price_usd and ds_filtered$loan_amt_usd
## t = 771.39, df = 92441, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9294711 0.9312047
## sample estimates:
## cor
## 0.9303431
##
## Call:
## lm(formula = price_usd ~ loan_amt_usd, data = ds_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -741484 -196411 -71887 144676 1236356
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.802e+05 1.811e+03 99.48 <2e-16 ***
## loan_amt_usd 1.385e+00 1.796e-03 771.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 305800 on 92441 degrees of freedom
## Multiple R-squared: 0.8655, Adjusted R-squared: 0.8655
## F-statistic: 5.95e+05 on 1 and 92441 DF, p-value: < 2.2e-16
# Chi-squared test: furnishing status vs. purchase decision.
furnishing_decision_table <- table(
  ds_filtered[["furnishing_status"]],
  ds_filtered[["decision"]]
)
chi_result <- chisq.test(furnishing_decision_table)
print(chi_result)
##
## Pearson's Chi-squared test
##
## data: furnishing_decision_table
## X-squared = 4.81, df = 2, p-value = 0.09026
# Chi-squared test: country vs. purchase decision.
country_decision_table <- table(
  ds_filtered[["country"]],
  ds_filtered[["decision"]]
)
chi_result <- chisq.test(country_decision_table)
print(chi_result)
##
## Pearson's Chi-squared test
##
## data: country_decision_table
## X-squared = 487.55, df = 5, p-value < 2.2e-16
# Chi-squared test: property type vs. furnishing status.
property_furnishing_table <- table(
  ds_filtered[["property_type"]],
  ds_filtered[["furnishing_status"]]
)
chi_result <- chisq.test(property_furnishing_table)
print(chi_result)
##
## Pearson's Chi-squared test
##
## data: property_furnishing_table
## X-squared = 7.8341, df = 10, p-value = 0.645