The objective of this project is to compare the rental prices of furnished and unfurnished apartments. The analysis fits a multiple regression model with furnished status as a dummy variable, builds functions that predict rental prices from the fitted model, and illustrates the results.
Some apartments are comparable with each other due to location, size, and year of construction. This analysis will help understand the premium associated with furnished apartments.
library(tidyverse)
library(ggplot2)
library(car)
library(magrittr)
library(dplyr)
library(knitr)
library(DT)
# Load the raw comparison listings from the CSV export
data <- read.csv("Vergleichsobjekte_(Un)furnished.csv")

# Assign English column names by position (the file must have these 10 columns)
colnames(data) <- c(
  "Operator", "Address", "Size.in.sqm",
  "All.In.Rent_Gross.warm.rent", "Basic.Rent",
  "Features", "X_sqm_warm", "Year.of.construction",
  "Hyperlink", "Furnished_Unfurnished"
)

# Build the modelling table:
#   * money columns: strip everything except digits and "." before converting
#   * Furnished: dummy variable, 1 for "Furnished Apartments", 0 otherwise
#   * keep only the five analysis columns and drop rows with any missing value
data_clean <- data %>%
  mutate(
    Furnished = ifelse(Furnished_Unfurnished == "Furnished Apartments", 1, 0),
    Year = as.numeric(Year.of.construction),
    Size_sqm = as.numeric(Size.in.sqm),
    Rent_per_sqm = as.numeric(gsub("[^0-9.]", "", X_sqm_warm)),
    Rent_warm = as.numeric(gsub("[^0-9.]", "", All.In.Rent_Gross.warm.rent))
  ) %>%
  select(Size_sqm, Rent_warm, Rent_per_sqm, Year, Furnished) %>%
  na.omit()
# Report how many listings and variables remain after cleaning
cat("Dataset dimensions:", dim(data_clean)[1], "rows,",
    dim(data_clean)[2], "columns\n")
## Dataset dimensions: 36 rows, 5 columns
# Preview the first ten cleaned listings as a styled table
data_clean %>%
  head(10) %>%
  kable(caption = "First 10 rows of cleaned data") %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
| Size_sqm | Rent_warm | Rent_per_sqm | Year | Furnished |
|---|---|---|---|---|
| 19.00 | 1199 | 63.11 | 2022 | 1 |
| 30.00 | 1549 | 51.63 | 2022 | 1 |
| 41.00 | 1649 | 40.22 | 2022 | 1 |
| 61.00 | 2199 | 36.05 | 2022 | 1 |
| 26.55 | 1030 | 38.79 | 2024 | 1 |
| 32.98 | 1280 | 38.81 | 2024 | 1 |
| 35.75 | 1385 | 38.74 | 2024 | 1 |
| 30.54 | 996 | 32.61 | 2023 | 1 |
| 26.84 | 906 | 33.75 | 2023 | 1 |
| 54.06 | 1598 | 29.56 | 2023 | 1 |
## Size_sqm Rent_warm Rent_per_sqm Year Furnished
## Min. :18.00 Min. : 779 Min. :21.58 Min. :1970 Min. :0.0000
## 1st Qu.:26.16 1st Qu.:1011 1st Qu.:30.45 1st Qu.:2019 1st Qu.:0.0000
## Median :31.82 Median :1182 Median :34.09 Median :2022 Median :1.0000
## Mean :35.03 Mean :1219 Mean :36.95 Mean :2020 Mean :0.5556
## 3rd Qu.:42.77 3rd Qu.:1389 3rd Qu.:39.88 3rd Qu.:2024 3rd Qu.:1.0000
## Max. :61.00 Max. :2199 Max. :63.11 Max. :2024 Max. :1.0000
# Perform multiple regression: warm rent explained by apartment size,
# construction year, and the furnished dummy (1 = furnished, 0 = unfurnished).
# The call text is echoed verbatim in the "Call:" section of the summary.
model <- lm(Rent_warm ~ Size_sqm + Year + Furnished, data = data_clean)
# Display summary of the regression model (residuals, coefficient table,
# R-squared, and overall F-test)
summary(model)
##
## Call:
## lm(formula = Rent_warm ~ Size_sqm + Year + Furnished, data = data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -264.03 -149.27 -49.69 181.36 398.60
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9632.881 7413.576 -1.299 0.20311
## Size_sqm 23.585 3.437 6.862 9.19e-08 ***
## Year 4.900 3.681 1.331 0.19255
## Furnished 230.476 78.971 2.918 0.00639 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 197.3 on 32 degrees of freedom
## Multiple R-squared: 0.6264, Adjusted R-squared: 0.5914
## F-statistic: 17.89 on 3 and 32 DF, p-value: 5.348e-07
# Calculate variance inflation factors (car::vif) to check the predictors
# for multicollinearity
vif_values <- vif(model)
# Strip the names from the VIF column: data.frame() would otherwise reuse them
# as row names, and kable() would then print every predictor name twice under
# a misaligned header.
kable(
  data.frame(Variable = names(vif_values), VIF = unname(vif_values)),
  caption = "Variance Inflation Factors"
) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
| Variable | VIF |
|---|---|
| Size_sqm | 1.439447 |
| Year | 1.025269 |
| Furnished | 1.424625 |
Interpretation: All three VIF values are close to 1 (the largest is 1.44), well below the common threshold of 5, so there is no serious multicollinearity among the predictors.
# Predict warm rent from the fitted linear model.
#
# Arguments:
#   size      apartment size in sqm (numeric, may be a vector)
#   year      year of construction (numeric)
#   furnished dummy variable: 1 for furnished, 0 for unfurnished
#   fit       object whose coef() has the terms "(Intercept)", "Size_sqm",
#             "Year" and "Furnished"; defaults to the global `model`
#
# Returns the predicted rent as an unnamed numeric value:
#   intercept + b_size * size + b_year * year + b_furnished * furnished
predict_rent <- function(size, year, furnished, fit = model) {
  beta <- coef(fit)
  # Look the coefficients up by term name instead of by position, so the
  # prediction stays correct if the formula's terms are reordered, and a
  # missing term fails loudly. `[[` also drops the "(Intercept)" name that
  # positional indexing used to leak into the result.
  beta[["(Intercept)"]] +
    beta[["Size_sqm"]] * size +
    beta[["Year"]] * year +
    beta[["Furnished"]] * furnished
}
# Relative difference of the furnished value over the unfurnished value, in
# percent. Positive means the furnished value is higher. Works element-wise
# on vectors.
calc_percent_diff <- function(furnished_value, unfurnished_value) {
  gap <- furnished_value - unfurnished_value
  gap / unfurnished_value * 100
}
# Test the prediction function: print the heading for the example predictions.
# NOTE(review): the calls that printed the two example lines following this
# heading are not shown in this listing.
cat("Example predictions:\n")
## Example predictions:
## 30 sqm apartment built in 2020 (furnished): 1204.15 €
## 30 sqm apartment built in 2020 (unfurnished): 973.67 €
# Compare raw (unadjusted) group averages of furnished and unfurnished
# apartments.
#
# `data` needs the columns Furnished (1/0), Rent_warm and Rent_per_sqm.
# Returns a list with:
#   avg_values                 per-group mean rent, mean rent per sqm, and count
#   rent_diff_percent          furnished vs unfurnished mean rent, in percent
#   rent_per_sqm_diff_percent  the same for rent per sqm
compare_furnished_unfurnished <- function(data) {
  by_status <- summarise(
    group_by(data, Furnished),
    avg_rent = mean(Rent_warm),
    avg_rent_per_sqm = mean(Rent_per_sqm),
    count = n()
  )

  # Row selectors for the two groups of the dummy variable
  is_furnished <- by_status$Furnished == 1
  is_unfurnished <- by_status$Furnished == 0

  list(
    avg_values = by_status,
    rent_diff_percent = calc_percent_diff(
      by_status$avg_rent[is_furnished],
      by_status$avg_rent[is_unfurnished]
    ),
    rent_per_sqm_diff_percent = calc_percent_diff(
      by_status$avg_rent_per_sqm[is_furnished],
      by_status$avg_rent_per_sqm[is_unfurnished]
    )
  )
}
# Run the group comparison on the cleaned listings
results <- compare_furnished_unfurnished(data_clean)
# Render the per-group averages with readable labels and column headers
results$avg_values %>%
  mutate(Furnished = ifelse(Furnished == 1, "Furnished", "Unfurnished")) %>%
  rename(
    `Apartment Type` = Furnished,
    `Average Rent (€)` = avg_rent,
    `Average Rent per sqm (€)` = avg_rent_per_sqm,
    `Count` = count
  ) %>%
  kable(caption = "Average Rental Prices by Furnished Status", digits = 2) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
| Apartment Type | Average Rent (€) | Average Rent per sqm (€) | Count |
|---|---|---|---|
| Unfurnished | 1251.50 | 30.10 | 16 |
| Furnished | 1192.65 | 42.44 | 20 |
##
## 🏠 KEY FINDINGS:
# Word the direction from the sign of the difference, so a negative value is
# reported as "cheaper" rather than as a negative "more expensive" percentage.
# These are raw group averages, not adjusted for apartment size or year.
describe_premium <- function(pct) {
  direction <- if (isTRUE(pct < 0)) "cheaper" else "more expensive"
  paste(abs(round(pct, 2)), "%", direction)
}
cat("📈 Furnished apartments are",
    describe_premium(results$rent_diff_percent),
    "than unfurnished apartments.\n")
## 📈 Furnished apartments are 4.7 % cheaper than unfurnished apartments.
cat("📊 Per square meter, furnished apartments are",
    describe_premium(results$rent_per_sqm_diff_percent),
    "than unfurnished apartments.\n")
## 📊 Per square meter, furnished apartments are 41 % more expensive than unfurnished apartments.
# Mean apartment size per group (Furnished: 1 = furnished, 0 = unfurnished)
avg_sizes <- summarise(
  group_by(data_clean, Furnished),
  avg_size = mean(Size_sqm)
)
cat("\n📏 SIZE COMPARISON:\n")
##
## 📏 SIZE COMPARISON:
cat("Furnished apartments average size:",
    round(with(avg_sizes, avg_size[Furnished == 1]), 2), "sqm\n")
## Furnished apartments average size: 29.51 sqm
cat("Unfurnished apartments average size:",
    round(with(avg_sizes, avg_size[Furnished == 0]), 2), "sqm\n")
## Unfurnished apartments average size: 41.92 sqm
# Print the model-predicted warm rent for a furnished and an unfurnished
# apartment of the given size (sqm) and construction year, followed by the
# furnished premium in percent. Uses predict_rent() and calc_percent_diff();
# output goes to the console via cat().
compare_predicted_rent <- function(size, year) {
  rent_with <- predict_rent(size, year, 1)
  rent_without <- predict_rent(size, year, 0)
  premium_pct <- calc_percent_diff(rent_with, rent_without)

  cat("For a", size, "sqm apartment built in", year, ":\n")
  cat("Predicted rent (furnished):", round(rent_with, 2), "€\n")
  cat("Predicted rent (unfurnished):", round(rent_without, 2), "€\n")
  cat("Furnished is", round(premium_pct, 2), "% more expensive\n\n")
}
# Example predictions: print the heading for the model-based examples.
# NOTE(review): the calls that printed the examples following this heading
# are not shown in this listing.
cat("PREDICTION EXAMPLES:\n\n")
## PREDICTION EXAMPLES:
## For a 30 sqm apartment built in 2020 :
## Predicted rent (furnished): 1204.15 €
## Predicted rent (unfurnished): 973.67 €
## Furnished is 23.67 % more expensive
## For a 50 sqm apartment built in 2010 :
## Predicted rent (furnished): 1626.85 €
## Predicted rent (unfurnished): 1396.38 €
## Furnished is 16.51 % more expensive
## For a 75 sqm apartment built in 2015 :
## Predicted rent (furnished): 2240.99 €
## Predicted rent (unfurnished): 2010.51 €
## Furnished is 11.46 % more expensive
# Box plot of warm rent per group, with the individual listings jittered on
# top; the legend is hidden because the x axis already names the groups
data_clean %>%
  ggplot(aes(x = factor(Furnished), y = Rent_warm, fill = factor(Furnished))) +
  geom_boxplot(alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  scale_x_discrete(labels = c("Unfurnished", "Furnished")) +
  scale_fill_discrete(labels = c("Unfurnished", "Furnished")) +
  ggtitle("Rent Distribution by Furnished Status") +
  xlab("Apartment Type") +
  ylab("Rent (€)") +
  labs(fill = "Furnished") +
  theme_minimal() +
  theme(legend.position = "none")
Distribution of rental prices for furnished vs unfurnished apartments
# Scatter plot of warm rent against size, with a separate linear fit and
# confidence band for each group
data_clean %>%
  ggplot(aes(x = Size_sqm, y = Rent_warm, color = factor(Furnished))) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, alpha = 0.3) +
  scale_color_discrete(labels = c("Unfurnished", "Furnished")) +
  ggtitle("Rent vs. Size by Furnished Status") +
  xlab("Size (sqm)") +
  ylab("Rent (€)") +
  labs(color = "Apartment Type") +
  theme_minimal() +
  theme(legend.position = "bottom")
Relationship between apartment size and rent, colored by furnished status
# Histogram of rent per square meter, with the two groups drawn side by side
data_clean %>%
  ggplot(aes(x = Rent_per_sqm, fill = factor(Furnished))) +
  geom_histogram(bins = 20, position = "dodge", alpha = 0.7) +
  scale_fill_discrete(labels = c("Unfurnished", "Furnished")) +
  ggtitle("Distribution of Rent per Square Meter") +
  xlab("Rent per Square Meter (€/sqm)") +
  ylab("Count") +
  labs(fill = "Apartment Type") +
  theme_minimal() +
  theme(legend.position = "bottom")
Distribution of rent per square meter for furnished vs unfurnished apartments
Based on the analysis of furnished vs unfurnished apartment rentals:
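Price Premium: Furnished apartments command a significant premium over unfurnished ones. Holding size and year of construction constant, the model estimates it at about €230 per month (p ≈ 0.006). Because the model is additive, this is a fixed amount, so the percentage premium varies with size and year of construction, from roughly 11% to 24% in the prediction examples. Location is not included in the model. The raw averages do not show this premium in absolute rent, because the furnished apartments in the sample are smaller (29.5 vs. 41.9 sqm on average); per square meter they are about 41% more expensive.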
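Model Performance: The regression model explains about 63% of the variation in rental prices (R² = 0.63, adjusted R² = 0.59) using apartment size, year of construction, and furnished status as predictors. Size and furnished status are statistically significant; year of construction is not (p ≈ 0.19).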
Practical Implications: The prediction function can be used to estimate rental prices for different apartment configurations, helping both landlords and tenants make informed decisions.
The predict_rent() function can be used to:

- Estimate fair rental prices for new listings
- Compare furnished vs unfurnished rental costs
- Analyze market trends in different apartment segments
- Support investment and pricing decisions
Analysis completed on 2025-05-24