# Load the real-estate valuation data set and preview its first six rows
Real_estate <- read.csv(
  file = "C:/Users/mattv/Desktop/ADEC 7301 Assignments/Data Sets/Real estate.csv"
)
head(Real_estate, n = 6L)
## No X1.transaction.date X2.house.age X3.distance.to.the.nearest.MRT.station
## 1 1 2012.917 32.0 84.87882
## 2 2 2012.917 19.5 306.59470
## 3 3 2013.583 13.3 561.98450
## 4 4 2013.500 13.3 561.98450
## 5 5 2012.833 5.0 390.56840
## 6 6 2012.667 7.1 2175.03000
## X4.number.of.convenience.stores X5.latitude X6.longitude
## 1 10 24.98298 121.5402
## 2 9 24.98034 121.5395
## 3 5 24.98746 121.5439
## 4 5 24.98746 121.5439
## 5 5 24.97937 121.5425
## 6 3 24.96305 121.5125
## Y.house.price.of.unit.area
## 1 37.9
## 2 42.2
## 3 47.3
## 4 54.8
## 5 43.1
## 6 32.1
# Per-column descriptive statistics (min, quartiles, median, mean, max)
summary(Real_estate)
## No X1.transaction.date X2.house.age
## Min. : 1.0 Min. :2013 Min. : 0.000
## 1st Qu.:104.2 1st Qu.:2013 1st Qu.: 9.025
## Median :207.5 Median :2013 Median :16.100
## Mean :207.5 Mean :2013 Mean :17.713
## 3rd Qu.:310.8 3rd Qu.:2013 3rd Qu.:28.150
## Max. :414.0 Max. :2014 Max. :43.800
## X3.distance.to.the.nearest.MRT.station X4.number.of.convenience.stores
## Min. : 23.38 Min. : 0.000
## 1st Qu.: 289.32 1st Qu.: 1.000
## Median : 492.23 Median : 4.000
## Mean :1083.89 Mean : 4.094
## 3rd Qu.:1454.28 3rd Qu.: 6.000
## Max. :6488.02 Max. :10.000
## X5.latitude X6.longitude Y.house.price.of.unit.area
## Min. :24.93 Min. :121.5 Min. : 7.60
## 1st Qu.:24.96 1st Qu.:121.5 1st Qu.: 27.70
## Median :24.97 Median :121.5 Median : 38.45
## Mean :24.97 Mean :121.5 Mean : 37.98
## 3rd Qu.:24.98 3rd Qu.:121.5 3rd Qu.: 46.60
## Max. :25.01 Max. :121.6 Max. :117.50
# Total number of missing cells across the whole data frame
sum(is.na(Real_estate))
## [1] 0
# Pearson correlation between house age and price per unit area,
# reported to four decimal places
correlation <- with(
  Real_estate,
  cor(X2.house.age, Y.house.price.of.unit.area)
)
correlation_rounded <- round(correlation, digits = 4)
print(correlation_rounded)
## [1] -0.2106
# Significance test for the age-price correlation (Pearson, two-sided).
# The `$` expressions are passed as-is because cor.test() echoes them in the
# "data:" line of its printed result.
cor_test <- cor.test(
  x = Real_estate$X2.house.age,
  y = Real_estate$Y.house.price.of.unit.area,
  method = "pearson"
)
print(cor_test)
##
## Pearson's product-moment correlation
##
## data: Real_estate$X2.house.age and Real_estate$Y.house.price.of.unit.area
## t = -4.3721, df = 412, p-value = 1.56e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3008396 -0.1165546
## sample estimates:
## cor
## -0.210567
# Simple linear regression of price per unit area on house age.
# Columns are referenced by bare name so they are resolved through `data`.
# Writing `Real_estate$...` inside the formula bypasses `data`, gives the
# coefficient an awkward name, and makes predict(model, newdata = ...) ignore
# `newdata` because the terms are hard-wired to the global data frame.
model <- lm(Y.house.price.of.unit.area ~ X2.house.age, data = Real_estate)
summary(model)
##
## Call:
## lm(formula = Y.house.price.of.unit.area ~ X2.house.age, data = Real_estate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.113 -10.738 1.626 8.199 77.781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.43470 1.21098 35.042 < 2e-16 ***
## X2.house.age -0.25149 0.05752 -4.372 1.56e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.32 on 412 degrees of freedom
## Multiple R-squared: 0.04434, Adjusted R-squared: 0.04202
## F-statistic: 19.11 on 1 and 412 DF, p-value: 1.56e-05
# House price against house age, with the fitted regression line overlaid
with(
  Real_estate,
  plot(
    x = X2.house.age,
    y = Y.house.price.of.unit.area,
    xlab = "House Age",
    ylab = "House Price"
  )
)
abline(reg = model, col = "red")
\[ \hat{y} = 42.4347 - 0.2515 \cdot \text{House Age} \]
# Install (only if missing) and load the car package, used below for qqPlot().
# require() returns FALSE instead of erroring when the package is absent, so
# its result gates the one-time install.
if (!require(car)) {
  # The package name must be a character string: the bare symbol `car` is
  # evaluated as a variable and fails with "object 'car' not found".
  install.packages("car")
  library(car)
}
## Loading required package: car
## Loading required package: carData
# Residuals plotted against fitted values for the regression model
with(
  model,
  plot(
    x = fitted.values,
    y = residuals,
    main = "Fitted vs Residuals",
    xlab = "Fitted values",
    ylab = "Residuals",
    col = "blue",
    pch = 20
  )
)
# Dashed horizontal reference line at a residual of zero
abline(h = 0, col = "red", lty = 2)
# Normal quantile-quantile plot of the model residuals (car package).
# The argument expression is kept as `model$residuals` because qqPlot()
# derives its default axis label from it.
car::qqPlot(model$residuals, main = "Normal Q-Q Plot")
## [1] 271 221