\(~\)
\(~\)
# Load Libraries
library(tidyverse)
\(~\)
# Import data
# Read the real-estate CSV straight from GitHub into a data frame and
# preview its first five rows.
real_estate <- read.csv(
  "https://raw.githubusercontent.com/letisalba/Data-605/main/Week-12/Real_estate.csv"
)
head(real_estate, n = 5)
## No X1.transaction.date X2.house.age X3.distance.to.the.nearest.MRT.station
## 1 1 2012.917 32.0 84.87882
## 2 2 2012.917 19.5 306.59470
## 3 3 2013.583 13.3 561.98450
## 4 4 2013.500 13.3 561.98450
## 5 5 2012.833 5.0 390.56840
## X4.number.of.convenience.stores X5.latitude X6.longitude
## 1 10 24.98298 121.5402
## 2 9 24.98034 121.5395
## 3 5 24.98746 121.5439
## 4 5 24.98746 121.5439
## 5 5 24.97937 121.5425
## Y.house.price.of.unit.area
## 1 37.9
## 2 42.2
## 3 47.3
## 4 54.8
## 5 43.1
# Check for missing values: show the first six rows of the logical
# NA-indicator matrix (TRUE marks a missing cell)
real_estate %>%
  is.na() %>%
  head()
## No X1.transaction.date X2.house.age
## [1,] FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE
## X3.distance.to.the.nearest.MRT.station X4.number.of.convenience.stores
## [1,] FALSE FALSE
## [2,] FALSE FALSE
## [3,] FALSE FALSE
## [4,] FALSE FALSE
## [5,] FALSE FALSE
## [6,] FALSE FALSE
## X5.latitude X6.longitude Y.house.price.of.unit.area
## [1,] FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE
# Compact overview of the data set: dimensions, column types and the
# leading values of every column
real_estate %>%
  glimpse()
## Rows: 414
## Columns: 8
## $ No <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, …
## $ X1.transaction.date <dbl> 2012.917, 2012.917, 2013.583, 2…
## $ X2.house.age <dbl> 32.0, 19.5, 13.3, 13.3, 5.0, 7.…
## $ X3.distance.to.the.nearest.MRT.station <dbl> 84.87882, 306.59470, 561.98450,…
## $ X4.number.of.convenience.stores <int> 10, 9, 5, 5, 5, 3, 7, 6, 1, 3, …
## $ X5.latitude <dbl> 24.98298, 24.98034, 24.98746, 2…
## $ X6.longitude <dbl> 121.5402, 121.5395, 121.5439, 1…
## $ Y.house.price.of.unit.area <dbl> 37.9, 42.2, 47.3, 54.8, 43.1, 3…
# Per-column summary statistics (min, quartiles, median, mean, max)
real_estate %>%
  summary()
## No X1.transaction.date X2.house.age
## Min. : 1.0 Min. :2013 Min. : 0.000
## 1st Qu.:104.2 1st Qu.:2013 1st Qu.: 9.025
## Median :207.5 Median :2013 Median :16.100
## Mean :207.5 Mean :2013 Mean :17.713
## 3rd Qu.:310.8 3rd Qu.:2013 3rd Qu.:28.150
## Max. :414.0 Max. :2014 Max. :43.800
## X3.distance.to.the.nearest.MRT.station X4.number.of.convenience.stores
## Min. : 23.38 Min. : 0.000
## 1st Qu.: 289.32 1st Qu.: 1.000
## Median : 492.23 Median : 4.000
## Mean :1083.89 Mean : 4.094
## 3rd Qu.:1454.28 3rd Qu.: 6.000
## Max. :6488.02 Max. :10.000
## X5.latitude X6.longitude Y.house.price.of.unit.area
## Min. :24.93 Min. :121.5 Min. : 7.60
## 1st Qu.:24.96 1st Qu.:121.5 1st Qu.: 27.70
## Median :24.97 Median :121.5 Median : 38.45
## Mean :24.97 Mean :121.5 Mean : 37.98
## 3rd Qu.:24.98 3rd Qu.:121.5 3rd Qu.: 46.60
## Max. :25.01 Max. :121.6 Max. :117.50
# List the current column names of the data frame
names(real_estate)
## [1] "No"
## [2] "X1.transaction.date"
## [3] "X2.house.age"
## [4] "X3.distance.to.the.nearest.MRT.station"
## [5] "X4.number.of.convenience.stores"
## [6] "X5.latitude"
## [7] "X6.longitude"
## [8] "Y.house.price.of.unit.area"
# Replace the verbose source headers with shorter, readable names.
# Names are assigned positionally, so the order below must match the
# column order of the data frame.
names(real_estate) <- c(
  "No",
  "Transaction_Date",
  "House_Age",
  "Nearest_MRT_Station",
  "Number_Convenience_Stores",
  "Latitude",
  "Longitude",
  "House_Price"
)
head(real_estate)
## No Transaction_Date House_Age Nearest_MRT_Station Number_Convenience_Stores
## 1 1 2012.917 32.0 84.87882 10
## 2 2 2012.917 19.5 306.59470 9
## 3 3 2013.583 13.3 561.98450 5
## 4 4 2013.500 13.3 561.98450 5
## 5 5 2012.833 5.0 390.56840 5
## 6 6 2012.667 7.1 2175.03000 3
## Latitude Longitude House_Price
## 1 24.98298 121.5402 37.9
## 2 24.98034 121.5395 42.2
## 3 24.98746 121.5439 47.3
## 4 24.98746 121.5439 54.8
## 5 24.97937 121.5425 43.1
## 6 24.96305 121.5125 32.1
# Simple (single-predictor) linear model: regress House_Price on
# House_Age, then print the coefficient table and fit statistics.
my_lm <- lm(House_Price ~ House_Age, data = real_estate)
summary(my_lm)
##
## Call:
## lm(formula = House_Price ~ House_Age, data = real_estate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.113 -10.738 1.626 8.199 77.781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.43470 1.21098 35.042 < 2e-16 ***
## House_Age -0.25149 0.05752 -4.372 1.56e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.32 on 412 degrees of freedom
## Multiple R-squared: 0.04434, Adjusted R-squared: 0.04202
## F-statistic: 19.11 on 1 and 412 DF, p-value: 1.56e-05
# Multiple regression: regress House_Price on all six predictors
# (transaction date, house age, MRT distance, convenience stores,
# latitude and longitude), then print the model summary.
my_lm2 <- lm(
  House_Price ~ Transaction_Date + House_Age + Nearest_MRT_Station +
    Number_Convenience_Stores + Latitude + Longitude,
  data = real_estate
)
summary(my_lm2)
##
## Call:
## lm(formula = House_Price ~ Transaction_Date + House_Age + Nearest_MRT_Station +
## Number_Convenience_Stores + Latitude + Longitude, data = real_estate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.664 -5.410 -0.966 4.217 75.193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.444e+04 6.776e+03 -2.131 0.03371 *
## Transaction_Date 5.146e+00 1.557e+00 3.305 0.00103 **
## House_Age -2.697e-01 3.853e-02 -7.000 1.06e-11 ***
## Nearest_MRT_Station -4.488e-03 7.180e-04 -6.250 1.04e-09 ***
## Number_Convenience_Stores 1.133e+00 1.882e-01 6.023 3.84e-09 ***
## Latitude 2.255e+02 4.457e+01 5.059 6.38e-07 ***
## Longitude -1.242e+01 4.858e+01 -0.256 0.79829
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.858 on 407 degrees of freedom
## Multiple R-squared: 0.5824, Adjusted R-squared: 0.5762
## F-statistic: 94.59 on 6 and 407 DF, p-value: < 2.2e-16
# Multiple regression without Longitude: same predictors as my_lm2
# except that the Longitude term is dropped, then print the summary.
my_lm3 <- lm(
  House_Price ~ Transaction_Date + House_Age + Nearest_MRT_Station +
    Number_Convenience_Stores + Latitude,
  data = real_estate
)
summary(my_lm3)
##
## Call:
## lm(formula = House_Price ~ Transaction_Date + House_Age + Nearest_MRT_Station +
## Number_Convenience_Stores + Latitude, data = real_estate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.623 -5.371 -1.020 4.244 75.346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.596e+04 3.233e+03 -4.936 1.17e-06 ***
## Transaction_Date 5.135e+00 1.555e+00 3.303 0.00104 **
## House_Age -2.694e-01 3.847e-02 -7.003 1.04e-11 ***
## Nearest_MRT_Station -4.353e-03 4.899e-04 -8.887 < 2e-16 ***
## Number_Convenience_Stores 1.136e+00 1.876e-01 6.056 3.17e-09 ***
## Latitude 2.269e+02 4.417e+01 5.136 4.36e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.848 on 408 degrees of freedom
## Multiple R-squared: 0.5823, Adjusted R-squared: 0.5772
## F-statistic: 113.8 on 5 and 408 DF, p-value: < 2.2e-16
\(~\)
# Diagnostic plots for my_lm, my_lm2 and my_lm3 (in that order).
# The 2 x 2 plotting grid is set before each model so that the
# diagnostic panels of one model share a single page.
invisible(lapply(
  list(my_lm, my_lm2, my_lm3),
  function(fit) {
    par(mfrow = c(2, 2))
    plot(fit)
  }
))
\(~\)
# Residual histograms, one per fitted model, to eyeball whether the
# residuals look roughly symmetric around zero.

# my_lm
hist(
  residuals(my_lm),
  xlab = "Residuals",
  main = "Histogram of Singular Linear Model"
)

# my_lm2
hist(
  residuals(my_lm2),
  xlab = "Residuals",
  main = "Histogram of Multiple Linear Model"
)

# my_lm3
hist(
  residuals(my_lm3),
  xlab = "Residuals",
  main = "Histogram of Multiple Linear Model without Longitude"
)