Discussion 11

#Loading package
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
# Absolute, machine-specific location of the Excel workbook; change this
# before running the script on another computer.
file_path <- "C:/Users/aleja/Desktop/Real estate valuation data set.xlsx"

# Load at most the first five data rows of the workbook.
# NOTE(review): every later step (summary, correlations, regression, normality
# test) therefore runs on these five rows only -- confirm this is intended
# rather than an analysis of the whole sheet.
data <- read_excel(path = file_path, n_max = 5)

# Show the rows that were loaded.
print(data)
## # A tibble: 5 × 8
##      No `X1 transaction date` `X2 house age` X3 distance to the nearest MRT st…¹
##   <dbl>                 <dbl>          <dbl>                               <dbl>
## 1     1                 2013.           32                                  84.9
## 2     2                 2013.           19.5                               307. 
## 3     3                 2014.           13.3                               562. 
## 4     4                 2014.           13.3                               562. 
## 5     5                 2013.            5                                 391. 
## # ℹ abbreviated name: ¹​`X3 distance to the nearest MRT station`
## # ℹ 4 more variables: `X4 number of convenience stores` <dbl>,
## #   `X5 latitude` <dbl>, `X6 longitude` <dbl>,
## #   `Y house price of unit area` <dbl>

Summary statistics

# Per-column minimum, quartiles, median, mean and maximum of the loaded rows.
summary(data)
##        No    X1 transaction date  X2 house age  
##  Min.   :1   Min.   :2013        Min.   : 5.00  
##  1st Qu.:2   1st Qu.:2013        1st Qu.:13.30  
##  Median :3   Median :2013        Median :13.30  
##  Mean   :3   Mean   :2013        Mean   :16.62  
##  3rd Qu.:4   3rd Qu.:2014        3rd Qu.:19.50  
##  Max.   :5   Max.   :2014        Max.   :32.00  
##  X3 distance to the nearest MRT station X4 number of convenience stores
##  Min.   : 84.88                         Min.   : 5.0                   
##  1st Qu.:306.59                         1st Qu.: 5.0                   
##  Median :390.57                         Median : 5.0                   
##  Mean   :381.20                         Mean   : 6.8                   
##  3rd Qu.:561.98                         3rd Qu.: 9.0                   
##  Max.   :561.98                         Max.   :10.0                   
##   X5 latitude     X6 longitude   Y house price of unit area
##  Min.   :24.98   Min.   :121.5   Min.   :37.90             
##  1st Qu.:24.98   1st Qu.:121.5   1st Qu.:42.20             
##  Median :24.98   Median :121.5   Median :43.10             
##  Mean   :24.98   Mean   :121.5   Mean   :45.06             
##  3rd Qu.:24.99   3rd Qu.:121.5   3rd Qu.:47.30             
##  Max.   :24.99   Max.   :121.5   Max.   :54.80
# Check for multicollinearity
# Pairwise correlations between every column of `data`.
# NOTE(review): this includes `No` (appears to be a row counter) and the
# response `Y house price of unit area`, which are not predictors -- consider
# restricting the check to the X columns.
cor_matrix <- cor(data)

# Flag strongly related pairs in either direction (|r| > 0.7), so strong
# negative correlations are reported too. Restricting to the upper triangle
# drops the diagonal without a floating-point `< 1` comparison, keeps
# perfectly correlated pairs, and lists each pair once instead of twice.
high_correlation <- which(
  abs(cor_matrix) > 0.7 & upper.tri(cor_matrix),
  arr.ind = TRUE
)
high_correlation
##                                        row col
## X3 distance to the nearest MRT station   4   2
## X5 latitude                              6   2
## X6 longitude                             7   2
## Y house price of unit area               8   2
## X4 number of convenience stores          5   3
## X1 transaction date                      2   4
## X6 longitude                             7   4
## Y house price of unit area               8   4
## X2 house age                             3   5
## X1 transaction date                      2   6
## X6 longitude                             7   6
## X1 transaction date                      2   7
## X3 distance to the nearest MRT station   4   7
## X5 latitude                              6   7
## Y house price of unit area               8   7
## X1 transaction date                      2   8
## X3 distance to the nearest MRT station   4   8
## X6 longitude                             7   8
# Load necessary libraries

library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Compute the correlation matrix between all columns of the loaded data.
correlation_matrix <- cor(data)

# Shrink the figure margins so the plot can use more of the device, keeping
# the previous settings so they can be put back afterwards.
old_par <- par(mar = c(1, 1, 1, 1))

# Colour-coded upper triangle of the matrix, variables ordered by hierarchical
# clustering, with small black labels rotated 45 degrees.
corrplot(
  correlation_matrix,
  method = "color",
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.7
)

# Restore the margins so later plots are not drawn with 1-line margins.
par(old_par)

Running a regression model

# Simple linear regression: unit-area house price explained by the distance to
# the nearest MRT station alone, fitted on the rows held in `data`.
model <- lm(
  formula = `Y house price of unit area` ~
    `X3 distance to the nearest MRT station`,
  data = data
)

# Print coefficients, residual summary and fit statistics for the model.
summary(model)
## 
## Call:
## lm(formula = `Y house price of unit area` ~ `X3 distance to the nearest MRT station`, 
##     data = data)
## 
## Residuals:
##       1       2       3       4       5 
##  1.2030 -0.7544 -2.8621  4.6379 -2.2243 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              34.301523   3.691416   9.292  0.00264
## `X3 distance to the nearest MRT station`  0.028222   0.008772   3.217  0.04869
##                                            
## (Intercept)                              **
## `X3 distance to the nearest MRT station` * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.496 on 3 degrees of freedom
## Multiple R-squared:  0.7753, Adjusted R-squared:  0.7004 
## F-statistic: 10.35 on 1 and 3 DF,  p-value: 0.04869
# Pull the residuals out of the fitted model for the normality test below.
residuals <- resid(model)

# Draw the model's diagnostic plots together on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(model)

# Shapiro-Wilk test; the null hypothesis is that the residuals are normal.
shapiro.test(residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.92129, p-value = 0.5383

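The results suggest that X3 distance to the nearest MRT station is a statistically significant predictor of Y house price of unit area at the 5% level (estimate = 0.028, p = 0.049). The model explains a substantial portion of the variability in house prices (R-squared = 0.775, adjusted R-squared = 0.700), and the Shapiro-Wilk test gives no evidence that the residuals depart from normality (W = 0.921, p = 0.538). Because the model was fitted to only the five rows read from the file, these results should be interpreted with caution.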

LS0tDQp0aXRsZTogIkRpc2N1c3Npb24gMTEiDQphdXRob3I6ICJMYXVyYSBCIg0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiBvcGVuaW50cm86OmxhYl9yZXBvcnQNCi0tLQ0KDQoNCiMjIyBEaXNjdXNzaW9uIDExDQoNCmBgYHtyfQ0KI0xvYWRpbmcgcGFja2FnZQ0KbGlicmFyeShyZWFkeGwpDQoNCiNSZXBsYWNpbmcgdGhlIGZpbGUgcGF0aCB3aXRoIHRoZSBjb3JyZWN0IHBhdGggdG8geW91ciBFeGNlbCBmaWxlDQpmaWxlX3BhdGggPC0gIkM6L1VzZXJzL2FsZWphL0Rlc2t0b3AvUmVhbCBlc3RhdGUgdmFsdWF0aW9uIGRhdGEgc2V0Lnhsc3giDQoNCiNSZWFkaW5nIHRoZSBmaXJzdCA1IHJvd3Mgb2YgdGhlIEV4Y2VsIGZpbGUNCmRhdGEgPC0gcmVhZF9leGNlbChmaWxlX3BhdGgsIG5fbWF4ID0gNSkNCg0KI1ZpZXdpbmcgdGhlIGZpcnN0IDUgcm93cyBvZiB0aGUgZGF0YQ0KcHJpbnQoZGF0YSkNCg0KYGBgDQoNClN1bW1hcnkgc3RhdGlzdGljcw0KYGBge3J9DQpzdW1tYXJ5KGRhdGEpDQpgYGANCg0KYGBge3J9DQojIENoZWNrIGZvciBtdWx0aWNvbGxpbmVhcml0eQ0KY29yX21hdHJpeCA8LSBjb3IoZGF0YSkNCmhpZ2hfY29ycmVsYXRpb24gPC0gd2hpY2goY29yX21hdHJpeCA+IDAuNyAmIGNvcl9tYXRyaXggPCAxLCBhcnIuaW5kID0gVFJVRSkNCmhpZ2hfY29ycmVsYXRpb24NCmBgYA0KDQoNCmBgYHtyfQ0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMNCg0KbGlicmFyeShjb3JycGxvdCkNCg0KDQoNCiMgQ29tcHV0ZSB0aGUgY29ycmVsYXRpb24gbWF0cml4DQpjb3JyZWxhdGlvbl9tYXRyaXggPC0gY29yKGRhdGEpDQoNCiMgQ29tcHV0ZSB0aGUgY29ycmVsYXRpb24gbWF0cml4DQpjb3JyZWxhdGlvbl9tYXRyaXggPC0gY29yKGRhdGEpDQoNCiMgSW5jcmVhc2UgdGhlIHBsb3Qgc2l6ZSBhbmQgZGVjcmVhc2UgdGhlIHRleHQgc2l6ZQ0KcGFyKG1hciA9IGMoMSwgMSwgMSwgMSkpICAjIEFkanVzdCBtYXJnaW4gdG8gbGVhdmUgc3BhY2UgZm9yIGxhcmdlciBwbG90DQpjb3JycGxvdChjb3JyZWxhdGlvbl9tYXRyaXgsIG1ldGhvZCA9ICJjb2xvciIsIHR5cGUgPSAidXBwZXIiLCBvcmRlciA9ICJoY2x1c3QiLCB0bC5jb2wgPSAiYmxhY2siLCB0bC5zcnQgPSA0NSwgdGwuY2V4ID0gMC43KQ0KDQpgYGANCg0KDQpSdW5uaW5nIGEgcmVncmVzc2lvbiBtb2RlbA0KDQpgYGB7cn0NCiNSdW5uaW5nIGEgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwgd2l0aCBvbmx5IFgzIGRpc3RhbmNlIHRvIHRoZSBuZWFyZXN0IE1SVCBzdGF0aW9uDQptb2RlbCA8LSBsbShgWSBob3VzZSBwcmljZSBvZiB1bml0IGFyZWFgIH4gYFgzIGRpc3RhbmNlIHRvIHRoZSBuZWFyZXN0IE1SVCBzdGF0aW9uYCwgZGF0YSA9IGRhdGEpDQoNCiNTaG93aW5nIHRoZSBzdW1tYXJ5IG9mIHRoZSByZWdyZXNzaW9uIG1vZGVsDQpzdW1tYXJ5KG1vZGVsKQ0KYGBgDQoNCmBgYHtyfQ0KI0V4dHJhY3RpbmcgdGhlIHJlc2lkdWFscyBmcm9tIHRoZSBsaW5lYXIg
cmVncmVzc2lvbiBtb2RlbA0KcmVzaWR1YWxzIDwtIHJlc2lkdWFscyhtb2RlbCkNCmBgYA0KDQoNCg0KYGBge3J9DQpwYXIobWZyb3c9YygyLDIpKQ0KcGxvdChtb2RlbCkNCmBgYA0KDQoNCmBgYHtyfQ0KI1NoYXBpcm8tV2lsayB0ZXN0IGZvciBub3JtYWxpdHkgb2YgcmVzaWR1YWxzDQpzaGFwaXJvLnRlc3QocmVzaWR1YWxzKQ0KYGBgDQoNClRoZSByZXN1bHRzIHN1Z2dlc3QgdGhhdCBYMyBkaXN0YW5jZSB0byB0aGUgbmVhcmVzdCBNUlQgc3RhdGlvbiBpcyBhIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgcHJlZGljdG9yIG9mIFkgaG91c2UgcHJpY2Ugb2YgdW5pdCBhcmVhLiBUaGUgbW9kZWwgZXhwbGFpbnMgYSBzaWduaWZpY2FudCBwb3J0aW9uIG9mIHRoZSB2YXJpYWJpbGl0eSBpbiBob3VzZSBwcmljZXMsIGFuZCB0aGUgbm9ybWFsaXR5IGFzc3VtcHRpb24gZm9yIHRoZSByZXNpZHVhbHMgaXMgbm90IHZpb2xhdGVkLg0KDQo=