library(tidyverse)
library(lubridate)
library(ggcorrplot)
library(lattice)
library(psych)
library(DataExplorer)
library(reshape2)
library(car)
library(caret)
library(cowplot)
library(caTools)
library(rpart.plot)
library(e1071)
library(leaps)
library(rpart)
library(scales)
library(RColorBrewer)
library(packHV)
library(vip)
library(ranger)
library(rpart)
options(warn=-1)
library('magrittr')
library('dplyr')
library('tidyr')
library(ggplot2)
library(zoo)
library(magrittr)
library(knitr)
library(kableExtra)
library(reshape2)
library(ggpubr)
library(onewaytests)
library(effectsize)
library(corrplot)
# NOTE(review): the path below looks garbled (separators appear to be missing)
# and the statement that loads `df` is not visible in this file -- confirm both.
setwd("C:_homeworks_2023")
## X city area rooms bathroom parking.spaces floor animal furniture
## 1 0 1 240 3 3 4 - acept furnished
## 2 1 0 64 2 1 1 10 acept not furnished
## 3 2 1 443 5 5 4 3 acept furnished
## 4 3 1 73 2 2 1 12 acept not furnished
## 5 4 1 19 1 1 0 - not acept not furnished
## 6 5 1 13 1 1 0 2 acept not furnished
## hoa rent.amount property.tax fire.insurance total
## 1 R$0 R$8,000 R$1,000 R$121 R$9,121
## 2 R$540 R$820 R$122 R$11 R$1,493
## 3 R$4,172 R$7,000 R$1,417 R$89 R$12,680
## 4 R$700 R$1,250 R$150 R$16 R$2,116
## 5 R$0 R$1,200 R$41 R$16 R$1,257
## 6 R$0 R$2,200 R$42 R$28 R$2,270
# Show the first five rows as a styled table with row highlighting on hover.
kbl(df[1:5, ]) %>%
  kable_paper("hover", full_width = FALSE)
| X | city | area | rooms | bathroom | parking.spaces | floor | animal | furniture | hoa | rent.amount | property.tax | fire.insurance | total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 240 | 3 | 3 | 4 | - | acept | furnished | R$0 | R$8,000 | R$1,000 | R$121 | R$9,121 |
| 1 | 0 | 64 | 2 | 1 | 1 | 10 | acept | not furnished | R$540 | R$820 | R$122 | R$11 | R$1,493 |
| 2 | 1 | 443 | 5 | 5 | 4 | 3 | acept | furnished | R$4,172 | R$7,000 | R$1,417 | R$89 | R$12,680 |
| 3 | 1 | 73 | 2 | 2 | 1 | 12 | acept | not furnished | R$700 | R$1,250 | R$150 | R$16 | R$2,116 |
| 4 | 1 | 19 | 1 | 1 | 0 | - | not acept | not furnished | R$0 | R$1,200 | R$41 | R$16 | R$1,257 |
## [1] "X" "city" "area" "rooms"
## [5] "bathroom" "parking.spaces" "floor" "animal"
## [9] "furniture" "hoa" "rent.amount" "property.tax"
## [13] "fire.insurance" "total"
## 'data.frame': 6080 obs. of 14 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ city : int 1 0 1 1 1 1 1 1 1 1 ...
## $ area : int 240 64 443 73 19 13 55 55 82 32 ...
## $ rooms : int 3 2 5 2 1 1 1 2 3 1 ...
## $ bathroom : int 3 1 5 2 1 1 1 2 1 1 ...
## $ parking.spaces: int 4 1 4 1 0 0 1 1 1 1 ...
## $ floor : chr "-" "10" "3" "12" ...
## $ animal : chr "acept" "acept" "acept" "acept" ...
## $ furniture : chr "furnished" "not furnished" "furnished" "not furnished" ...
## $ hoa : chr "R$0" "R$540" "R$4,172" "R$700" ...
## $ rent.amount : chr "R$8,000" "R$820" "R$7,000" "R$1,250" ...
## $ property.tax : chr "R$1,000" "R$122" "R$1,417" "R$150" ...
## $ fire.insurance: chr "R$121" "R$11" "R$89" "R$16" ...
## $ total : chr "R$9,121" "R$1,493" "R$12,680" "R$2,116" ...
# Summary statistics for every column of the raw data.
summary(df)
## X city area rooms
## Min. : 0 Min. :0.0000 Min. : 10.0 Min. : 1.000
## 1st Qu.:1520 1st Qu.:1.0000 1st Qu.: 58.0 1st Qu.: 2.000
## Median :3040 Median :1.0000 Median : 100.0 Median : 3.000
## Mean :3040 Mean :0.8633 Mean : 151.1 Mean : 2.493
## 3rd Qu.:4559 3rd Qu.:1.0000 3rd Qu.: 200.0 3rd Qu.: 3.000
## Max. :6079 Max. :1.0000 Max. :24606.0 Max. :10.000
## bathroom parking.spaces floor animal
## Min. : 1.000 Min. : 0.000 Length:6080 Length:6080
## 1st Qu.: 1.000 1st Qu.: 1.000 Class :character Class :character
## Median : 2.000 Median : 1.000 Mode :character Mode :character
## Mean : 2.342 Mean : 1.756
## 3rd Qu.: 3.000 3rd Qu.: 2.000
## Max. :10.000 Max. :12.000
## furniture hoa rent.amount property.tax
## Length:6080 Length:6080 Length:6080 Length:6080
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## fire.insurance total
## Length:6080 Length:6080
## Class :character Class :character
## Mode :character Mode :character
##
##
##
This dataset contains 6080 houses to rent, described by 13 different features (plus a row index, X). The features are explained in the following sections.
The unit of observation of the analysis is a house(property)
Kaggle
Link:https://www.kaggle.com/code/micahshull/r-machine-learning-brazilian-housing/data
The variable description is as below.
This analysis focuses on building a regression model to predict the house rent from factors such as the location, the number of bathrooms, the number of rooms and whether the property is furnished. Such a model is useful to both home owners and renters, because both parties can see how these factors are associated with the rent. A renter can judge whether a property is worth its price compared with the typical rent for the same facilities, and an owner can see whether they are charging less than competitors offering the same facilities. A model built for this purpose can therefore serve many people.
Before moving to the modelling part, data pre-processing is a necessary step to bring the data into suitable types so that the analysis can continue without errors. Several required pre-processing steps were identified in this dataset, and they were carried out before the next step.
# Give every column a short, capitalised name (new name = old name).
df <- rename(
  df,
  City = "city",
  Area = "area",
  Rooms = "rooms",
  Bathrooms = "bathroom",
  Parking = "parking.spaces",
  Floor = "floor",
  Pet = "animal",
  Furnished = "furniture",
  HOA = "hoa",
  Rent = "rent.amount",
  Tax = "property.tax",
  Insurance = "fire.insurance",
  Total = "total"
)
names(df)
## [1] "X" "City" "Area" "Rooms" "Bathrooms" "Parking"
## [7] "Floor" "Pet" "Furnished" "HOA" "Rent" "Tax"
## [13] "Insurance" "Total"
# Move the response variable (Rent) to the first column.
df <- df %>% select(Rent, everything())
# Drop the row-index column "X"; drop = FALSE keeps the result a data frame.
drops <- c("X")
df <- df[, !(names(df) %in% drops), drop = FALSE]
head(df)
## Rent City Area Rooms Bathrooms Parking Floor Pet Furnished
## 1 R$8,000 1 240 3 3 4 - acept furnished
## 2 R$820 0 64 2 1 1 10 acept not furnished
## 3 R$7,000 1 443 5 5 4 3 acept furnished
## 4 R$1,250 1 73 2 2 1 12 acept not furnished
## 5 R$1,200 1 19 1 1 0 - not acept not furnished
## 6 R$2,200 1 13 1 1 0 2 acept not furnished
## HOA Tax Insurance Total
## 1 R$0 R$1,000 R$121 R$9,121
## 2 R$540 R$122 R$11 R$1,493
## 3 R$4,172 R$1,417 R$89 R$12,680
## 4 R$700 R$150 R$16 R$2,116
## 5 R$0 R$41 R$16 R$1,257
## 6 R$0 R$42 R$28 R$2,270
# Convert the categorical columns to factors (City is stored as an integer
# flag; Pet and Furnished are character).
cols <- c("City", "Pet", "Furnished")
df[cols] <- lapply(df[cols], as.factor)
# df %>% glimpse()
head(df, 5)
## Rent City Area Rooms Bathrooms Parking Floor Pet Furnished
## 1 R$8,000 1 240 3 3 4 - acept furnished
## 2 R$820 0 64 2 1 1 10 acept not furnished
## 3 R$7,000 1 443 5 5 4 3 acept furnished
## 4 R$1,250 1 73 2 2 1 12 acept not furnished
## 5 R$1,200 1 19 1 1 0 - not acept not furnished
## HOA Tax Insurance Total
## 1 R$0 R$1,000 R$121 R$9,121
## 2 R$540 R$122 R$11 R$1,493
## 3 R$4,172 R$1,417 R$89 R$12,680
## 4 R$700 R$150 R$16 R$2,116
## 5 R$0 R$41 R$16 R$1,257
# Check the type of each variable after the factor conversion.
str(df)
## 'data.frame': 6080 obs. of 13 variables:
## $ Rent : chr "R$8,000" "R$820" "R$7,000" "R$1,250" ...
## $ City : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
## $ Area : int 240 64 443 73 19 13 55 55 82 32 ...
## $ Rooms : int 3 2 5 2 1 1 1 2 3 1 ...
## $ Bathrooms: int 3 1 5 2 1 1 1 2 1 1 ...
## $ Parking : int 4 1 4 1 0 0 1 1 1 1 ...
## $ Floor : chr "-" "10" "3" "12" ...
## $ Pet : Factor w/ 2 levels "acept","not acept": 1 1 1 1 2 1 1 1 1 1 ...
## $ Furnished: Factor w/ 2 levels "furnished","not furnished": 1 2 1 2 2 2 1 2 1 2 ...
## $ HOA : chr "R$0" "R$540" "R$4,172" "R$700" ...
## $ Tax : chr "R$1,000" "R$122" "R$1,417" "R$150" ...
## $ Insurance: chr "R$121" "R$11" "R$89" "R$16" ...
## $ Total : chr "R$9,121" "R$1,493" "R$12,680" "R$2,116" ...
# Floor contains a dash ("-") placeholder and what appear to be mislabelled
# values. Replace the dash with a zero so the column can be made numeric.
# Indexing the column directly is a no-op when no dash is present.
df$Floor[df$Floor == "-"] <- "0"
# Change the data type to numeric. The values must stay in row order: sorting
# them would detach every floor number from the property it belongs to.
df$Floor <- as.numeric(df$Floor)
# Work on a copy of the data frame.
df1 <- data.frame(df)
# Strip the currency prefix from every column: first each literal "$", then
# each literal "R". gsub() returns character vectors, so every column of df1
# is character after this step.
df1[] <- lapply(df1, function(col) gsub("$", "", col, fixed = TRUE))
df1[] <- lapply(df1, function(col) gsub("R", "", col, fixed = TRUE))
head(df1)
## Rent City Area Rooms Bathrooms Parking Floor Pet Furnished HOA
## 1 8,000 1 240 3 3 4 0 acept furnished 0
## 2 820 0 64 2 1 1 0 acept not furnished 540
## 3 7,000 1 443 5 5 4 0 acept furnished 4,172
## 4 1,250 1 73 2 2 1 0 acept not furnished 700
## 5 1,200 1 19 1 1 0 0 not acept not furnished 0
## 6 2,200 1 13 1 1 0 0 acept not furnished 0
## Tax Insurance Total
## 1 1,000 121 9,121
## 2 122 11 1,493
## 3 1,417 89 12,680
## 4 150 16 2,116
## 5 41 16 1,257
## 6 42 28 2,270
# Currency columns (and the City flag): remove the thousands separator and
# convert to numeric. Entries that are not plain numbers become NA here.
df1$Rent <- as.numeric(gsub(",", "", df1$Rent))
df1$City <- as.numeric(gsub(",", "", df1$City))
df1$HOA <- as.numeric(gsub(",", "", df1$HOA))
df1$Tax <- as.numeric(gsub(",", "", df1$Tax))
df1$Insurance <- as.numeric(gsub(",", "", df1$Insurance))
df1$Total <- as.numeric(gsub(",", "", df1$Total))

# Count columns: convert from character back to integer.
df1$Area <- as.integer(df1$Area)
df1$Rooms <- as.integer(df1$Rooms)
df1$Bathrooms <- as.integer(df1$Bathrooms)
df1$Parking <- as.integer(df1$Parking)
df1$Floor <- as.integer(df1$Floor)

# Pet: count the distinct categories, then encode "acept" as 1 and anything
# else as 0.
categories <- unique(df1$Pet)
numberOfCategories <- length(categories)
numberOfCategories
## [1] 2
df1$Pet <- ifelse(df1$Pet == "acept", 1, 0)

# Furnished: encode "furnished" as 1 and anything else as 0.
categories_Furnished <- unique(df1$Furnished)
df1$Furnished <- ifelse(df1$Furnished == "furnished", 1, 0)

# Testing the null values in the dataset: number of NAs per column.
sapply(df1, function(x) sum(is.na(x)))
## Rent City Area Rooms Bathrooms Parking Floor Pet
## 0 0 0 0 0 0 0 0
## Furnished HOA Tax Insurance Total
## 0 202 27 0 0
However, some missing values were identified in the data (202 in HOA and 27 in Tax). They are replaced with the mean of their column so that the analysis can continue.
# Impute, then clean up: na.aggregate() fills each NA with its column mean
# (the default), and na.omit() drops any row that still contains an NA.
df1 <- df1 %>%
  na.aggregate() %>%
  na.omit()
# Re-count the NAs per column to confirm none remain.
sapply(df1, function(x) sum(is.na(x)))
## Rent City Area Rooms Bathrooms Parking Floor Pet
## 0 0 0 0 0 0 0 0
## Furnished HOA Tax Insurance Total
## 0 0 0 0 0
The next phase is to apply the model to that. Before that identifying the factors associated with the house rent will be done via scatterplots.
# Scatterplot matrix of all variables; the lower panels are left empty.
pairs(df1, pch = 19, lower.panel = NULL)

# Rent against Tax with a fitted straight line; both axes in dollar format.
b <- ggplot(df1, aes(Tax, Rent)) +
  geom_point(cex = 3, pch = 1, stroke = 2, color = "palegreen3") +
  geom_smooth(method = "lm", color = "green4", lwd = 3, formula = "y~x") +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & Tax")
# Rent against HOA with a fitted straight line; both axes in dollar format.
a <- ggplot(data = df1, mapping = aes(x = HOA, y = Rent)) +
  geom_point(color = "palegreen3", pch = 1, cex = 3, stroke = 2) +
  geom_smooth(formula = "y~x", method = "lm", color = "green4", lwd = 3) +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & HOA")

# Rent against Insurance with a fitted straight line; both axes in dollar
# format.
c <- ggplot(data = df1, mapping = aes(x = Insurance, y = Rent)) +
  geom_point(color = "palegreen3", pch = 1, cex = 3, stroke = 2) +
  geom_smooth(formula = "y~x", method = "lm", color = "green4", lwd = 3) +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & Insurance")

# Rent against Area with a fitted straight line; only the rent axis is in
# dollar format.
d <- ggplot(data = df1, mapping = aes(x = Area, y = Rent)) +
  geom_point(color = "palegreen3", pch = 1, cex = 3, stroke = 2) +
  geom_smooth(formula = "y~x", method = "lm", color = "green4", lwd = 3) +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & Area")
# Set the plot size options, then arrange the four scatterplots in a 2 x 2 grid.
options(repr.plot.width = 14, repr.plot.height = 10)
plot_grid(a, b, c, d, ncol = 2, nrow = 2)
The rent shows positive, linear relationships with HOA, Tax, Insurance and Area of the property.
HOA, Tax and Area also show a few outliers. However, the scatterplots follow linear rather than curved patterns, so linear regression can be applied to these variables.
# Rent against the City flag with a fitted straight line; only the rent axis
# is in dollar format.
e <- ggplot(data = df1, mapping = aes(x = City, y = Rent)) +
  geom_point(color = "palegreen3", pch = 1, cex = 3, stroke = 2) +
  geom_smooth(formula = "y~x", method = "lm", color = "green4", lwd = 3) +
  #scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & City")

# Rent against the number of rooms with a fitted straight line; only the rent
# axis is in dollar format.
f <- ggplot(data = df1, mapping = aes(x = Rooms, y = Rent)) +
  geom_point(color = "palegreen3", pch = 1, cex = 3, stroke = 2) +
  geom_smooth(formula = "y~x", method = "lm", color = "green4", lwd = 3) +
  #scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & Rooms")
# Rent against the number of bathrooms with a fitted straight line.
# Bathrooms is a count, so the x axis keeps plain numeric labels; only the
# rent axis uses the dollar formatter.
g <- ggplot(df1, aes(Bathrooms, Rent)) +
  geom_point(cex = 3, pch = 1, stroke = 2, color = "palegreen3") +
  geom_smooth(method = "lm", color = "green4", lwd = 3, formula = "y~x") +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & Bathrooms")
# Rent against the number of parking spaces with a fitted straight line; only
# the rent axis is in dollar format.
h <- ggplot(df1, aes(Parking, Rent)) +
  geom_point(cex = 3, pch = 1, stroke = 2, color = "palegreen3") +
  geom_smooth(method = "lm", color = "green4", lwd = 3, formula = "y~x") +
  scale_y_continuous(labels = dollar) +
  theme_light(base_size = 16) +
  labs(title = "Rent & Parking")
# Set the plot size options, then arrange the four scatterplots in a 2 x 2 grid.
options(repr.plot.width = 14, repr.plot.height = 10)
plot_grid(e, f, g, h, ncol = 2, nrow = 2)
The city variable is categorical and indicates if the property is located in a city or not. From the scatterplot it is observed that the rent price ranges do not significantly change between city and non-city areas.
There is a positive linear relationship between the number of rooms and the total rent variables, indicating that as the number of rooms increases the rent increases as well. The linear pattern suggests that the rooms variable can also be included when building the model.
There is a positive linear relationship between the number of bathrooms and the total rent variables, indicating as the number of bathrooms increases the rent increases as well.The linear pattern suggests that bathrooms variable can be involved for building the model as well.
There is a positive linear relationship between the number of parking space and the total rent variables, indicating as the number of parking space increases the rent increases as well.
# Create the relationship model: regress Rent on every other column of df1.
# NOTE(review): in the data rows printed earlier in this file, Total equals
# Rent + HOA + Tax + Insurance, so using Total together with HOA, Tax and
# Insurance as predictors leaks the response into the model -- confirm and
# consider dropping Total.
model <- lm(Rent~City+Area+Rooms+Bathrooms+Parking+Floor+Pet+Furnished+HOA+Tax+Insurance+Total, data = df1)
model
##
## Call:
## lm(formula = Rent ~ City + Area + Rooms + Bathrooms + Parking +
## Floor + Pet + Furnished + HOA + Tax + Insurance + Total,
## data = df1)
##
## Coefficients:
## (Intercept) City Area Rooms Bathrooms Parking
## 64.281695 37.469134 -0.004131 -8.277689 3.280373 -2.680688
## Floor Pet Furnished HOA Tax Insurance
## -0.273280 -27.561636 8.895291 -0.855003 -0.858788 9.339528
## Total
## 0.858558
# Print the coefficient table, residual summary and fit statistics.
summary(model)
##
## Call:
## lm(formula = Rent ~ City + Area + Rooms + Bathrooms + Parking +
## Floor + Pet + Furnished + HOA + Tax + Insurance + Total,
## data = df1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -724.31 -68.56 -34.81 2.99 1447.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.281695 9.387496 6.848 8.25e-12 ***
## City 37.469134 7.059387 5.308 1.15e-07 ***
## Area -0.004131 0.006570 -0.629 0.5295
## Rooms -8.277689 3.302407 -2.507 0.0122 *
## Bathrooms 3.280373 2.907898 1.128 0.2593
## Parking -2.680688 2.149355 -1.247 0.2124
## Floor -0.273280 0.380929 -0.717 0.4732
## Pet -27.561636 5.701265 -4.834 1.37e-06 ***
## Furnished 8.895291 5.613793 1.585 0.1131
## HOA -0.855003 0.003893 -219.639 < 2e-16 ***
## Tax -0.858788 0.003768 -227.934 < 2e-16 ***
## Insurance 9.339528 0.278621 33.521 < 2e-16 ***
## Total 0.858558 0.003748 229.090 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 183.1 on 6067 degrees of freedom
## Multiple R-squared: 0.9974, Adjusted R-squared: 0.9974
## F-statistic: 1.929e+05 on 12 and 6067 DF, p-value: < 2.2e-16
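The hypotheses of the ANOVA table are: H0: all regression coefficients (other than the intercept) are zero, against H1: at least one coefficient is non-zero.
The ANOVA test indicated the overall significance of the model, with an F statistic of F(12, 6067) = 192,900 and p < 0.001.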
Multiple linear regression is performed on all the variables as the full model. If the p-value is less than 0.05 then the coefficient is considered significant. In our model seven independent variables are significant, namely City, Rooms, Pet, HOA, Tax, Insurance and Total.
For this model the adjusted R-squared value is 0.9974. The value indicates that 99.74% of the total variation in the rent variable can be explained by the built model. However, the multicollinearity of the model should be tested because of this very high accuracy.
After building the model it is a mandatory requirement to conduct a diagnostic analysis to check whether the data satisfy the assumptions of regression. Therefore, this section focuses on the diagnostic analysis of the model built above.
# Split the plotting area into a 2 x 2 grid and draw the lm diagnostic plots.
par(mfrow = c(2, 2))
plot(model)
This graph is used to check the linear relationship assumption of the model.
A horizontal line without distinct patterns is an indication of a linear relationship; in our scenario the residuals follow some patterns and are not randomly distributed.
Therefore, it cannot be concluded that the relationship is linear.
Used to examine whether the residuals are normally distributed.
In our scenario the residuals points do not follow a straight dashed line indicating the residuals are not following a normal distribution.
This plot is used to check the homogeneity of variance of the residuals. Here the residuals are expected to equally spread points around a horizontal line zero axis. But in the graph of this analysis the residuals are not spread randomly.
The graph shows that influential points exist.
# Select the non-binary numeric variables for the correlation matrix.
df_cts <- df1[, c("Rent", "Area", "Rooms", "Bathrooms", "Parking", "Floor",
                  "HOA", "Tax", "Insurance", "Total")]
# Pairwise correlations (cor() default method), printed to two decimals.
res <- cor(df_cts)
round(res, 2)
## Rent Area Rooms Bathrooms Parking Floor HOA Tax Insurance Total
## Rent 1.00 0.24 0.53 0.66 0.58 -0.02 0.19 0.06 0.99 0.54
## Area 0.24 1.00 0.27 0.28 0.25 0.01 0.04 0.03 0.25 0.14
## Rooms 0.53 0.27 1.00 0.75 0.62 -0.01 0.13 0.04 0.56 0.31
## Bathrooms 0.66 0.28 0.75 1.00 0.69 -0.02 0.15 0.06 0.66 0.39
## Parking 0.58 0.25 0.62 0.69 1.00 -0.01 0.10 0.04 0.60 0.31
## Floor -0.02 0.01 -0.01 -0.02 -0.01 1.00 -0.02 0.01 -0.01 -0.01
## HOA 0.19 0.04 0.13 0.15 0.10 -0.02 1.00 0.02 0.16 0.55
## Tax 0.06 0.03 0.04 0.06 0.04 0.01 0.02 1.00 0.06 0.74
## Insurance 0.99 0.25 0.56 0.66 0.60 -0.01 0.16 0.06 1.00 0.52
## Total 0.54 0.14 0.31 0.39 0.31 -0.01 0.55 0.74 0.52 1.00
# Lower-triangle correlation plot with the variables ordered by hierarchical
# clustering.
corrplot(res, type = "lower", order = "hclust", tl.col = "black", tl.srt = 45)
As the adjusted R squared value is almost equal to 1, it is clear that multicollinearity can exist among the independent variables. To identify it, a correlation plot is drawn.
# Keep the three 0/1-encoded categorical variables in their own data frame.
df_cat <- df1[c("City", "Pet", "Furnished")]
head(df_cat)
## City Pet Furnished
## 1 1 1 1
## 2 0 1 0
## 3 1 1 1
## 4 1 1 0
## 5 1 0 0
## 6 1 1 0
# Log transformation: log(x + 1) of every column of df_cts (Rent included).
data_log <- log(df_cts + 1)
# Combine the transformed columns with the untransformed 0/1 indicators.
df_final <- cbind(data_log, df_cat)
# Reduced model, fitted on the log-transformed data.
model2 <- lm(Rent ~ City + Bathrooms + Pet + HOA + Tax + Insurance,
             data = df_final)
summary(model2)
##
## Call:
## lm(formula = Rent ~ City + Bathrooms + Pet + HOA + Tax + Insurance,
## data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.75794 -0.09290 -0.02539 0.04104 1.20557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4213532 0.0140030 315.743 < 2e-16 ***
## City 0.1283126 0.0076740 16.721 < 2e-16 ***
## Bathrooms 0.1374173 0.0091816 14.967 < 2e-16 ***
## Pet 0.0049221 0.0060328 0.816 0.415
## HOA 0.0234942 0.0008625 27.239 < 2e-16 ***
## Tax -0.0077504 0.0013032 -5.947 2.88e-09 ***
## Insurance 0.8782586 0.0047502 184.890 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1954 on 6073 degrees of freedom
## Multiple R-squared: 0.9364, Adjusted R-squared: 0.9363
## F-statistic: 1.49e+04 on 6 and 6073 DF, p-value: < 2.2e-16
The second model ‘model2’ is fitted only with the selected significant variables from the model. And some variables were removed to avoid the multicollinearity of the model as well. From the selected model ‘pet’ variable has become insignificant with p = 0.415.
The hypotheses of the ANOVA table are: H0: all regression coefficients (other than the intercept) are zero, against H1: at least one coefficient is non-zero.
The ANOVA test indicated the overall significance of the model, with an F statistic of F(6, 6073) = 14,900 and p < 0.001.
For this model the adjusted R-squared value is 0.9363.
The value indicates that 93.63% of the total variation in the log-transformed rent variable can be explained by the built model.
This model indicates good accuracy for prediction purposes.
$$\log(\text{Rent}+1) = 4.42 + 0.13\,\text{City} + 0.14\,\log(\text{Bathrooms}+1) + 0.005\,\text{Pet} + 0.023\,\log(\text{HOA}+1) - 0.008\,\log(\text{Tax}+1) + 0.88\,\log(\text{Insurance}+1)$$
# Split the plotting area into a 2 x 2 grid and draw the diagnostic plots for
# the reduced model.
par(mfrow = c(2, 2))
plot(model2)