Mainroad
# NOTE(review): in the visible part of this script, Mainroad is first assigned
# further down (recoded from Housing.5$mainroad), and the same histogram is
# drawn again after the box plots. Confirm whether this early copy is intended;
# it only works once that assignment has run.
hist(Mainroad, breaks = 2)
COMMENT:
# Load Data
Housing.5 <- read.csv(file = "~/Downloads/Housing 5.csv")

# Organizing Data
# Columns copied unchanged into stand-alone vectors
Price <- Housing.5[["price"]]
Area <- Housing.5[["area"]]
Bedrooms <- Housing.5[["bedrooms"]]
Bathrooms <- Housing.5[["bathrooms"]]
Stories <- Housing.5[["stories"]]
Parking <- Housing.5[["parking"]]

# Columns recoded as indicators: 1 where the value is "yes", 0 otherwise
Mainroad <- ifelse(Housing.5[["mainroad"]] == "yes", 1, 0)
Guestroom <- ifelse(Housing.5[["guestroom"]] == "yes", 1, 0)
Basement <- ifelse(Housing.5[["basement"]] == "yes", 1, 0)
Hot_Water_Heating <- ifelse(Housing.5[["hotwaterheating"]] == "yes", 1, 0)
Air_Conditioning <- ifelse(Housing.5[["airconditioning"]] == "yes", 1, 0)
Preferred_Area <- ifelse(Housing.5[["prefarea"]] == "yes", 1, 0)
Histogram
# Histogram of Price, divided by 1000 so the axis reads in thousands
hist(
  x = Price / 1000,
  xlab = "Price in Thousands $"
)
COMMENT:
Box Plot
# Box plot of Price, divided by 1000 so the axis reads in thousands
boxplot(
  x = Price / 1000,
  xlab = "Price in Thousands $"
)
COMMENT:
Scatter-Plot
# Area on the horizontal axis against Price (in thousands) on the vertical axis
plot(
  x = Area,
  y = Price / 1000,
  xlab = "Area",
  ylab = "Price in Thousands $"
)
COMMENT:
Box-Plot
# Box plot of Area
boxplot(
  x = Area,
  xlab = "Area"
)
COMMENT:
Histogram
# Histogram of the bedroom counts
hist(
  x = Bedrooms,
  xlab = "Number of Bedrooms"
)
COMMENT:
# Histogram of the bathroom counts
hist(
  x = Bathrooms,
  xlab = "Number of Bathrooms"
)
COMMENT:
# Histogram of the story counts
hist(
  x = Stories,
  xlab = "Number of Stories"
)
COMMENT:
# Histogram of Parking; breaks = 2 is the suggested number of cells
hist(
  x = Parking,
  breaks = 2
)
COMMENT:
# Keep only the bedrooms, bathrooms, stories and parking columns, then draw
# one box plot per column in a single figure
analysis_1 <- Housing.5[c("bedrooms", "bathrooms", "stories", "parking")]
boxplot(x = analysis_1)
# Histogram of the Mainroad indicator (1 = "yes", 0 otherwise)
hist(
  x = Mainroad,
  breaks = 2
)
COMMENT:
# Histogram of the Guestroom indicator (1 = "yes", 0 otherwise)
hist(
  x = Guestroom,
  breaks = 2
)
COMMENT:
# Histogram of the Basement indicator (1 = "yes", 0 otherwise)
hist(
  x = Basement,
  breaks = 2
)
COMMENT:
# Histogram of the Hot_Water_Heating indicator (1 = "yes", 0 otherwise)
hist(
  x = Hot_Water_Heating,
  breaks = 2
)
COMMENT:
# Histogram of the Air_Conditioning indicator (1 = "yes", 0 otherwise)
hist(
  x = Air_Conditioning,
  breaks = 2
)
COMMENT:
# Histogram of the Preferred_Area indicator (1 = "yes", 0 otherwise)
hist(
  x = Preferred_Area,
  breaks = 2
)
COMMENT:
# Regress price on every other column of Housing.5, then print the
# coefficient table and overall fit statistics
reg.mod <- lm(formula = price ~ ., data = Housing.5)
summary(reg.mod)
##
## Call:
## lm(formula = price ~ ., data = Housing.5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2619718 -657322 -68409 507176 5166695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42771.69 264313.31 0.162 0.871508
## area 244.14 24.29 10.052 < 2e-16 ***
## bedrooms 114787.56 72598.66 1.581 0.114445
## bathrooms 987668.11 103361.98 9.555 < 2e-16 ***
## stories 450848.00 64168.93 7.026 6.55e-12 ***
## mainroadyes 421272.59 142224.13 2.962 0.003193 **
## guestroomyes 300525.86 131710.22 2.282 0.022901 *
## basementyes 350106.90 110284.06 3.175 0.001587 **
## hotwaterheatingyes 855447.15 223152.69 3.833 0.000141 ***
## airconditioningyes 864958.31 108354.51 7.983 8.91e-15 ***
## parking 277107.10 58525.89 4.735 2.82e-06 ***
## prefareayes 651543.80 115682.34 5.632 2.89e-08 ***
## furnishingstatussemi-furnished -46344.62 116574.09 -0.398 0.691118
## furnishingstatusunfurnished -411234.39 126210.56 -3.258 0.001192 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1068000 on 531 degrees of freedom
## Multiple R-squared: 0.6818, Adjusted R-squared: 0.674
## F-statistic: 87.52 on 13 and 531 DF, p-value: < 2.2e-16
COMMENT:
As per #1, there are many outliers in the price and area variables, and a few in the parking and bathrooms variables.
# Remove Outliers (1.5 * IQR Rule)
# Rows are kept only when the value lies strictly inside
# (Q1 - 1.5 * IQR, Q3 + 1.5 * IQR). The fences are computed from the
# stand-alone vectors (Price, Area, Parking, Bathrooms), while every filter
# tests the data frame's own column (price, area, parking, bathrooms). The
# logical mask therefore always has exactly one entry per remaining row.
# Filtering on the stand-alone vectors would reuse masks of the original
# length after an earlier subset() had already dropped rows, so the mask and
# the rows would no longer line up.
# NOTE: Price, Area, Parking and Bathrooms are not refreshed by this block.

# Remove Outliers in Price
Price_Q1 <- quantile(Price, 0.25)
Price_Q3 <- quantile(Price, 0.75)
Price_IQR <- IQR(Price)
Housing.5 <- subset(
  Housing.5,
  price > (Price_Q1 - 1.5 * Price_IQR) &
    price < (Price_Q3 + 1.5 * Price_IQR)
)

# Remove Outliers in Area
Area_Q1 <- quantile(Area, 0.25)
Area_Q3 <- quantile(Area, 0.75)
Area_IQR <- IQR(Area)
Housing.5 <- subset(
  Housing.5,
  area > (Area_Q1 - 1.5 * Area_IQR) &
    area < (Area_Q3 + 1.5 * Area_IQR)
)

# Remove Outliers in Parking
Parking_Q1 <- quantile(Parking, 0.25)
Parking_Q3 <- quantile(Parking, 0.75)
Parking_IQR <- IQR(Parking)
Housing.5 <- subset(
  Housing.5,
  parking > (Parking_Q1 - 1.5 * Parking_IQR) &
    parking < (Parking_Q3 + 1.5 * Parking_IQR)
)

# Remove Outliers in Bathroom
Bathroom_Q1 <- quantile(Bathrooms, 0.25)
Bathroom_Q3 <- quantile(Bathrooms, 0.75)
Bathroom_IQR <- IQR(Bathrooms)
Housing.5 <- subset(
  Housing.5,
  bathrooms > (Bathroom_Q1 - 1.5 * Bathroom_IQR) &
    bathrooms < (Bathroom_Q3 + 1.5 * Bathroom_IQR)
)