# Attach the plotting package before anything else runs.
library(ggplot2)

# Home price data, read from a path relative to the working directory.
homes <- read.csv(file = "homeprice.csv")
# Scatter Plots with Trend Lines
## Scatter Plot Comparing Sale Price to List Price
# Layers draw in the order they are added: points first, then the linear fit
# and its confidence band on top of them.
ggplot(data = homes, mapping = aes(x = list, y = sale)) +
  theme_minimal() +
  labs(
    x = "List Price",
    y = "Sale Price",
    title = "Sale Price vs List Price"
  ) +
  geom_point(colour = "darkblue") +
  geom_smooth(method = "lm", colour = "red", se = TRUE)
## Scatter Plot Comparing Sale Price to Number of Full Bathrooms
# Points go down first; the red linear fit with its confidence band is drawn
# over them.
ggplot(data = homes, mapping = aes(x = full, y = sale)) +
  theme_minimal() +
  labs(
    x = "Number of Full Bathrooms",
    y = "Sale Price",
    title = "Sale Price by Number of Full Bathrooms"
  ) +
  geom_point(colour = "darkblue") +
  geom_smooth(method = "lm", colour = "red", se = TRUE)
##Scatter Plot comparing Sale Price to Number of Half Baths
# Sale price against the number of half bathrooms, with a linear trend line
# and its confidence band drawn over the points.
ggplot(homes, aes(x = half, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    title = "Sale Price by Number of Half Bathrooms",
    # The x aesthetic is `half`, so the axis must be labelled as half baths.
    x = "Number of Half Bathrooms",
    y = "Sale Price"
  ) +
  theme_minimal()
##Scatter Plot Comparing Sale Price to Number of Bedrooms
# Scatter of sale price per bedroom count; the fitted line is added after the
# points so it stays visible.
ggplot(data = homes, mapping = aes(x = bedrooms, y = sale)) +
  theme_minimal() +
  labs(
    x = "Number of Bedrooms",
    y = "Sale Price",
    title = "Sale Price by Number of Bedrooms"
  ) +
  geom_point(colour = "darkblue") +
  geom_smooth(method = "lm", colour = "red", se = TRUE)
##Scatter Plot Comparing Sale Price to Number of non-Bedrooms
# `rooms` is plotted on the x axis and labelled as the non-bedroom count; the
# linear fit is layered over the points.
ggplot(data = homes, mapping = aes(x = rooms, y = sale)) +
  theme_minimal() +
  labs(
    x = "Number of Non-Bedrooms",
    y = "Sale Price",
    title = "Sale Price by Number of Non-Bedrooms"
  ) +
  geom_point(colour = "darkblue") +
  geom_smooth(method = "lm", colour = "red", se = TRUE)
##Scatter Plot Comparing Sale Price to Neighborhood Rank
# Sale price against neighborhood rank, with a linear trend line over the
# points.
# NOTE(review): a straight-line fit presumes `neighborhood` is numeric; confirm.
ggplot(data = homes, mapping = aes(x = neighborhood, y = sale)) +
  theme_minimal() +
  labs(
    x = "Neighborhood Rank",
    y = "Sale Price",
    title = "Sale Price by Neighborhood Rank"
  ) +
  geom_point(colour = "darkblue") +
  geom_smooth(method = "lm", colour = "red", se = TRUE)
### Takeaways
# Slight positive correlations between sale price and the number of half baths,
# full baths, and bedrooms; considerably stronger positive correlations between
# sale price and the number of non-bedrooms, neighborhood rank, and list price.
## Linear Regression Model to determine which variable has the greatest impact on sale price
# Regress sale price on the five house attributes and print the coefficient
# summary.
SalePricemodel <- lm(
  sale ~ full + half + bedrooms + rooms + neighborhood,
  data = homes
)
summary(SalePricemodel)

# anova() on a single lm fit reports sequential (Type I) sums of squares: each
# term is tested after the terms written before it, so the table changes when
# the formula order changes.
anova(SalePricemodel)

# Order-independent companion table: F test for removing each term from the
# full model, so predictors can be compared without regard to formula order.
drop1(SalePricemodel, test = "F")
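### Takeaways
# According to the ANOVA test, the number of full bathrooms has the greatest
# impact on sale price. Caveat: anova() on a single lm fit uses sequential sums
# of squares, so this ranking depends on the order of the terms in the formula.
## Linear Regression Model to determine which variable has the greatest impact on list price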
# Regress list price on the five house attributes and print the coefficient
# summary.
ListPricemodel <- lm(
  list ~ full + half + bedrooms + rooms + neighborhood,
  data = homes
)
summary(ListPricemodel)

# anova() on a single lm fit reports sequential (Type I) sums of squares: each
# term is tested after the terms written before it, so the table changes when
# the formula order changes.
anova(ListPricemodel)

# Order-independent companion table: F test for removing each term from the
# full model, so predictors can be compared without regard to formula order.
drop1(ListPricemodel, test = "F")
### Takeaways
# The number of full bathrooms also has the greatest impact on list price.
## Effect of Neighborhood Rank on list price and sale price
# Two mirrored fits: each price is regressed on the other price plus
# neighborhood rank. Both models are fitted first, then their ANOVA tables are
# printed, list-price model first.
ListVsSalePricewithrankmodel <- lm(
  formula = list ~ sale + neighborhood,
  data = homes
)
SalesVsListPricewithrankmodel <- lm(
  formula = sale ~ list + neighborhood,
  data = homes
)

anova(ListVsSalePricewithrankmodel)
anova(SalesVsListPricewithrankmodel)
### Overall
# A real estate agent should focus primarily on the number of full bathrooms, as
# that has the greatest impact on how much a buyer is willing to pay for a
# house. In addition, based on the effect of neighborhood rank on sale and list
# price, a house in a richer neighborhood is less likely to sell above its
# asking price.