# Comparing Home Prices to Different Variables

# Attach the plotting package, then read the home listings from disk.
# The rest of the script uses these columns of `homes`:
# list, sale, full, half, bedrooms, rooms, neighborhood.
library(ggplot2)
homes <- read.csv(file = "homeprice.csv")

# Scatter Plots with Trend Lines
## Scatter Plot Comparing Sale Price to List Price

# Sale price plotted against list price: one dark-blue point per home,
# overlaid with a red straight-line fit (method = "lm") and its
# confidence band (se = TRUE).
ggplot(data = homes, mapping = aes(x = list, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  theme_minimal() +
  ggtitle("Sale Price vs List Price") +
  xlab("List Price") +
  ylab("Sale Price")

## Scatter Plot Comparing Sale Price to Number of Full Bathrooms

# Sale price plotted against the `full` column (full bathrooms), with a
# red straight-line fit and its confidence band drawn over the points.
ggplot(data = homes, mapping = aes(x = full, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  theme_minimal() +
  ggtitle("Sale Price by Number of Full Bathrooms") +
  xlab("Number of Full Bathrooms") +
  ylab("Sale Price")

## Scatter Plot Comparing Sale Price to Number of Half Bathrooms

# Sale price plotted against the `half` column (half bathrooms), with a
# red straight-line fit (method = "lm") and its confidence band.
ggplot(homes, aes(x = half, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(title = "Sale Price by Number of Half Bathrooms",
       # The x axis shows `half`, so label it as half (not full) bathrooms.
       x = "Number of Half Bathrooms",
       y = "Sale Price") +
  theme_minimal()

##Scatter Plot Comparing Sale Price to Number of Bedrooms

# Sale price plotted against the `bedrooms` column, with a red
# straight-line fit and its confidence band drawn over the points.
ggplot(data = homes, mapping = aes(x = bedrooms, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  theme_minimal() +
  ggtitle("Sale Price by Number of Bedrooms") +
  xlab("Number of Bedrooms") +
  ylab("Sale Price")

##Scatter Plot Comparing Sale Price to Number of non-Bedrooms

# Sale price plotted against the `rooms` column (labelled as the number
# of non-bedrooms), with a red straight-line fit and its confidence band.
ggplot(data = homes, mapping = aes(x = rooms, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  theme_minimal() +
  ggtitle("Sale Price by Number of Non-Bedrooms") +
  xlab("Number of Non-Bedrooms") +
  ylab("Sale Price")

##Scatter Plot Comparing Sale Price to Neighborhood Rank

# Sale price plotted against the `neighborhood` column (neighborhood
# rank), with a red straight-line fit and its confidence band.
ggplot(data = homes, mapping = aes(x = neighborhood, y = sale)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  theme_minimal() +
  ggtitle("Sale Price by Neighborhood Rank") +
  xlab("Neighborhood Rank") +
  ylab("Sale Price")

### Takeaways: Slight positive correlations between sale price and the number of half baths, full baths, and bedrooms; considerably stronger positive correlations between sale price and the number of non-bedrooms, neighborhood rank, and list price.
## Linear Regression Model to determine which variable has the greatest impact on sale price

# Multiple linear regression of sale price on the five house attributes.
SalePricemodel <- lm(
  formula = sale ~ full + half + bedrooms + rooms + neighborhood,
  data = homes
)

# Coefficient estimates and overall fit statistics.
summary(SalePricemodel)

# NOTE(review): for a single lm fit, anova() produces a sequential table,
# so each term's sum of squares depends on its position in the formula.
anova(SalePricemodel)

### Takeaways: According to the ANOVA test, the number of full bathrooms has the greatest impact on sale price.
## Linear Regression Model to determine which variable has the greatest impact on list price

# Multiple linear regression of list price on the five house attributes.
ListPricemodel <- lm(
  formula = list ~ full + half + bedrooms + rooms + neighborhood,
  data = homes
)

# Coefficient estimates and overall fit statistics.
summary(ListPricemodel)

# NOTE(review): for a single lm fit, anova() produces a sequential table,
# so each term's sum of squares depends on its position in the formula.
anova(ListPricemodel)

### Takeaways: The number of full bathrooms also has the greatest impact on list price.
## Effect of Neighborhood Rank on list price and sale price

# List price modelled on sale price plus neighborhood rank, followed by
# its ANOVA table.
ListVsSalePricewithrankmodel <- lm(
  formula = list ~ sale + neighborhood,
  data = homes
)
anova(ListVsSalePricewithrankmodel)

# The mirror-image model: sale price on list price plus neighborhood
# rank, followed by its ANOVA table.
SalesVsListPricewithrankmodel <- lm(
  formula = sale ~ list + neighborhood,
  data = homes
)
anova(SalesVsListPricewithrankmodel)

### Overall: A real estate agent should focus primarily on the number of full bathrooms, as that has the greatest impact on how much a buyer is willing to pay for a house. In addition, based on the effect of neighborhood rank on sale and list price, a house in a richer neighborhood is less likely to sell for more than its asking price.