# Project1_HomePrices
#
# Author: S.Shank
#
# Project 1 - Home Prices

# Setup ----
# dplyr provides select(), group_by(), summarise() and the %>% pipe;
# ggplot2 provides the plotting functions used throughout this script.
library(dplyr)
library(ggplot2)

# Load the home price data. The path is relative to the working directory.
homesData <- read.csv("homeprice.csv")

# Name of the response column used by the sale-price plots and model.
Home_Sale_Price <- "sale"

# Every column except the sale price.
specs1 <- select(homesData, -sale)

# Scatter plot comparison ----
# Plot the sale price (y-axis) against every other column (x-axis), one plot
# per column, each with a fitted linear-regression line.
for (feature in names(homesData)) {
  # Skip plotting the sale price against itself.
  if (feature != Home_Sale_Price) {
    # .data[[name]] looks a column up by its string name; it replaces the
    # deprecated aes_string().
    scatcomp1 <- ggplot(
      homesData,
      aes(x = .data[[feature]], y = .data[[Home_Sale_Price]])
    ) +
      geom_point(color = "darkorange") +
      geom_smooth(method = "lm") +
      # labs() must be chained with `+`; otherwise the title and axis labels
      # are never attached to the plot.
      labs(
        title = paste("Scatter Plot of", Home_Sale_Price, "vs", feature),
        x = feature,
        y = Home_Sale_Price
      )

    # ggplot objects are not auto-printed inside a loop.
    print(scatcomp1)
  }
}

# Histograms ----
HD <- homesData

# Draw one histogram per numeric column of the data.
for (feature in names(HD)) {
  if (is.numeric(HD[[feature]])) {
    # binwidth = 1 is expressed in the units of each column.
    # NOTE(review): this may yield a very large number of bars for
    # wide-ranging columns such as prices -- confirm it suits every feature.
    hist1 <- ggplot(HD, aes(x = .data[[feature]])) +
      geom_histogram(binwidth = 1, fill = "darkred", color = "black") +
      labs(
        title = paste("Histogram of", feature),
        x = feature,
        y = "Frequency"
      ) +
      theme_minimal()

    # ggplot objects are not auto-printed inside a loop.
    print(hist1)
  }
}

# Box plots ----
HD2 <- homesData

# Draw one box plot per numeric column of the data.
for (feature in names(HD2)) {
  # Only plot numeric columns.
  if (is.numeric(HD2[[feature]])) {
    # .data[[name]] looks a column up by its string name; it replaces the
    # deprecated aes_string().
    box1 <- ggplot(HD2, aes(y = .data[[feature]])) +
      geom_boxplot(fill = "darkgreen", color = "black") +
      labs(title = paste("Boxplot of", feature), y = feature) +
      theme_minimal()

    # ggplot objects are not auto-printed inside a loop.
    print(box1)
  }
}

# Correlation tests with 'sale' ----
# Each cor.test() call uses the default method (Pearson). Evaluating the stored
# result on its own line prints it.
# NOTE(review): cor.test() needs numeric inputs -- confirm that `neighborhood`
# is stored as a number rather than a factor/character.
salefullcorr1 <- cor.test(homesData$sale, homesData$full)
salefullcorr1

salehalfcorr1 <- cor.test(homesData$sale, homesData$half)
salehalfcorr1

salebedcorr1 <- cor.test(homesData$sale, homesData$bedrooms)
salebedcorr1

saleroomcorr1 <- cor.test(homesData$sale, homesData$rooms)
saleroomcorr1

salenbhcorr1 <- cor.test(homesData$sale, homesData$neighborhood)
salenbhcorr1

### Multiple Linear Regression Model - sale ###

Home_Sale_Price <- "sale"

# Columns used by the model: the response plus its predictors.
HSall <- select(homesData, sale, list, rooms, bedrooms, neighborhood)

# Every modelled column except the response.
predictors <- setdiff(names(HSall), Home_Sale_Price)

# Builds the formula: sale ~ list + rooms + bedrooms + neighborhood
linregform1 <- as.formula(
  paste(Home_Sale_Price, "~", paste(predictors, collapse = "+"))
)

multilinModel <- lm(linregform1, data = homesData)

# Model summary; stored here but not printed.
linregsumm <- summary(multilinModel)

# ANOVA table for the fitted model. Despite the variable name, this is not a
# leverage (hat-value) computation; the name is kept for compatibility.
leverage <- anova(multilinModel)

leverage

# Correlation tests with 'list' ----
# Each cor.test() call uses the default method (Pearson). Evaluating the stored
# result on its own line prints it.
# NOTE(review): cor.test() needs numeric inputs -- confirm that `neighborhood`
# is stored as a number rather than a factor/character.
listfullcorr1 <- cor.test(homesData$list, homesData$full)
listfullcorr1

listhalfcorr1 <- cor.test(homesData$list, homesData$half)
listhalfcorr1

listbedcor1 <- cor.test(homesData$list, homesData$bedrooms)
listbedcor1

listroomcor1 <- cor.test(homesData$list, homesData$rooms)
listroomcor1

listnbhcor1 <- cor.test(homesData$list, homesData$neighborhood)
listnbhcor1

### Multiple Linear Regression Model - list ###

Home_List_Price <- "list"

# Every column of HSall (sale, list, rooms, bedrooms, neighborhood) except the
# response. HSall is created in the sale-price regression section.
predictors2 <- setdiff(names(HSall), Home_List_Price)

# Builds the formula: list ~ sale + rooms + bedrooms + neighborhood
# NOTE(review): the sale price is used as a predictor of the list price here --
# confirm that is intended.
linregform2 <- as.formula(
  paste(Home_List_Price, "~", paste(predictors2, collapse = "+"))
)

multilinModel2 <- lm(linregform2, data = homesData)

# Model summary; stored here but not printed.
linregsumm2 <- summary(multilinModel2)

# ANOVA table for the fitted model. Despite the variable name, this is not a
# leverage (hat-value) computation; the name is kept for compatibility.
leverage2 <- anova(multilinModel2)

leverage2

# Neighborhood influence on housing sale price and list price ----

# Correlations for neighborhood v sale & list
salelistnbh <- select(homesData, sale, list, neighborhood)

NBH <- "neighborhood"

# Scatter plot comparison ----
# Plot neighborhood (y-axis) against sale and against list (x-axis).
for (var in names(salelistnbh)) {
  # Skip the comparison of the variable with itself.
  if (var != NBH) {
    # .data[[name]] looks a column up by its string name; it replaces the
    # deprecated aes_string().
    scatcomp_NBH <- ggplot(
      homesData,
      aes(x = .data[[var]], y = .data[[NBH]])
    ) +
      geom_point(color = "darkorange") +
      geom_smooth(method = "lm") +
      # labs() must be chained with `+`; otherwise the title and axis labels
      # are never attached to the plot.
      labs(
        title = paste("Scatter Plot of", NBH, "vs", var),
        x = var,
        y = NBH
      )

    # ggplot objects are not auto-printed inside a loop.
    print(scatcomp_NBH)
  }
}

# Correlation of neighborhood rank to sale price
NBHsalecorr <- cor.test(salelistnbh$neighborhood, salelistnbh$sale)
NBHsalecorr

# Correlation of neighborhood rank to list price
NBHlistcorr <- cor.test(salelistnbh$neighborhood, salelistnbh$list)
NBHlistcorr

# Scatterplot of sale price to list price according to neighborhood rank ----
# Points are colored by the neighborhood column.
salelistscat <- ggplot(
  salelistnbh,
  aes(x = list, y = sale, col = neighborhood)
) +
  geom_point()

print(salelistscat)

# Mean sale and list prices for different neighborhood ranks ----
# Each pipeline ends in print(), which displays the table and returns it
# invisibly, so the result is still assigned.

# Mean sale price per neighborhood.
meansale1 <- homesData %>%
  group_by(neighborhood) %>%
  summarise(meansaleprice = mean(sale, na.rm = TRUE)) %>%
  print()

# Mean list price per neighborhood.
meanlist1 <- homesData %>%
  group_by(neighborhood) %>%
  summarise(meanlistprice = mean(list, na.rm = TRUE)) %>%
  print()

# Mean sale price minus mean list price per neighborhood. The result column
# is given an explicit name; previously it was unnamed and took the full
# expression text as its column name.
meandiff <- homesData %>%
  group_by(neighborhood) %>%
  summarise(
    meanpricediff = mean(sale, na.rm = TRUE) - mean(list, na.rm = TRUE)
  ) %>%
  print()