# Project1_HomePrices
# Project 1 - Home Prices
# Packages used throughout the script: dplyr for data manipulation and
# ggplot2 for plotting.
library(dplyr)
library(ggplot2)

# Read the home price data; the relative path is resolved against the
# current working directory.
homesData <- read.csv("homeprice.csv")

# Name of the sale-price column, used as the response in plots and models.
Home_Sale_Price <- "sale"

# Every column of the data except the sale price.
specs1 <- select(homesData, -sale)
# Scatter plot comparison: sale price vs. each other variable
# Draw one scatter plot per column (other than the sale price itself), with
# that column on the x-axis, the sale price on the y-axis, and a fitted
# linear trend line.
for (feature in names(homesData)) {
  if (feature != Home_Sale_Price) {
    # .data[[name]] looks a column up by its string name; it replaces the
    # deprecated aes_string().
    scatcomp1 <- ggplot(
      homesData,
      aes(x = .data[[feature]], y = .data[[Home_Sale_Price]])
    ) +
      geom_point(color = "darkorange") +
      geom_smooth(method = lm) +
      # The `+` before labs() is required: without it the labels are built
      # and discarded instead of being attached to the plot.
      labs(
        title = paste("Scatter Plot of", Home_Sale_Price, "vs", feature),
        x = feature,
        y = Home_Sale_Price
      )
    print(scatcomp1)
  }
}
# Histograms of each numeric variable
HD <- homesData

# Draw one histogram per numeric column; non-numeric columns are skipped.
for (feature in names(HD)) {
  if (is.numeric(HD[[feature]])) {
    # .data[[name]] looks the column up by its string name; it replaces the
    # deprecated aes_string().
    # NOTE(review): binwidth = 1 is applied to every column regardless of its
    # scale -- confirm this is appropriate for wide-ranging columns.
    hist1 <- ggplot(HD, aes(x = .data[[feature]])) +
      geom_histogram(binwidth = 1, fill = "darkred", color = "black") +
      labs(
        title = paste("Histogram of", feature),
        x = feature,
        y = "Frequency"
      ) +
      theme_minimal()
    print(hist1)
  }
}
# Box plots of each numeric variable
HD2 <- homesData

# Draw one boxplot per numeric column; non-numeric columns are skipped.
for (feature in names(HD2)) {
  if (is.numeric(HD2[[feature]])) {
    # .data[[name]] looks the column up by its string name; it replaces the
    # deprecated aes_string().
    box1 <- ggplot(HD2, aes(y = .data[[feature]])) +
      geom_boxplot(fill = "darkgreen", color = "black") +
      labs(title = paste("Boxplot of", feature), y = feature) +
      theme_minimal()
    print(box1)
  }
}
# Correlation tests with 'sale'
# Correlation tests (cor.test defaults) between the sale column and each of
# the full, half, bedrooms, rooms and neighborhood columns. Each result is
# stored and then printed.
salefullcorr1 <- cor.test(homesData$sale, homesData$full)
salefullcorr1

salehalfcorr1 <- cor.test(homesData$sale, homesData$half)
salehalfcorr1

salebedcorr1 <- cor.test(homesData$sale, homesData$bedrooms)
salebedcorr1

saleroomcorr1 <- cor.test(homesData$sale, homesData$rooms)
saleroomcorr1

salenbhcorr1 <- cor.test(homesData$sale, homesData$neighborhood)
salenbhcorr1
###Multiple Linear Regression Model - sale###
# Fit a multiple linear regression of sale on list, rooms, bedrooms and
# neighborhood.
Home_Sale_Price <- "sale"

# Columns taking part in the model (response plus predictors).
HSall <- select(homesData, sale, list, rooms, bedrooms, neighborhood)

# Predictors are every selected column except the response.
predictors <- setdiff(names(HSall), Home_Sale_Price)

# Build `sale ~ list + rooms + bedrooms + neighborhood` from the names.
linregform1 <- reformulate(predictors, response = Home_Sale_Price)

multilinModel <- lm(linregform1, data = homesData)
linregsumm <- summary(multilinModel)

# Despite its name, `leverage` holds the ANOVA table of the fitted model,
# not leverage (hat) values.
leverage <- anova(multilinModel)
leverage
# Correlation tests with 'list'
# Correlation tests (cor.test defaults) between the list column and each of
# the full, half, bedrooms, rooms and neighborhood columns. Each result is
# stored and then printed.
listfullcorr1 <- cor.test(homesData$list, homesData$full)
listfullcorr1

listhalfcorr1 <- cor.test(homesData$list, homesData$half)
listhalfcorr1

listbedcor1 <- cor.test(homesData$list, homesData$bedrooms)
listbedcor1

listroomcor1 <- cor.test(homesData$list, homesData$rooms)
listroomcor1

listnbhcor1 <- cor.test(homesData$list, homesData$neighborhood)
listnbhcor1
###Multiple Linear Regression Model - list###
# Fit a multiple linear regression with list as the response and the
# remaining columns of HSall as predictors.
Home_List_Price <- "list"

# Predictors are every column of HSall except the response.
predictors2 <- setdiff(names(HSall), Home_List_Price)

# Build the model formula `list ~ <predictors>` from the column names.
linregform2 <- reformulate(predictors2, response = Home_List_Price)

multilinModel2 <- lm(linregform2, data = homesData)
linregsumm2 <- summary(multilinModel2)

# Despite its name, `leverage2` holds the ANOVA table of the fitted model,
# not leverage (hat) values.
leverage2 <- anova(multilinModel2)
leverage2
# Neighborhood influence on housing sale price and list price
# Correlations for neighborhood vs. sale and list
# Subset with only the sale, list and neighborhood columns.
salelistnbh <- select(homesData, sale, list, neighborhood)

# Name of the neighborhood column, used as the y-axis variable below.
NBH <- "neighborhood"
# Scatter plot comparison: neighborhood vs. sale and list
# Draw one scatter plot per column of salelistnbh other than neighborhood,
# with that column on the x-axis, neighborhood on the y-axis, and a fitted
# linear trend line.
for (var in names(salelistnbh)) {
  # Skip the comparison of the variable with itself
  if (var != NBH) {
    # Create a scatter plot comparing NBH (y-axis) with var (x-axis).
    # .data[[name]] looks a column up by its string name; it replaces the
    # deprecated aes_string().
    scatcomp_NBH <- ggplot(
      homesData,
      aes(x = .data[[var]], y = .data[[NBH]])
    ) +
      geom_point(color = "darkorange") +
      geom_smooth(method = lm) +
      # The `+` before labs() is required so the labels attach to the plot.
      labs(
        title = paste("Scatter Plot of", NBH, "vs", var),
        x = var,
        y = NBH
      )
    # Print the plot
    print(scatcomp_NBH)
  }
}
# Correlation of neighborhood rank to sale price
# Correlation test (cor.test defaults) between neighborhood and sale; the
# result is stored and then printed.
NBHsalecorr <- cor.test(salelistnbh$neighborhood, salelistnbh$sale)
NBHsalecorr
# Correlation of neighborhood rank to list price
# Correlation test (cor.test defaults) between neighborhood and list; the
# result is stored and then printed.
NBHlistcorr <- cor.test(salelistnbh$neighborhood, salelistnbh$list)
NBHlistcorr
# Scatter plot of sale price vs. list price, colored by neighborhood rank
# Scatter plot with list on the x-axis and sale on the y-axis, with points
# colored by neighborhood.
salelistscat <- ggplot(
  salelistnbh,
  aes(x = list, y = sale, col = neighborhood)
) +
  geom_point()
print(salelistscat)
# Mean sale and list prices for different neighborhood ranks
# Mean of sale per neighborhood (NA values ignored); printed and stored.
meansale1 <- homesData %>%
  group_by(neighborhood) %>%
  summarise(meansaleprice = mean(sale, na.rm = TRUE)) %>%
  print()

# Mean of list per neighborhood (NA values ignored); printed and stored.
meanlist1 <- homesData %>%
  group_by(neighborhood) %>%
  summarise(meanlistprice = mean(list, na.rm = TRUE)) %>%
  print()

# Mean of sale minus mean of list, per neighborhood. The result column is
# named explicitly so it does not get an auto-generated expression name.
meandiff <- homesData %>%
  group_by(neighborhood) %>%
  summarise(
    meandiff = mean(sale, na.rm = TRUE) - mean(list, na.rm = TRUE)
  ) %>%
  print()