library(data.table)

# Question 3 ----

# Read hprices.csv into a data frame; later steps use its price and sqrft
# columns.
size_data <- read.csv(file = "/Users/lizafurman/Downloads/hprices.csv")

# a. Centered variables ----

# Sample means of price and sqrft; missing values are left out of the means.
mean1 <- mean(size_data[["price"]], na.rm = TRUE)
mean2 <- mean(size_data[["sqrft"]], na.rm = TRUE)

# Mean-centered versions of both variables, stored as new columns.
size_data[["centered_price"]] <- size_data[["price"]] - mean1
size_data[["centered_sqrft"]] <- size_data[["sqrft"]] - mean2

# b. Simple linear regression ----

# Regress price on sqrft using the original (uncentered) variables.
model <- lm(formula = price ~ sqrft, data = size_data)

# Point estimates: intercept (b_0) and slope on sqrft (b_1).
b_0 <- coef(model)["(Intercept)"]
b_1 <- coef(model)["sqrft"]

# Standard errors are read from the "Std. Error" column of the coefficient
# table returned by summary(); R-squared comes from the same summary object.
se_b0 <- coef(summary(model))["(Intercept)", "Std. Error"]
se_b1 <- coef(summary(model))["sqrft", "Std. Error"]
r2 <- summary(model)[["r.squared"]]

# Print each quantity; the `##` lines are the recorded console output.
b_0
## (Intercept) 
##    12315.18
b_1
##    sqrft 
## 139.9854
se_b0
## [1] 17979.88
se_b1
## [1] 8.195431
r2
## [1] 0.626415
# Same regression, now fitted on the mean-centered variables.
centered_model <- lm(formula = centered_price ~ centered_sqrft,
                     data = size_data)

# Point estimates: intercept and slope on centered_sqrft.
centered_b_0 <- coef(centered_model)["(Intercept)"]
centered_b_1 <- coef(centered_model)["centered_sqrft"]

# Standard errors are read from the "Std. Error" column of the coefficient
# table returned by summary(); R-squared comes from the same summary object.
centered_se_b0 <- coef(summary(centered_model))["(Intercept)", "Std. Error"]
centered_se_b1 <- coef(summary(centered_model))["centered_sqrft", "Std. Error"]
centered_r2 <- summary(centered_model)[["r.squared"]]

# Print each quantity; the `##` lines are the recorded console output.
centered_b_0
##   (Intercept) 
## -1.934118e-11
centered_b_1
## centered_sqrft 
##       139.9854
centered_se_b0
## [1] 4782.641
centered_se_b1
## [1] 8.195431
centered_r2
## [1] 0.626415

# c. Graphs ----

# Scatterplot of price against sqrft, with the line fitted by `model` drawn
# on top.
with(
  size_data,
  plot(sqrft, price,
       main = "Price&Sqrft",
       xlab = "Sqrft", ylab = "Price")
)
abline(reg = model, col = "pink")

# Scatterplot of the mean-centered variables with their fitted line.
# The overlay must use centered_model: the uncentered fit (`model`) has the
# same slope but a different intercept, so drawing it on these axes would
# shift the line vertically away from the correct fit.
plot(size_data$centered_sqrft, size_data$centered_price,
     main = "Centered Price&Centered Sqrft",
     xlab = "Centered Sqrft", ylab = "Centered Price")
abline(centered_model, col = "purple")

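# When the variables are centered in a model where X is apartment size and Y
# is price, subtracting the means only shifts the scatterplot and the
# regression line on the “size–price” graph; their shape and slope do not
# change. Because the covariance between size and price and the variance of
# size are unaffected by centering, the slope coefficient β₁ and the
# coefficient of determination R² stay the same. Only the intercept (and its
# standard error) changes: the fitted line always passes through the point of
# means, which centering moves to the origin, so the centered intercept is
# zero up to rounding error.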