library(data.table)
# Load the data; the `price` and `sqrft` columns are used below.
size_data <- read.csv("/Users/lizafurman/Downloads/hprices.csv")

# Sample means of both variables (missing values are ignored).
mean1 <- mean(size_data[["price"]], na.rm = TRUE)
mean2 <- mean(size_data[["sqrft"]], na.rm = TRUE)

# Mean-centered copies: each observation minus its column mean.
size_data[["centered_price"]] <- size_data[["price"]] - mean1
size_data[["centered_sqrft"]] <- size_data[["sqrft"]] - mean2
# Least-squares fit of price on square footage (uncentered variables).
model <- lm(price ~ sqrft, data = size_data)

# Summarise the fit once; the coefficient table holds the standard errors.
model_fit_summary <- summary(model)
model_coef_table <- model_fit_summary$coefficients

# Intercept estimate.
b_0 <- coef(model)[1]
b_0
## (Intercept)
## 12315.18

# Slope estimate.
b_1 <- coef(model)[2]
b_1
## sqrft
## 139.9854

# Standard error of the intercept.
se_b0 <- model_coef_table["(Intercept)", "Std. Error"]
se_b0
## [1] 17979.88

# Standard error of the slope.
se_b1 <- model_coef_table["sqrft", "Std. Error"]
se_b1
## [1] 8.195431

# Coefficient of determination.
r2 <- model_fit_summary$r.squared
r2
## [1] 0.626415
# Least-squares fit using the mean-centered price and square footage.
centered_model <- lm(centered_price ~ centered_sqrft, data = size_data)

# Summarise the fit once; the coefficient table holds the standard errors.
centered_fit_summary <- summary(centered_model)
centered_coef_table <- centered_fit_summary$coefficients

# Intercept estimate (zero up to floating-point error after centering).
centered_b_0 <- coef(centered_model)[1]
centered_b_0
## (Intercept)
## -1.934118e-11

# Slope estimate.
centered_b_1 <- coef(centered_model)[2]
centered_b_1
## centered_sqrft
## 139.9854

# Standard error of the intercept.
centered_se_b0 <- centered_coef_table["(Intercept)", "Std. Error"]
centered_se_b0
## [1] 4782.641

# Standard error of the slope.
centered_se_b1 <- centered_coef_table["centered_sqrft", "Std. Error"]
centered_se_b1
## [1] 8.195431

# Coefficient of determination.
centered_r2 <- centered_fit_summary$r.squared
centered_r2
## [1] 0.626415
# Scatterplot of the raw observations: square footage against price.
plot(
  x = size_data$sqrft,
  y = size_data$price,
  main = "Price&Sqrft",
  xlab = "Sqrft",
  ylab = "Price"
)
# Overlay the regression line estimated by `model`.
abline(reg = model, col = "pink")
# Scatterplot of the mean-centered observations.
plot(size_data$centered_sqrft, size_data$centered_price,
  main = "Centered Price&Centered Sqrft",
  xlab = "Centered Sqrft", ylab = "Centered Price"
)
# Overlay the line fitted to the centered variables. The uncentered `model`
# has an intercept of about 12315, so drawing it on these axes would place
# the line away from the centered point cloud.
abline(centered_model, col = "purple")
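When both variables are centered in a model where X is apartment size and
Y is price, subtracting the means only shifts the scatterplot and the
regression line on the “size–price” graph so that the line passes through
the origin; their shape and slope do not change. Centering leaves the
covariance between size and price and the variance of size unchanged, so
the slope coefficient β₁ = Cov(X, Y) / Var(X), its standard error, and the
coefficient of determination R² stay the same. Only the intercept changes:
it becomes zero (up to rounding error), and its standard error falls from
17979.88 to 4782.64, because the intercept is now estimated at the mean of
X rather than extrapolated to a size of zero.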