YouTube “Multiple Regression” videos

Home Price Data

# Numeric codes used to tag each home's region. The letters presumably
# stand for compass directions (East/North/South/West) -- TODO confirm.
E <- 0
N <- 1
S <- 2
W <- 3

# Fifteen home listings: the two numeric columns are created first, then
# the two categorical columns are attached as factors.
home_prices <- data.frame(
  price = c(450, 398, 412, 307, 289, 310, 245, 260,
            290, 332, 377, 322, 383, 404, 371),
  sqft = c(3860, 3787, 3681, 3643, 3601, 3485, 3401, 3312,
           3213, 3207, 3124, 3109, 3076, 3073, 3052)
)
# Only the eighth home carries a 0 here; presumably 1 flags an exemplary
# high school -- verify against the data source.
home_prices$exempHS <- factor(c(rep(1, 7), 0, rep(1, 7)))
# Factor levels come out as "0", "1", "2", "3" (the codes defined above).
home_prices$region <- factor(
  c(S, N, S, N, E, S, W, N, W, S, S, S, E, S, S)
)
home_prices
##    price sqft exempHS region
## 1    450 3860       1      2
## 2    398 3787       1      1
## 3    412 3681       1      2
## 4    307 3643       1      1
## 5    289 3601       1      0
## 6    310 3485       1      2
## 7    245 3401       1      3
## 8    260 3312       0      1
## 9    290 3213       1      3
## 10   332 3207       1      2
## 11   377 3124       1      2
## 12   322 3109       1      2
## 13   383 3076       1      0
## 14   404 3073       1      2
## 15   371 3052       1      2

Scatter plot of price vs. sqft, colored by exemplary high school status (exempHS)

library(ggplot2)
# Price (y) against square footage (x), coloured by exempHS, with an
# lm-fitted smoother drawn without its confidence band.
g <- ggplot(home_prices, aes(x = sqft, y = price, color = exempHS)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)
g

Scatter plot of price vs. sqft, colored by region

# Price (y) against square footage (x), coloured by region, with an
# lm-fitted smoother drawn without its confidence band.
g <- ggplot(home_prices, aes(x = sqft, y = price, color = region)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)
g

Regression Model

# Fit price on every other column of home_prices (the `.` in the formula)
# and show the coefficient table and fit statistics.
fit <- lm(formula = price ~ ., data = home_prices)
summary(fit)
## 
## Call:
## lm(formula = price ~ ., data = home_prices)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -68.062 -39.267   8.557  33.834  58.411 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.61239  184.77308   0.745    0.475
## sqft          0.03607    0.05756   0.627    0.546
## exempHS1     77.96322   68.77479   1.134    0.286
## region1       2.91912   57.13357   0.051    0.960
## region2      36.77754   41.80086   0.880    0.402
## region3     -67.36375   52.89474  -1.274    0.235
## 
## Residual standard error: 52.86 on 9 degrees of freedom
## Multiple R-squared:  0.5131, Adjusted R-squared:  0.2427 
## F-statistic: 1.897 on 5 and 9 DF,  p-value: 0.1904