YouTube “Multiple Regression” videos

Home Price Data

# Example data set: 15 rows with numeric `price` and `sqft` columns and a
# two-level factor `exempHS` (levels "0" and "1").
home_prices <- data.frame(
  price = c(
    145, 69.9, 315, 144.9, 134.9, 369, 95, 228.9,
    149, 295, 388.5, 75, 130, 174, 334.9
  ),
  sqft = c(
    1872, 1954, 4104, 1524, 1297, 3278, 1192, 2252,
    1620, 2466, 3188, 1061, 1195, 1552, 2901
  ),
  # Stored as a factor so modelling and plotting treat it as categorical.
  exempHS = factor(c(0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1))
)
# Auto-print the full table so the raw values appear in the output.
home_prices
##    price sqft exempHS
## 1  145.0 1872       0
## 2   69.9 1954       0
## 3  315.0 4104       1
## 4  144.9 1524       0
## 5  134.9 1297       0
## 6  369.0 3278       1
## 7   95.0 1192       0
## 8  228.9 2252       1
## 9  149.0 1620       0
## 10 295.0 2466       1
## 11 388.5 3188       1
## 12  75.0 1061       0
## 13 130.0 1195       0
## 14 174.0 1552       1
## 15 334.9 2901       1

Plotting price vs. sqft

library(ggplot2)
# Scatter plot of price (y) against sqft (x), with points colored by exempHS.
# Written with ggplot() + geom_point() because qplot() is deprecated as of
# ggplot2 3.4.0; for two numeric variables qplot() drew the same point layer.
ggplot(home_prices, aes(x = sqft, y = price, color = exempHS)) +
  geom_point()

Regression analysis

# Ordinary least-squares fit of price on every other column of home_prices.
# The `.` on the right-hand side expands to sqft + exempHS; it is kept as
# written because summary() echoes the call exactly as typed.
fit <- lm(price ~ ., data = home_prices)
# Auto-print the coefficient table, residual summary and overall F-test.
summary(fit)
## 
## Call:
## lm(formula = price ~ ., data = home_prices)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -78.45 -27.26  16.22  28.04  64.91 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 27.07494   33.68996   0.804   0.4372  
## sqft         0.06207    0.02032   3.054   0.0100 *
## exempHS1    98.64787   35.96319   2.743   0.0178 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.65 on 12 degrees of freedom
## Multiple R-squared:  0.857,  Adjusted R-squared:  0.8331 
## F-statistic: 35.94 on 2 and 12 DF,  p-value: 8.569e-06

Graphing regression lines (fitted separately for each exempHS group)

# Scatter plot of price against sqft with one straight-line fit per exempHS
# level, built as a single layered expression.
# NOTE: because color is mapped to exempHS, geom_smooth() fits a separate
# linear model within each group, so the two lines may have different slopes,
# unlike the additive model stored in `fit`, which shares a single sqft slope.
g <- ggplot(home_prices, aes(x = sqft, y = price, color = exempHS)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) # se = FALSE hides the confidence band
# Auto-print to render the plot.
g