YouTube “Multiple Regression” videos
The route delivery schedule data is as follows:
# Route delivery data set: one row per observation, one numeric column per
# variable. travelTime is the response modelled further down; the other three
# columns are the candidate predictors.
rds <- data.frame(
  miles       = c(89, 66, 78, 111, 44, 77, 80, 66, 109, 76),
  numDelivers = c(4, 1, 3, 6, 1, 3, 3, 2, 5, 3),
  gasPrice    = c(3.84, 3.19, 3.78, 3.89, 3.57, 3.57, 3.03, 3.51, 3.54, 3.25),
  travelTime  = c(7, 5.4, 6.6, 7.4, 4.8, 6.4, 7, 5.6, 7.3, 6.4)
)
# Autoprint the full table for inspection.
rds
## miles numDelivers gasPrice travelTime
## 1 89 4 3.84 7.0
## 2 66 1 3.19 5.4
## 3 78 3 3.78 6.6
## 4 111 6 3.89 7.4
## 5 44 1 3.57 4.8
## 6 77 3 3.57 6.4
## 7 80 3 3.03 7.0
## 8 66 2 3.51 5.6
## 9 109 5 3.54 7.3
## 10 76 3 3.25 6.4
Calculate all linear models with 1, 2, or 3 independent (predictor) variables
# Fit travelTime against every non-empty subset of the three predictors.
# The formulas are written literally inside each lm() call because the
# comparison step below recovers each model's label from the recorded call.

# One predictor
fit.m   <- lm(formula = travelTime ~ miles, data = rds)
fit.d   <- lm(formula = travelTime ~ numDelivers, data = rds)
fit.g   <- lm(formula = travelTime ~ gasPrice, data = rds)

# Two predictors
fit.md  <- lm(formula = travelTime ~ miles + numDelivers, data = rds)
fit.mg  <- lm(formula = travelTime ~ miles + gasPrice, data = rds)
fit.dg  <- lm(formula = travelTime ~ numDelivers + gasPrice, data = rds)

# All three predictors
fit.mdg <- lm(formula = travelTime ~ miles + numDelivers + gasPrice, data = rds)
Compare all linear models (F-statistic, p-value, residual standard error, adjusted R-squared)
# Build a comparison table of the fitted models. The function returns a list
# of five equal-length entries per model, so sapply() simplifies the result to
# a list-matrix with one column per model; t() then prints one row per model.
data <- sapply(
  list(fit.m, fit.d, fit.g, fit.md, fit.mg, fit.dg, fit.mdg),
  function(f) {
    s <- summary(f)
    # Use the full component name: `s$fstat` only worked through `$` partial
    # matching. Elements are, in order: value, numdf, dendf.
    f_stat <- s$fstatistic
    # Label the model by its predictors only. deparse() may split a long
    # formula over several strings, so collapse them before stripping the
    # common "travelTime ~ " prefix.
    formula_text <- paste(deparse(f$call$formula), collapse = " ")
    formula_name <- gsub("travelTime ~ ", "", formula_text)
    # Upper-tail probability of the F distribution at the observed statistic,
    # i.e. the p-value of the overall F-test. FALSE is spelled out because
    # `F` is an ordinary variable that can be reassigned.
    p_value <- pf(f_stat[1], f_stat[2], f_stat[3], lower.tail = FALSE)
    list(
      name = formula_name,
      fstat = f_stat[1],
      pvalue = p_value,
      sigma = s$sigma,              # residual standard error
      adj.rsq = s$adj.r.squared     # adjusted R-squared
    )
  }
)
t(data)
## name fstat pvalue sigma
## [1,] "miles" 49.76813 0.0001066757 0.3423088
## [2,] "numDelivers" 41.95894 0.0001926088 0.3680914
## [3,] "gasPrice" 0.6151381 0.4554534 0.8864028
## [4,] "miles + numDelivers" 23.71607 0.0007626921 0.3526424
## [5,] "miles + gasPrice" 22.63189 0.0008793061 0.3598834
## [6,] "numDelivers + gasPrice" 27.63499 0.0004762859 0.329703
## [7,] "miles + numDelivers + gasPrice" 16.99052 0.002452078 0.3446936
## adj.rsq
## [1,] 0.8442047
## [2,] 0.8198521
## [3,] -0.04467275
## [4,] 0.8346565
## [5,] 0.8277966
## [6,] 0.8554681
## [7,] 0.8420264