Problem 3

# Simple linear least-squares fit of pickup count on time (in hours),
# obtained from the normal equations (X'X) beta = X'Y.
library(pander)
## Warning: package 'pander' was built under R version 4.2.3

cab <- read.csv("https://raw.githubusercontent.com/pengdsci/MAT325/main/w11/cab-dataset.txt")
timeHours <- cab$TimeMin / 60 # x-coordinate: converted to hours
pickupCount <- cab$PickupCount # y-coordinate
n <- length(pickupCount)

# Design matrix: a column of ones for the intercept, then the predictor.
X <- cbind(rep(1, n), timeHours)
Y <- cbind(pickupCount)

# Solve the linear system directly rather than forming the explicit inverse
# of X'X; crossprod(X) is X'X and crossprod(X, Y) is X'Y.
beta <- solve(crossprod(X), crossprod(X, Y))

# Keep beta at full precision and round only for display.
pander(round(beta, 4))
  pickupCount
16.9
timeHours 1.395

The fitted line is $y = 1.395x + 16.9$, where $x$ is the time in hours and $y$ is the number of pickups.

# Cubic polynomial least-squares fit of pickup count on time (in hours),
# obtained from the normal equations (X'X) beta = X'Y.
Y <- matrix(pickupCount, ncol = 1)
cabs <- timeHours

# Design matrix with named columns: 1, t, t^2, t^3.
X <- cbind(
  intercept = rep(1, length(cabs)),
  time = cabs,
  time.sq = cabs^2,
  time.cu = cabs^3
)

# Solve the linear system directly rather than forming the explicit inverse
# of X'X; the powers of time make X'X poorly scaled, so avoiding the inverse
# is the numerically safer route.
beta <- solve(crossprod(X), crossprod(X, Y))

# Round only for display; beta itself keeps full precision.
pander(round(beta, 4))
intercept 41.34
time -9.159
time.sq 1.016
time.cu -0.0267

$y = 41.34 - 9.159x + 1.016x^2 - 0.0267x^3$

# Scatter plot of the raw data: pickup count against time in hours.
plot(timeHours, pickupCount)

# Fitted line on its own plot. Without from/to, a standalone curve() call
# (add = FALSE) is evaluated over (0, 1) only, so the range of the observed
# times is passed explicitly.
curve(1.395 * x + 16.9, from = min(timeHours), to = max(timeHours))

# Fitted cubic on its own plot, over the same observed time range.
curve(-0.0267 * x^3 + 1.016 * x^2 - 9.159 * x + 41.34,
      from = min(timeHours), to = max(timeHours))

# Overlay both fitted models on the scatter plot of the data.
cab <- read.csv("https://raw.githubusercontent.com/pengdsci/MAT325/main/w11/cab-dataset.txt")
timeHours <- cab$TimeMin / 60 # x-coordinate: converted to hours
pickupCount <- cab$PickupCount # y-coordinate

# Only one plot is drawn, so use a single panel; a 1 x 2 layout would leave
# half of the graphical page empty.
par(mfrow = c(1, 1))
plot(timeHours, pickupCount, pch = 19, col = "blue", main = "")

# add = TRUE draws each curve on the existing scatter plot over its x-range.
# Distinct colours and line types make the two fits distinguishable.
curve(-0.0267 * x^3 + 1.016 * x^2 - 9.159 * x + 41.34,
      add = TRUE, col = "red", lwd = 2)
curve(1.395 * x + 16.9, add = TRUE, col = "darkgreen", lwd = 2, lty = 2)
legend("topleft",
       legend = c("Cubic fit", "Linear fit"),
       col = c("red", "darkgreen"),
       lty = c(1, 2), lwd = 2, bty = "n")

The cubic approximation is the better representation because it follows the curvature of the data. The linear approximation cannot account for that curvature, although it does capture the overall upward trend in the data.