# Simple linear regression of `dist` on `speed` for the `cars` dataset,
# computed by hand from the sums of squares and then drawn over the scatterplot.

# Examine the data
head(cars) # built-in dataset shipped with base R (the `datasets` package)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# Create X and Y values
speed <- cars$speed
dist <- cars$dist
n <- length(speed) # number of observations
# Plot the data
plot(
  speed, dist,
  main = "Vehicle Speed vs Distance", xlab = "Speed", ylab = "Distance"
)
# The scatterplot shows a roughly linear, increasing relationship between
# speed and distance. That also matches intuition: `dist` is the stopping
# distance, and a faster car needs more room to stop. Therefore, I will not
# be transforming the data before applying the regression line.
# Begin creating the regression line
# Rename the variables so that they drop straight into the textbook formulas
x <- speed
y <- dist
xbar <- mean(x) # Mean of X, written as X with a bar above the letter
ybar <- mean(y) # Mean of Y, written as Y with a bar above the letter
SSY <- sum((y - ybar)^2) # Sum of squares for Y
SSX <- sum((x - xbar)^2) # Sum of squares for X
SSXY <- sum((x - xbar) * (y - ybar)) # Sum of cross-products of X and Y
betahat1 <- SSXY / SSX # Slope estimate (beta hat 1)
betahat0 <- ybar - betahat1 * xbar # Intercept estimate (beta hat 0)
# The slope is 3.932409; it is positive, which matches the upward trend
# in the plot
betahat1
## [1] 3.932409
# Compute R squared
R2 <- betahat1^2 * SSX / SSY
R2 # Proportion of the variation in Y explained by the fitted line (0 to 1)
## [1] 0.6510794
# A value of 0.6510794 means the line explains about 65% of the variation
# in distance, i.e. there is a moderately strong linear relationship
# (I will explain in another lesson)
# Apply the regression line
plot(
  speed, dist,
  main = "Vehicle Speed vs Distance", xlab = "Speed", ylab = "Distance"
)
abline(a = betahat0, b = betahat1) # a = intercept, b = slope
