# Preview the first few rows of the data
head(cars) # built-in dataset shipped with base R (package 'datasets')
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
# Pull the X (speed) and Y (dist) columns out of the data frame as vectors
speed <- cars[["speed"]]
dist <- cars[["dist"]]
n <- length(speed) # number of observations

# Draw a scatter plot of distance against speed
plot(
  speed, dist,
  main = "Vehicle Speed vs Distance",
  xlab = "Speed",
  ylab = "Distance"
)

# The relationship looks approximately linear: distance increases steadily
# as speed increases. That seems intuitive to me for vehicle data, so I will
# not transform the data before fitting the regression line.

# Compute the least-squares regression line by hand
x <- speed # predictor, renamed so it drops straight into the formulas
y <- dist # response, renamed so it drops straight into the formulas

# Sample means (written as x-bar and y-bar in the formulas)
xbar <- mean(x)
ybar <- mean(y)

# Sums of squared deviations from the means, and the sum of cross-products
SSX <- sum((x - xbar)^2)
SSY <- sum((y - ybar)^2)
SSXY <- sum((x - xbar) * (y - ybar))

# Least-squares estimates
betahat1 <- SSXY / SSX # beta hat 1: slope
betahat0 <- ybar - betahat1 * xbar # beta hat 0: intercept

# Display the estimated slope: 3.932409 is positive, which agrees with the
# upward trend seen in the plot
betahat1
## [1] 3.932409
# Compute R squared, a value between 0 and 1 that measures how linear the data are
R2 <- betahat1^2 * SSX / SSY
R2
## [1] 0.6510794
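# A value of 0.6510794 means that roughly 65% of the variation in distance is
# explained by its linear relationship with speed, so the data are reasonably
# linear (I will explain in another lesson)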

# Redraw the scatter plot and overlay the fitted regression line
plot(
  speed, dist,
  main = "Vehicle Speed vs Distance",
  xlab = "Speed",
  ylab = "Distance"
)
abline(a = betahat0, b = betahat1) # a = intercept, b = slope