Discussion

Using R, create a simple linear regression model and test its assumptions.

# Read the cereal CSV directly from its GitHub URL into a data frame
url <- "https://github.com/dcorrea614/MSDS/raw/master/cereal.csv"
cereal <- read.csv(file = url)

# Fit a simple linear regression of rating on calories, then print the
# coefficient table and fit statistics
lm_cereal <- lm(formula = cereal$rating ~ cereal$calories)
summary(object = lm_cereal)
## 
## Call:
## lm(formula = cereal$rating ~ cereal$calories)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.7201  -7.9317  -0.6678   5.9902  23.4161 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     95.78802    6.55057  14.623  < 2e-16 ***
## cereal$calories -0.49701    0.06031  -8.241 4.14e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.24 on 75 degrees of freedom
## Multiple R-squared:  0.4752, Adjusted R-squared:  0.4682 
## F-statistic: 67.92 on 1 and 75 DF,  p-value: 4.14e-12
# Scatter plot of rating (y) against calories (x); axis labels are spelled
# out so they read the same as the formula-based call would produce
plot(
  x = cereal$calories,
  y = cereal$rating,
  xlab = "cereal$calories",
  ylab = "cereal$rating"
)
# Overlay the fitted regression line from the model
abline(reg = lm_cereal)

# Take the intercept and slope straight from the fitted model instead of
# copying rounded values out of the printed summary; coef() returns them
# in the order (intercept, slope) for this one-predictor model.
model_coefs <- unname(coef(lm_cereal))
b <- model_coefs[1]
m <- model_coefs[2]

# Predicted rating for each row. The model's predictor is calories, so the
# fitted line must be evaluated at calories (not at the response, rating).
cereal$rating_predicted <- b + m * cereal$calories

# Residual = observed rating minus predicted rating
cereal$residuals <- cereal$rating - cereal$rating_predicted

# Histogram of the residuals, for a visual check of their distribution
hist(cereal$residuals)