Using R, create a simple linear regression model and test its assumptions.
# Load the cereal data set from the remote CSV file.
url <- "https://github.com/dcorrea614/MSDS/raw/master/cereal.csv"
cereal <- read.csv(url)

# Simple linear regression of rating on calories.
# Bare column names plus `data =` (instead of `cereal$...` inside the formula)
# keep the coefficient labels clean and let predict() accept `newdata`.
lm_cereal <- lm(rating ~ calories, data = cereal)
summary(lm_cereal)
##
## Call:
## lm(formula = rating ~ calories, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.7201 -7.9317 -0.6678 5.9902 23.4161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.78802 6.55057 14.623 < 2e-16 ***
## calories -0.49701 0.06031 -8.241 4.14e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.24 on 75 degrees of freedom
## Multiple R-squared: 0.4752, Adjusted R-squared: 0.4682
## F-statistic: 67.92 on 1 and 75 DF, p-value: 4.14e-12
# Scatter plot of rating against calories with the fitted line overlaid.
plot(cereal$rating ~ cereal$calories)
abline(lm_cereal)

# Read slope and intercept from the fitted model rather than retyping the
# rounded values printed by summary(); coef() returns (intercept, slope).
m <- coef(lm_cereal)[[2]]
b <- coef(lm_cereal)[[1]]

# Predicted rating for every row. The predictor is calories; using
# cereal$rating here would make the "residuals" below meaningless.
cereal$rating_predicted <- b + m * cereal$calories
cereal$residuals <- cereal$rating - cereal$rating_predicted

# Histogram of the residuals, used to eyeball the normality assumption:
# look for a roughly symmetric, bell-shaped distribution centred near zero.
hist(cereal$residuals)