The data are a sample of 30 breakfast cereals.
# Read the cereal data set from the Lock5 website (read.delim() expects a
# tab-delimited file; the first line holds the column names), then preview
# the first few rows.
cereal <- read.delim(
  file = "https://www.lock5stat.com/datasets/Cereal.txt",
  header = TRUE
)
head(cereal)
## Name Company Serving Calories Fat Sodium Carbs Fiber Sugars
## 1 AppleJacks K 1.00 117 0.6 143 27 0.5 15.0
## 2 Boo Berry G 1.00 118 0.8 211 27 0.1 14.0
## 3 Cap'n Crunch Q 0.75 144 2.1 269 31 1.1 16.0
## 4 Cinnamon Toast Crunch G 0.75 169 4.4 408 32 1.7 13.3
## 5 Cocoa Blasts Q 1.00 130 1.2 135 29 0.8 16.0
## 6 Cocoa Puffs G 1.00 117 1.0 171 26 0.8 14.0
## Protein
## 1 1.0
## 2 1.0
## 3 1.3
## 4 2.7
## 5 1.0
## 6 1.0
library(tidyverse)
# Scatterplot of Calories against Sugars for the cereals in the sample.
ggplot(data = cereal, mapping = aes(x = Sugars, y = Calories)) +
  geom_point() +
  theme_bw()
# Same scatterplot with the fitted least-squares line overlaid;
# se = FALSE suppresses the confidence band around the line.
ggplot(data = cereal, mapping = aes(x = Sugars, y = Calories)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Simple linear regression of Calories on Sugars, then the coefficient
# table and overall fit statistics.
mod <- lm(formula = Calories ~ Sugars, data = cereal)
summary(object = mod)
##
## Call:
## lm(formula = Calories ~ Sugars, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.574 -25.282 -2.549 17.796 51.805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 88.9204 10.8120 8.224 5.96e-09 ***
## Sugars 4.3103 0.9269 4.650 7.22e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.61 on 28 degrees of freedom
## Multiple R-squared: 0.4357, Adjusted R-squared: 0.4156
## F-statistic: 21.62 on 1 and 28 DF, p-value: 7.217e-05
# ANOVA table for the simple regression: splits the variation in Calories
# into the part attributed to Sugars and the residual part.
anova(object = mod)
## Analysis of Variance Table
##
## Response: Calories
## Df Sum Sq Mean Sq F value Pr(>F)
## Sugars 1 15316 15316.5 21.623 7.217e-05 ***
## Residuals 28 19834 708.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Normal Q-Q plot of the residuals from `mod`, with the reference line
# added, as a visual check of the normality of the errors.
qqnorm(residuals(mod))
qqline(residuals(mod))
# Residuals-vs-predictor plot for the simple regression `mod`, with a dashed
# reference line at zero.
#
# The plotting data are built from the model itself: model.frame(mod) holds
# exactly the rows lm() used, so each Sugars value is paired with its own
# residual. Columns are then referenced by bare name inside aes(); the
# previous `cereal$Sugar` relied on `$` partial matching of the real column
# name `Sugars` and triggered ggplot2's "use of `$` is discouraged" warning.
# The axes are now labelled "Sugars" and "Residual".
resid_df <- data.frame(
  Sugars = model.frame(mod)[["Sugars"]],
  Residual = mod$residuals
)
ggplot(data = resid_df, mapping = aes(x = Sugars, y = Residual)) +
  geom_point() +
  ggtitle("Residual Plot") +
  theme_bw() +
  geom_hline(yintercept = 0, color = "blue", lty = 2, lwd = 1)
# Show the fit summary again: the Sugars estimate, its standard error and
# the residual degrees of freedom feed the interval computed next.
summary(object = mod)
##
## Call:
## lm(formula = Calories ~ Sugars, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.574 -25.282 -2.549 17.796 51.805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 88.9204 10.8120 8.224 5.96e-09 ***
## Sugars 4.3103 0.9269 4.650 7.22e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.61 on 28 degrees of freedom
## Multiple R-squared: 0.4357, Adjusted R-squared: 0.4156
## F-statistic: 21.62 on 1 and 28 DF, p-value: 7.217e-05
# Hand-computed 95% confidence interval for the Sugars slope:
#   estimate +/- t* x SE, where t* is the 0.975 quantile of the t
#   distribution on the model's residual degrees of freedom.
# The estimate, SE and df are taken from the fitted model instead of being
# retyped from the rounded printout, so the interval agrees with
# confint(mod) to full precision.
sugars_row <- coef(summary(mod))["Sugars", ]
sugars_row[["Estimate"]] +
  c(-1, 1) * qt(0.975, df.residual(mod)) * sugars_row[["Std. Error"]]
## [1] 2.411535 6.208987
# Built-in 95% confidence intervals for both coefficients of `mod`
# (0.95 is confint()'s default level, spelled out here).
confint(mod, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 66.772987 111.067837
## Sugars 2.411535 6.208987
# Scatterplot matrix of Calories and four candidate predictors, for a quick
# look at the pairwise relationships before fitting a larger model.
pairs(Calories ~ Sugars + Fat + Protein + Carbs, data = cereal)
# Multiple regression of Calories on Sugars and Carbs, then the coefficient
# table and overall fit statistics.
mod2 <- lm(formula = Calories ~ Sugars + Carbs, data = cereal)
summary(object = mod2)
##
## Call:
## lm(formula = Calories ~ Sugars + Carbs, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.763 -6.711 -1.976 2.273 28.015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4568 9.1821 0.485 0.631
## Sugars 0.6528 0.5323 1.226 0.231
## Carbs 4.1317 0.3834 10.776 2.79e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.77 on 27 degrees of freedom
## Multiple R-squared: 0.8935, Adjusted R-squared: 0.8857
## F-statistic: 113.3 on 2 and 27 DF, p-value: 7.355e-14
#install.packages("scatterplot3d")
library(scatterplot3d)
# 3-D scatterplot of the two predictors (x = Sugars, y = Carbs) against the
# response (z = Calories), with the fitted plane from `mod2` drawn on top.
# Columns are selected by name rather than by the positions 9, 7, 4, so the
# plot does not silently change if the column order of `cereal` does. The
# x/y order matches the predictor order in mod2's formula (Sugars + Carbs).
s3d <- scatterplot3d(cereal[, c("Sugars", "Carbs", "Calories")])
s3d$plane3d(mod2)