UPDATED: FALL 2021
The data are a sample of 77 breakfast cereals.
# Load the cereal data set from the course repository; the first row of the
# CSV holds the column names.
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0, where
# read.csv() would otherwise return them as character vectors.
cereal <- read.csv(
  "https://raw.githubusercontent.com/kitadasmalley/MATH138/main/HAWKES/Data/cerealDat.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
# Print the structure (dimensions, column names and column types).
str(cereal)
## 'data.frame': 77 obs. of 15 variables:
## $ Shelf : Factor w/ 3 levels "Bottom","Middle",..: 3 3 3 3 3 1 2 3 1 3 ...
## $ Name : Factor w/ 77 levels "100%_Bran","100%_Natural_Bran",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Manufacturer : Factor w/ 7 levels "A","G","K","N",..: 4 6 3 3 7 2 3 2 7 5 ...
## $ Type : Factor w/ 2 levels "C","H": 1 1 1 1 1 1 1 1 1 1 ...
## $ Calories : int 70 120 70 50 110 110 110 130 90 90 ...
## $ Protein : int 4 3 4 4 2 2 2 3 2 3 ...
## $ Fat : int 1 5 1 0 2 2 0 2 1 0 ...
## $ Sodium : int 130 15 260 140 200 180 125 210 200 210 ...
## $ Fiber : num 10 2 9 14 1 1.5 1 2 4 5 ...
## $ Carbohydrates : num 5 8 7 8 14 10.5 11 18 15 13 ...
## $ Sugars : int 6 8 5 0 8 10 14 8 6 5 ...
## $ Potassium : int 280 135 320 330 NA 70 30 100 125 190 ...
## $ Vitamins : int 25 0 25 25 25 25 25 25 25 25 ...
## $ Weight..of.One.Serving.Cup.: num 1 1 1 1 1 1 1 1.33 1 1 ...
## $ Cups.in.Serving : num 0.33 1 0.33 0.5 0.75 0.75 1 0.75 0.67 0.67 ...
It is commonly thought that cereals are displayed in certain shelf locations at a market to draw the attention of children. Make a hypothesis about shelf location (Shelf) and the sugar content in a serving of cereal.
library(ggplot2)

# Side-by-side boxplots of Sugars, one box per Shelf level, filled by Shelf.
ggplot(data = cereal, mapping = aes(x = Shelf, y = Sugars, fill = Shelf)) +
  geom_boxplot()

# Fit Sugars as a function of Shelf and print the ANOVA table for that fit.
mod <- lm(formula = Sugars ~ Shelf, data = cereal)
anova(object = mod)
## Analysis of Variance Table
##
## Response: Sugars
## Df Sum Sq Mean Sq F value Pr(>F)
## Shelf 2 220.23 110.117 6.6013 0.002316 **
## Residuals 73 1217.71 16.681
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(tidyverse)

# Scatterplot of Calories against Sugars.
ggplot(data = cereal, mapping = aes(x = Sugars, y = Calories)) +
  geom_point() +
  theme_bw()

# Same scatterplot with a fitted straight line added and no standard-error band.
ggplot(data = cereal, mapping = aes(x = Sugars, y = Calories)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Simple linear regression of Calories on Sugars, then the model summary
# (coefficient table, residual standard error, R-squared, overall F test).
mod <- lm(formula = Calories ~ Sugars, data = cereal)
summary(object = mod)
##
## Call:
## lm(formula = Calories ~ Sugars, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.158 -9.585 0.486 11.441 37.879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89.1578 3.5429 25.165 < 2e-16 ***
## Sugars 2.5356 0.4287 5.914 9.58e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.26 on 74 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.321, Adjusted R-squared: 0.3118
## F-statistic: 34.98 on 1 and 74 DF, p-value: 9.581e-08
# ANOVA table for the Calories ~ Sugars regression.
anova(object = mod)
## Analysis of Variance Table
##
## Response: Calories
## Df Sum Sq Mean Sq F value Pr(>F)
## Sugars 1 9244.9 9244.9 34.977 9.581e-08 ***
## Residuals 74 19559.0 264.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plots for the fitted model, followed by the model summary again.
plot(x = mod)
summary(object = mod)
##
## Call:
## lm(formula = Calories ~ Sugars, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.158 -9.585 0.486 11.441 37.879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89.1578 3.5429 25.165 < 2e-16 ***
## Sugars 2.5356 0.4287 5.914 9.58e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.26 on 74 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.321, Adjusted R-squared: 0.3118
## F-statistic: 34.98 on 1 and 74 DF, p-value: 9.581e-08
# 95% confidence interval for the Sugars slope, computed by hand as
# estimate -/+ t critical value (74 df) * standard error. The estimate and
# standard error are typed in from the summary(mod) coefficient table.
2.5356 + qt(0.975, df = 74) * 0.4287 * c(-1, 1)
## [1] 1.681397 3.389803
# Confidence intervals for every coefficient, computed from the fitted model.
confint(object = mod)
## 2.5 % 97.5 %
## (Intercept) 82.098338 96.217243
## Sugars 1.681327 3.389863
# Base R scatterplot matrix of Calories and four nutrient variables.
pairs(
  Calories ~ Sugars + Fat + Protein + Carbohydrates,
  data = cereal
)
A scatterplot matrix with the GGally package:
library(GGally)

# Scatterplot matrix of Calories and four nutrient variables. Columns are
# selected by name rather than by position, so the plot keeps showing the
# intended variables even if the column order of the CSV changes.
ggpairs(cereal[, c("Calories", "Sugars", "Fat", "Protein", "Carbohydrates")])
# Multiple regression of Calories on Sugars and Carbohydrates, then its summary.
mod2 <- lm(formula = Calories ~ Sugars + Carbohydrates, data = cereal)
summary(object = mod2)
##
## Call:
## lm(formula = Calories ~ Sugars + Carbohydrates, data = cereal)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.670 -7.790 -1.998 5.136 32.178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.1064 7.1786 4.055 0.000124 ***
## Sugars 3.9575 0.3387 11.683 < 2e-16 ***
## Carbohydrates 3.3819 0.3796 8.909 2.76e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.33 on 73 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6747, Adjusted R-squared: 0.6658
## F-statistic: 75.69 on 2 and 73 DF, p-value: < 2.2e-16
#install.packages("scatterplot3d")
library(scatterplot3d)

# 3D scatterplot with the two predictors on the x and y axes and the response
# on the z axis. plane3d() draws z = intercept + b1 * x + b2 * y from the lm
# coefficients, so the column order must follow the model
# Calories ~ Sugars + Carbohydrates: x = Sugars, y = Carbohydrates,
# z = Calories. Putting Calories on the x axis would draw a plane that does
# not correspond to the fitted model.
s3d <- scatterplot3d(cereal[, c("Sugars", "Carbohydrates", "Calories")])
s3d$plane3d(mod2)