library(tidyverse)
# Load the beer data set, give the two long column names short aliases, and
# multiply the alcohol column by 100 (the glimpse output below shows values
# such as 4.70, so `alc` reads as a percentage from here on).
beer <- read_csv(file = 'C:/DATA 101. MC/Data 101/beers.csv') %>%
  rename(alc = 'Alcohol Content', cal = 'Calories in 12 oz') %>%
  mutate(alc = 100 * alc)

# Structural overview: one line per column with its type and first values.
glimpse(beer)
## Rows: 10
## Columns: 5
## $ Brand   <chr> "Big Sky Scape Goat Pale Ale", "Sierra Nevada Harvest Ale", "S…
## $ Brewery <chr> "Big Sky Brewing", "Sierra Nevada", "MillerCoors", "Anheuser B…
## $ Carbs   <dbl> 13.9, 19.3, 16.0, 13.3, 5.3, 15.0, 13.7, 13.3, 11.4, 11.8
## $ alc     <dbl> 4.70, 6.70, 8.10, 0.40, 4.15, 5.10, 5.00, 5.00, 4.70, 6.20
## $ cal     <dbl> 163, 215, 222, 70, 104, 162, 158, 155, 158, 195
# Column names after the rename step.
colnames(beer)
## [1] "Brand"   "Brewery" "Carbs"   "alc"     "cal"
# Render the whole data set as a formatted table.
knitr::kable(x = beer)
Brand Brewery Carbs alc cal
Big Sky Scape Goat Pale Ale Big Sky Brewing 13.9 4.70 163
Sierra Nevada Harvest Ale Sierra Nevada 19.3 6.70 215
Steel Reserve MillerCoors 16.0 8.10 222
O’Doul’s Anheuser Busch 13.3 0.40 70
Coors Light MillerCoors 5.3 4.15 104
Genesee Cream Ale High Falls Brewing 15.0 5.10 162
Sierra Nevada Summerfest Beer Sierra Nevada 13.7 5.00 158
Michelob Beer Anheuser Busch 13.3 5.00 155
Flying Dog Doggie Style Flying Dog Brewery 11.4 4.70 158
Big Sky I.P.A. Big Sky Brewing 11.8 6.20 195
# Scatter plot of calories against alcohol content with the least-squares
# line drawn on top.
# The x-axis label has to be passed as `xlab`; the misspelled `xlb` is not a
# graphical parameter, so it only triggered warnings and the intended label
# was never shown.
plot(beer$alc, beer$cal,
     pch = 20, col = "red", cex = 2,
     xlab = "% of Alcohol", ylab = "Calories",
     main = "Beer Calories vs. Alcohol Content")

# Use bare column names in the formula so the `data` argument is what
# supplies the variables.
abline(lm(cal ~ alc, data = beer), lwd = 2)

# Side-by-side box plots of calories and alcohol content.
# par() returns the settings it replaces; restore them afterwards so later
# plots are not drawn into one half of a 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
boxplot(beer$cal, col = "yellow", main = "calories")
boxplot(beer$alc, col = "red", main = "Alcohol")
par(old_par)

# Drop the O'Doul's row before modelling (in the data it has by far the
# lowest alcohol value and the lowest calories; presumably it is excluded as
# an outlier).
beer <- filter(beer, Brand != "O'Doul's")

# Simple linear regression of calories on alcohol percentage.
lin_model <- lm(formula = cal ~ alc, data = beer)
print(lin_model)
## 
## Call:
## lm(formula = cal ~ alc, data = beer)
## 
## Coefficients:
## (Intercept)          alc  
##       25.03        26.32
# Fitted calorie values for the beers the model was built on.
predict(object = lin_model)
##        1        2        3        4        5        6        7        8 
## 148.7287 201.3659 238.2120 134.2535 159.2561 156.6243 156.6243 148.7287 
##        9 
## 188.2066
# Model residuals, kept in `res` for the diagnostic plot.
res <- resid(lin_model)
print(res)
##          1          2          3          4          5          6          7 
##  14.271307  13.634092 -16.211959 -30.253458   2.743864   1.375725  -1.624275 
##          8          9 
##   9.271307   6.793396
# Residuals against the predictor, with a horizontal reference line at zero.
plot(x = beer$alc, y = res,
     main = "Residual", xlab = 'Alcohol %', ylab = 'Residual',
     pch = 20, cex = 2)
abline(h = 0, lwd = 2)

# Residual quantiles, coefficient table, R-squared and F-test for the fit.
print(summary(lin_model))
## 
## Call:
## lm(formula = cal ~ alc, data = beer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.253  -1.624   2.744   9.271  14.271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   25.031     24.999   1.001 0.350038    
## alc           26.319      4.432   5.938 0.000577 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.64 on 7 degrees of freedom
## Multiple R-squared:  0.8344, Adjusted R-squared:  0.8107 
## F-statistic: 35.26 on 1 and 7 DF,  p-value: 0.0005768
# Pearson correlation between alcohol percentage and calories.
correlation_coeff <- cor(beer$alc, beer$cal)
# Spell the label out and separate it from the value with " = "; the previous
# label ran straight into the number ("...r0.913").
paste0("The correlation coefficient, r = ", round(correlation_coeff, 3))
## [1] "The correlation coefficient, r = 0.913"
# Test H0: true correlation = 0 and report a 95% confidence interval.
# `conf.level` is the confidence level itself, not the significance level:
# 0.05 asked for a 5 percent interval. NOTE: any output recorded from this
# call with conf.level = 0.05 shows that 5 percent interval and should be
# regenerated.
cor.test(beer$cal, beer$alc, conf.level = 0.95)
## 
##  Pearson's product-moment correlation
## 
## data:  beer$cal and beer$alc
## t = 5.9384, df = 7, p-value = 0.0005768
## alternative hypothesis: true correlation is not equal to 0
## 5 percent confidence interval:
##  0.9091008 0.9175836
## sample estimates:
##       cor 
## 0.9134414
# Predicted calories for a beer with alc = 7, together with a 95% prediction
# interval.
predict(
  object = lin_model,
  newdata = data.frame(alc = 7),
  interval = "prediction",
  level = 0.95
)
##        fit      lwr      upr
## 1 209.2615 167.2978 251.2252