###1 Create a model for beer rating using:a. IBU (bitterness) b. IBU and brewery
beer <- read.csv("http://www.cknudson.com/data/MNbeer.csv")
moda <- lm(Rating ~ IBU, data = beer)
modb <- lm(Rating ~ IBU + Brewery, data = beer)
mod1 <- lm(Rating ~ Brewery, data = beer)
coef(mod1)
## (Intercept) BreweryBent Paddle BreweryFulton
## 86.750000 0.850000 -2.350000
## BreweryIndeed BrewerySteel Toe BrewerySummit
## 1.107143 1.250000 -1.321429
## BrewerySurly BreweryUrban Growler
## 5.392857 -2.950000
mod0 <- lm(Rating ~ 1, data = beer)
coef(mod0)
## (Intercept)
## 87.18182
anova(mod1,mod0)
## Analysis of Variance Table
##
## Model 1: Rating ~ Brewery
## Model 2: Rating ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 36 301.38
## 2 43 598.55 -7 -297.17 5.071 0.0004478 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###2. Do a test to see whether IBU has a linear relationship with the beer’s rating
summary(moda)
##
## Call:
## lm(formula = Rating ~ IBU, data = beer)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3822 -2.4669 0.3309 2.0854 9.8856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83.76336 1.36811 61.225 <2e-16 ***
## IBU 0.06694 0.02474 2.706 0.0098 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.484 on 42 degrees of freedom
## Multiple R-squared: 0.1485, Adjusted R-squared: 0.1282
## F-statistic: 7.322 on 1 and 42 DF, p-value: 0.0098
anova(moda)
## Analysis of Variance Table
##
## Response: Rating
## Df Sum Sq Mean Sq F value Pr(>F)
## IBU 1 88.86 88.859 7.3223 0.0098 **
## Residuals 42 509.69 12.135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(moda,modb)
## Analysis of Variance Table
##
## Model 1: Rating ~ IBU
## Model 2: Rating ~ IBU + Brewery
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 42 509.69
## 2 35 246.55 7 263.14 5.3364 0.0003213 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###3. Do a test to see whether the beer’s rating still has a linearship with the bitterness, even after accounting for the brewery.
anova(modb, mod1)
## Analysis of Variance Table
##
## Model 1: Rating ~ IBU + Brewery
## Model 2: Rating ~ Brewery
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 35 246.55
## 2 36 301.38 -1 -54.829 7.7835 0.008475 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Thursday problem
lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
#1a
with(lifedata, plot (MaleLife~GNP))
#1b
mod2 <- lm(MaleLife~GNP, data = lifedata)
abline(mod2)
#1c
summary(mod2)
##
## Call:
## lm(formula = MaleLife ~ GNP, data = lifedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.999 -5.388 1.222 5.840 12.553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.694e+01 9.647e-01 59.03 < 2e-16 ***
## GNP 7.728e-04 9.758e-05 7.92 6.35e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.492 on 89 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.4134, Adjusted R-squared: 0.4068
## F-statistic: 62.72 on 1 and 89 DF, p-value: 6.345e-12
# R^2 of this model is 0.434 this means that the model accounts
lifedata$MaleLife <- (lifedata$GNP)^2
#2
lifedata$logGNP <- log(lifedata$GNP)
mod3 <- lm(MaleLife ~ logGNP, data = lifedata)
coef(mod3)
## (Intercept) logGNP
## -559800267 87537661
est <- coef(mod3)[1] + coef(mod3)[2] * (log(5000))
est
## (Intercept)
## 185774906
summary(mod3)
##
## Call:
## lm(formula = MaleLife ~ logGNP, data = lifedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -166364324 -107222957 -25786349 74046096 806613645
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -559800267 70603762 -7.929 6.08e-12 ***
## logGNP 87537661 9184094 9.531 2.98e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 143300000 on 89 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.5051, Adjusted R-squared: 0.4996
## F-statistic: 90.85 on 1 and 89 DF, p-value: 2.98e-15
### plot 2 model
plot(mod2)
plot(mod3)
## Including Plots
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.