02/09/2021

###1 Create a model for beer rating using:a. IBU (bitterness) b. IBU and brewery

beer <- read.csv("http://www.cknudson.com/data/MNbeer.csv")
moda <- lm(Rating ~ IBU, data = beer)
modb <- lm(Rating ~ IBU + Brewery, data = beer)
mod1 <- lm(Rating ~ Brewery, data = beer)
coef(mod1)

##          (Intercept)   BreweryBent Paddle        BreweryFulton 
##            86.750000             0.850000            -2.350000 
##        BreweryIndeed     BrewerySteel Toe        BrewerySummit 
##             1.107143             1.250000            -1.321429 
##         BrewerySurly BreweryUrban Growler 
##             5.392857            -2.950000

mod0 <- lm(Rating ~ 1, data = beer)
coef(mod0)

## (Intercept) 
##    87.18182

anova(mod1,mod0)

## Analysis of Variance Table
## 
## Model 1: Rating ~ Brewery
## Model 2: Rating ~ 1
##   Res.Df    RSS Df Sum of Sq     F    Pr(>F)    
## 1     36 301.38                                 
## 2     43 598.55 -7   -297.17 5.071 0.0004478 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

###2. Do a test to see whether IBU has a linear relationship with the beer’s rating

summary(moda)

## 
## Call:
## lm(formula = Rating ~ IBU, data = beer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3822 -2.4669  0.3309  2.0854  9.8856 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 83.76336    1.36811  61.225   <2e-16 ***
## IBU          0.06694    0.02474   2.706   0.0098 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.484 on 42 degrees of freedom
## Multiple R-squared:  0.1485, Adjusted R-squared:  0.1282 
## F-statistic: 7.322 on 1 and 42 DF,  p-value: 0.0098

anova(moda)

## Analysis of Variance Table
## 
## Response: Rating
##           Df Sum Sq Mean Sq F value Pr(>F)   
## IBU        1  88.86  88.859  7.3223 0.0098 **
## Residuals 42 509.69  12.135                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(moda,modb)

## Analysis of Variance Table
## 
## Model 1: Rating ~ IBU
## Model 2: Rating ~ IBU + Brewery
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     42 509.69                                  
## 2     35 246.55  7    263.14 5.3364 0.0003213 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

###3. Do a test to see whether the beer’s rating still has a linearship with the bitterness, even after accounting for the brewery.

anova(modb, mod1)

## Analysis of Variance Table
## 
## Model 1: Rating ~ IBU + Brewery
## Model 2: Rating ~ Brewery
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     35 246.55                                
## 2     36 301.38 -1   -54.829 7.7835 0.008475 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

### Thursday problem
lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
#1a
with(lifedata, plot (MaleLife~GNP))
#1b
mod2 <- lm(MaleLife~GNP, data = lifedata)
abline(mod2)

#1c
summary(mod2)

## 
## Call:
## lm(formula = MaleLife ~ GNP, data = lifedata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.999  -5.388   1.222   5.840  12.553 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.694e+01  9.647e-01   59.03  < 2e-16 ***
## GNP         7.728e-04  9.758e-05    7.92 6.35e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.492 on 89 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.4134, Adjusted R-squared:  0.4068 
## F-statistic: 62.72 on 1 and 89 DF,  p-value: 6.345e-12

# R^2 of this model is 0.434 this means that the model accounts 
lifedata$MaleLife <- (lifedata$GNP)^2

#2 
lifedata$logGNP <- log(lifedata$GNP)
mod3 <- lm(MaleLife ~ logGNP, data = lifedata)
coef(mod3)

## (Intercept)      logGNP 
##  -559800267    87537661

est <- coef(mod3)[1] + coef(mod3)[2] * (log(5000))
est

## (Intercept) 
##   185774906

summary(mod3)

## 
## Call:
## lm(formula = MaleLife ~ logGNP, data = lifedata)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -166364324 -107222957  -25786349   74046096  806613645 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -559800267   70603762  -7.929 6.08e-12 ***
## logGNP        87537661    9184094   9.531 2.98e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 143300000 on 89 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.5051, Adjusted R-squared:  0.4996 
## F-statistic: 90.85 on 1 and 89 DF,  p-value: 2.98e-15

### plot 2 model
plot(mod2)

plot(mod3)

## Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

02/09/2021

Phuong Nguyen

2/9/2021