Sys.setenv(RSTUDIO_PANDOC="/Users/cameronhermann/Downloads/RStudio.app/Contents/MacOS/pandoc")
beer <- read.csv("http://www.cknudson.com/data/MNbeer.csv")
attach(beer)
model1 <- lm(beer$Rating ~ beer$IBU)

model2 <- lm(beer$Rating ~ beer$IBU + beer$Brewery)
summary(model1)
## 
## Call:
## lm(formula = beer$Rating ~ beer$IBU)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3822 -2.4669  0.3309  2.0854  9.8856 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 83.76336    1.36811  61.225   <2e-16 ***
## beer$IBU     0.06694    0.02474   2.706   0.0098 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.484 on 42 degrees of freedom
## Multiple R-squared:  0.1485, Adjusted R-squared:  0.1282 
## F-statistic: 7.322 on 1 and 42 DF,  p-value: 0.0098
summary(model2)
## 
## Call:
## lm(formula = beer$Rating ~ beer$IBU + beer$Brewery)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7732 -1.7126  0.0889  1.2031  5.9521 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               83.98984    1.65525  50.741  < 2e-16 ***
## beer$IBU                   0.05493    0.01969   2.790  0.00848 ** 
## beer$BreweryBent Paddle    1.19330    1.78467   0.669  0.50811    
## beer$BreweryFulton        -2.00670    1.78467  -1.124  0.26849    
## beer$BreweryIndeed         0.46958    1.67917   0.280  0.78139    
## beer$BrewerySteel Toe      1.16761    1.87697   0.622  0.53793    
## beer$BrewerySummit        -1.47248    1.66443  -0.885  0.38237    
## beer$BrewerySurly          5.20257    1.66495   3.125  0.00357 ** 
## beer$BreweryUrban Growler -2.59571    1.78495  -1.454  0.15479    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.654 on 35 degrees of freedom
## Multiple R-squared:  0.5881, Adjusted R-squared:  0.4939 
## F-statistic: 6.246 on 8 and 35 DF,  p-value: 5.108e-05
dat <- read.csv("http://www.cknudson.com/data/MacNaturalGas.csv")
modelt <- lm(therms~month, data = dat)
monthsq <- dat$month^2
with(dat, plot(therms~monthsq))
abline(modelt)

modeltsq <- lm(therms~month + monthsq, data = dat)
summary(modeltsq)
## 
## Call:
## lm(formula = therms ~ month + monthsq, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -224.58  -32.32  -14.41   36.10  184.27 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  549.8271    23.7119   23.19   <2e-16 ***
## month       -147.6907     8.3893  -17.61   <2e-16 ***
## monthsq       10.4416     0.6314   16.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68.02 on 96 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.7672, Adjusted R-squared:  0.7623 
## F-statistic: 158.1 on 2 and 96 DF,  p-value: < 2.2e-16
AIC(modelt) #Change of 10+
## [1] 1252.867
AIC(modeltsq)
## [1] 1121.437
BIC(modelt) #Change of 10+
## [1] 1260.652
BIC(modeltsq)
## [1] 1131.817
monthsqrt <- dat$month^0.5
modeltsqrt <- lm(therms~month + monthsqrt, data = dat)
summary(modeltsqrt)
## 
## Call:
## lm(formula = therms ~ month + monthsqrt, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -242.84  -61.90  -28.56   54.94  228.58 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1089.40      84.11  12.952  < 2e-16 ***
## month         164.76      17.34   9.502 1.75e-15 ***
## monthsqrt    -823.32      79.50 -10.356  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 91.72 on 96 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.5767, Adjusted R-squared:  0.5679 
## F-statistic:  65.4 on 2 and 96 DF,  p-value: < 2.2e-16
coef(modeltsq)
## (Intercept)       month     monthsq 
##    549.8271   -147.6907     10.4416
xvar <- 1:12

ypreds <- 549 - 148 * xvar + 10 * xvar * xvar
with(dat, (plot(month, therms)))
## NULL
lines(xvar, ypreds)

plot(modeltsq)

lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
modelLife <- lm(MaleLife~GNP, data = lifedata)
with(lifedata, plot(MaleLife~GNP))
abline(modelLife)

logGNP <- log(lifedata$GNP)
modelLifeLn <- lm(MaleLife~logGNP, data = lifedata)
with(lifedata, plot(MaleLife~logGNP))

coef(modelLifeLn)
## (Intercept)      logGNP 
##   25.472622    4.780417
xvar <- 1:35000

ypreds <- 25.472622 + 4.780417 * log(xvar)
with(lifedata, (plot(GNP, MaleLife)))
## NULL
lines(xvar*10, ypreds)

plot(modelLifeLn)