Sys.setenv(RSTUDIO_PANDOC="/Users/cameronhermann/Downloads/RStudio.app/Contents/MacOS/pandoc")
beer <- read.csv("http://www.cknudson.com/data/MNbeer.csv")
attach(beer)
model1 <- lm(beer$Rating ~ beer$IBU)
model2 <- lm(beer$Rating ~ beer$IBU + beer$Brewery)
summary(model1)
##
## Call:
## lm(formula = beer$Rating ~ beer$IBU)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3822 -2.4669 0.3309 2.0854 9.8856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83.76336 1.36811 61.225 <2e-16 ***
## beer$IBU 0.06694 0.02474 2.706 0.0098 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.484 on 42 degrees of freedom
## Multiple R-squared: 0.1485, Adjusted R-squared: 0.1282
## F-statistic: 7.322 on 1 and 42 DF, p-value: 0.0098
summary(model2)
##
## Call:
## lm(formula = beer$Rating ~ beer$IBU + beer$Brewery)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7732 -1.7126 0.0889 1.2031 5.9521
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83.98984 1.65525 50.741 < 2e-16 ***
## beer$IBU 0.05493 0.01969 2.790 0.00848 **
## beer$BreweryBent Paddle 1.19330 1.78467 0.669 0.50811
## beer$BreweryFulton -2.00670 1.78467 -1.124 0.26849
## beer$BreweryIndeed 0.46958 1.67917 0.280 0.78139
## beer$BrewerySteel Toe 1.16761 1.87697 0.622 0.53793
## beer$BrewerySummit -1.47248 1.66443 -0.885 0.38237
## beer$BrewerySurly 5.20257 1.66495 3.125 0.00357 **
## beer$BreweryUrban Growler -2.59571 1.78495 -1.454 0.15479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.654 on 35 degrees of freedom
## Multiple R-squared: 0.5881, Adjusted R-squared: 0.4939
## F-statistic: 6.246 on 8 and 35 DF, p-value: 5.108e-05
dat <- read.csv("http://www.cknudson.com/data/MacNaturalGas.csv")
modelt <- lm(therms~month, data = dat)
monthsq <- dat$month^2
with(dat, plot(therms~monthsq))
abline(modelt)

modeltsq <- lm(therms~month + monthsq, data = dat)
summary(modeltsq)
##
## Call:
## lm(formula = therms ~ month + monthsq, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -224.58 -32.32 -14.41 36.10 184.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 549.8271 23.7119 23.19 <2e-16 ***
## month -147.6907 8.3893 -17.61 <2e-16 ***
## monthsq 10.4416 0.6314 16.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 68.02 on 96 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.7672, Adjusted R-squared: 0.7623
## F-statistic: 158.1 on 2 and 96 DF, p-value: < 2.2e-16
AIC(modelt) #Change of 10+
## [1] 1252.867
AIC(modeltsq)
## [1] 1121.437
BIC(modelt) #Change of 10+
## [1] 1260.652
BIC(modeltsq)
## [1] 1131.817
monthsqrt <- dat$month^0.5
modeltsqrt <- lm(therms~month + monthsqrt, data = dat)
summary(modeltsqrt)
##
## Call:
## lm(formula = therms ~ month + monthsqrt, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -242.84 -61.90 -28.56 54.94 228.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1089.40 84.11 12.952 < 2e-16 ***
## month 164.76 17.34 9.502 1.75e-15 ***
## monthsqrt -823.32 79.50 -10.356 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 91.72 on 96 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.5767, Adjusted R-squared: 0.5679
## F-statistic: 65.4 on 2 and 96 DF, p-value: < 2.2e-16
coef(modeltsq)
## (Intercept) month monthsq
## 549.8271 -147.6907 10.4416
xvar <- 1:12
ypreds <- 549 - 148 * xvar + 10 * xvar * xvar
with(dat, (plot(month, therms)))
## NULL
lines(xvar, ypreds)

plot(modeltsq)




lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
modelLife <- lm(MaleLife~GNP, data = lifedata)
with(lifedata, plot(MaleLife~GNP))
abline(modelLife)

logGNP <- log(lifedata$GNP)
modelLifeLn <- lm(MaleLife~logGNP, data = lifedata)
with(lifedata, plot(MaleLife~logGNP))

coef(modelLifeLn)
## (Intercept) logGNP
## 25.472622 4.780417
xvar <- 1:35000
ypreds <- 25.472622 + 4.780417 * log(xvar)
with(lifedata, (plot(GNP, MaleLife)))
## NULL
lines(xvar*10, ypreds)

plot(modelLifeLn)



