dat <- read.csv("http://www.cknudson.com/data/MacNaturalGas.csv")
mod1 <- lm(therms ~ month, data = dat)
with(dat, plot(therms ~ month))
abline(mod1)
dat$monthsquared <- (dat$month)^2
#polynomial regression
mod2 <- lm(therms ~ month + monthsquared, data = dat)
summary(mod2)
##
## Call:
## lm(formula = therms ~ month + monthsquared, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -224.58 -32.32 -14.41 36.10 184.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 549.8271 23.7119 23.19 <2e-16 ***
## month -147.6907 8.3893 -17.61 <2e-16 ***
## monthsquared 10.4416 0.6314 16.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 68.02 on 96 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.7672, Adjusted R-squared: 0.7623
## F-statistic: 158.1 on 2 and 96 DF, p-value: < 2.2e-16
anova(mod2, mod1)
## Analysis of Variance Table
##
## Model 1: therms ~ month + monthsquared
## Model 2: therms ~ month
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 96 444221
## 2 97 1709752 -1 -1265531 273.49 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(mod1)
## [1] 1252.867
AIC(mod2) #smaller is better
## [1] 1121.437
BIC(mod1)
## [1] 1260.652
BIC(mod2)
## [1] 1131.817
dat$monthsqrt <- (dat$month)^.5
mod3 <- lm (therms ~ month + monthsqrt , data = dat)
#mod 1 nested in mod2
# mod1 <- lm(therms ~ month,)
# mod2 <- lm(therms ~ month + monthsquared)
#mod1 is nested ni mod3
# mod2 and mod3 are nested
logLik(mod1)
## 'log Lik.' -623.4335 (df=3)
logLik(mod2)
## 'log Lik.' -556.7183 (df=4)
pval <- pchisq(134, lower.tail = FALSE, df = 1)
#plots our mod2
xvar <- 1:12
coef(mod2)
## (Intercept) month monthsquared
## 549.8271 -147.6907 10.4416
ypreds <- 549.8271 + -147.6907 * xvar + 10.4416 * xvar * xvar
lines (xvar , ypreds)
lines(xvar, ypreds)

plot(mod1)




plot(mod2)




lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
attach(lifedata)
mod5 <- lm(MaleLife ~ GNP , data = lifedata)
with(lifedata, plot(MaleLife ~ GNP))

#life <- transform(lifedata, lnGNP = log(GNP)) # this defines a new variable
#head(life)
newGNP <- log(GNP)
lifemod2 <- lm(MaleLife ~ newGNP)
lifemod2
##
## Call:
## lm(formula = MaleLife ~ newGNP)
##
## Coefficients:
## (Intercept) newGNP
## 25.47 4.78
est <- 25.47 + (log(5000) * 4.78 )
summary(mod5)
##
## Call:
## lm(formula = MaleLife ~ GNP, data = lifedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.999 -5.388 1.222 5.840 12.553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.694e+01 9.647e-01 59.03 < 2e-16 ***
## GNP 7.728e-04 9.758e-05 7.92 6.35e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.492 on 89 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.4134, Adjusted R-squared: 0.4068
## F-statistic: 62.72 on 1 and 89 DF, p-value: 6.345e-12
summary(lifemod2)
##
## Call:
## lm(formula = MaleLife ~ newGNP)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.5872 -3.5064 0.0095 3.7562 14.1309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.4726 2.8387 8.973 4.27e-14 ***
## newGNP 4.7804 0.3693 12.946 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.761 on 89 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.6532, Adjusted R-squared: 0.6493
## F-statistic: 167.6 on 1 and 89 DF, p-value: < 2.2e-16
plot(mod5)




plot(lifemod2)



