dat <- read.csv("http://www.cknudson.com/data/MacNaturalGas.csv")

mod1 <- lm(therms ~ month, data = dat)
with(dat, plot(therms ~ month))
abline(mod1)
dat$monthsquared <- (dat$month)^2
#polynomial regression
mod2 <- lm(therms ~ month + monthsquared, data = dat)

summary(mod2)
## 
## Call:
## lm(formula = therms ~ month + monthsquared, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -224.58  -32.32  -14.41   36.10  184.27 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   549.8271    23.7119   23.19   <2e-16 ***
## month        -147.6907     8.3893  -17.61   <2e-16 ***
## monthsquared   10.4416     0.6314   16.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68.02 on 96 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.7672, Adjusted R-squared:  0.7623 
## F-statistic: 158.1 on 2 and 96 DF,  p-value: < 2.2e-16
anova(mod2, mod1)
## Analysis of Variance Table
## 
## Model 1: therms ~ month + monthsquared
## Model 2: therms ~ month
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     96  444221                                  
## 2     97 1709752 -1  -1265531 273.49 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(mod1)
## [1] 1252.867
AIC(mod2) #smaller is better
## [1] 1121.437
BIC(mod1)
## [1] 1260.652
BIC(mod2)
## [1] 1131.817
dat$monthsqrt <- (dat$month)^.5
mod3 <- lm (therms ~ month + monthsqrt , data = dat)

#mod 1 nested in mod2
# mod1 <- lm(therms ~ month,)
# mod2 <- lm(therms ~ month + monthsquared)
#mod1 is nested ni mod3
# mod2 and mod3 are nested

logLik(mod1)
## 'log Lik.' -623.4335 (df=3)
logLik(mod2)
## 'log Lik.' -556.7183 (df=4)
pval <- pchisq(134, lower.tail = FALSE, df = 1)

#plots our mod2
xvar <- 1:12
coef(mod2)
##  (Intercept)        month monthsquared 
##     549.8271    -147.6907      10.4416
ypreds <- 549.8271 + -147.6907 * xvar + 10.4416 * xvar * xvar
lines (xvar , ypreds)

lines(xvar, ypreds)

plot(mod1)

plot(mod2)

lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
lifedata <- read.csv("http://www.cknudson.com/data/LifeExp.csv")
attach(lifedata)
mod5 <- lm(MaleLife ~ GNP , data = lifedata)
with(lifedata, plot(MaleLife ~ GNP))

#life <- transform(lifedata, lnGNP = log(GNP)) # this defines a new variable
#head(life)
newGNP <- log(GNP)

lifemod2 <- lm(MaleLife ~ newGNP)
lifemod2
## 
## Call:
## lm(formula = MaleLife ~ newGNP)
## 
## Coefficients:
## (Intercept)       newGNP  
##       25.47         4.78
est <- 25.47 + (log(5000) * 4.78 )

summary(mod5)
## 
## Call:
## lm(formula = MaleLife ~ GNP, data = lifedata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.999  -5.388   1.222   5.840  12.553 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.694e+01  9.647e-01   59.03  < 2e-16 ***
## GNP         7.728e-04  9.758e-05    7.92 6.35e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.492 on 89 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.4134, Adjusted R-squared:  0.4068 
## F-statistic: 62.72 on 1 and 89 DF,  p-value: 6.345e-12
summary(lifemod2)
## 
## Call:
## lm(formula = MaleLife ~ newGNP)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.5872  -3.5064   0.0095   3.7562  14.1309 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  25.4726     2.8387   8.973 4.27e-14 ***
## newGNP        4.7804     0.3693  12.946  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.761 on 89 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.6532, Adjusted R-squared:  0.6493 
## F-statistic: 167.6 on 1 and 89 DF,  p-value: < 2.2e-16
plot(mod5)

plot(lifemod2)