library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Load the LeafWidth data set from Stat2Data (one library()/data() call is
# enough; repeating them is a no-op).
library(Stat2Data)
data("LeafWidth")

# Simple linear regression of leaf width on year.
fm <- lm(Width ~ Year, data = LeafWidth)
summary(fm)
##
## Call:
## lm(formula = Width ~ Year, data = LeafWidth)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1214 -1.1253 -0.3136 0.9320 5.4144
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.723091 8.574977 4.399 1.61e-05 ***
## Year -0.017560 0.004358 -4.029 7.43e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.424 on 250 degrees of freedom
## Multiple R-squared: 0.06098, Adjusted R-squared: 0.05723
## F-statistic: 16.24 on 1 and 250 DF, p-value: 7.425e-05
# Scatterplot of Width against Year with the fitted lm line overlaid
# (se = FALSE suppresses the confidence band).
ggplot(data = fm, mapping = aes(x = Year, y = Width)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
\[ \widehat{\text{Width}} = 37.723 - 0.01756 \times \text{Year} \]
\[ \begin{aligned} \widehat{\text{Width}} &= 37.723 - 0.017560 \times 1966 \\ &\approx 3.2 \end{aligned} \]
# Load the Sparrows data set from Stat2Data.
library(Stat2Data)
data("Sparrows")

# Simple linear regression of Weight on WingLength.
gm <- lm(formula = Weight ~ WingLength, data = Sparrows)
summary(gm)
##
## Call:
## lm(formula = Weight ~ WingLength, data = Sparrows)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5440 -0.9935 0.0809 1.0559 3.4168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.36549 0.95731 1.426 0.156
## WingLength 0.46740 0.03472 13.463 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.4 on 114 degrees of freedom
## Multiple R-squared: 0.6139, Adjusted R-squared: 0.6105
## F-statistic: 181.3 on 1 and 114 DF, p-value: < 2.2e-16
# Scatterplot of Weight against WingLength with the fitted lm line.
ggplot(gm, aes(x = WingLength, y = Weight)) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)

# Histogram of the residuals. The residuals are placed in their own data frame
# so that aes() maps a real column instead of reaching outside `data` with `$`,
# which would misalign (or error) if the model had dropped any rows.
ggplot(data.frame(residual = resid(gm)), aes(x = residual)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normal Q-Q plot of the residuals (panel 2 of plot.lm).
plot(gm, which = 2)
# Load the Caterpillars data set from Stat2Data.
library(Stat2Data)
data("Caterpillars")

# Simple linear regression of WetFrass on Mass (both on their raw scale).
hm <- lm(formula = WetFrass ~ Mass, data = Caterpillars)
summary(hm)
##
## Call:
## lm(formula = WetFrass ~ Mass, data = Caterpillars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65454 -0.04796 -0.03336 -0.01014 1.50828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.033198 0.027436 1.21 0.227
## Mass 0.247696 0.007463 33.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3885 on 265 degrees of freedom
## Multiple R-squared: 0.8061, Adjusted R-squared: 0.8054
## F-statistic: 1102 on 1 and 265 DF, p-value: < 2.2e-16
# Raw-scale scatterplot of WetFrass against Mass.
ggplot(data = hm, mapping = aes(x = Mass, y = WetFrass)) +
  geom_point()

# The same two variables after a natural-log transform of each.
ggplot(data = Caterpillars) +
  geom_point(mapping = aes(x = log(Mass), y = log(WetFrass)))
# Regress LogWetFrass on LogMass. The formula uses bare column names so lm()
# resolves them through `data`; writing Caterpillars$... in the formula ignores
# `data`, breaks predict(newdata = ...), and yields coefficient names such as
# "Caterpillars$LogMass".
jm <- lm(LogWetFrass ~ LogMass, data = Caterpillars)
summary(jm)
##
## Call:
## lm(formula = LogWetFrass ~ LogMass, data = Caterpillars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5392 -0.2063 0.1525 0.2906 0.9517
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.73861 0.02977 -24.81 <2e-16 ***
## LogMass 1.05361 0.02054 51.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4089 on 265 degrees of freedom
## Multiple R-squared: 0.9085, Adjusted R-squared: 0.9082
## F-statistic: 2632 on 1 and 265 DF, p-value: < 2.2e-16
# Scatterplot on the log scale with the fitted line; columns are mapped by
# name from the model's data rather than via `$`.
ggplot(jm, aes(x = LogMass, y = LogWetFrass)) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)
\[ \widehat{\text{LogWetFrass}} = -0.73861 + 1.05361 \times \text{LogMass} \]
# Log-scale scatterplots coloured by Instar and by Fgp. Columns are referenced
# by bare name so ggplot2 looks them up in `data`; using Caterpillars$... inside
# aes() bypasses `data` and produces axis labels like "Caterpillars$LogMass".
ggplot(Caterpillars, aes(x = LogMass, y = LogWetFrass, colour = Instar)) +
  geom_point()
ggplot(Caterpillars, aes(x = LogMass, y = LogWetFrass, colour = Fgp)) +
  geom_point()
# Load the USstamps data set from Stat2Data.
library(Stat2Data)
data("USstamps")

# Full series: fit Price on Year and look at the scatterplot.
km <- lm(Price ~ Year, data = USstamps)
ggplot(km, aes(x = Year, y = Price)) +
  geom_point()

# Drop the first four observations. n() states "through the last row"
# explicitly instead of a hard-coded upper index (45) that runs past the end
# of the table and only works because out-of-range positions are ignored.
miniUSstamps <- USstamps %>%
  slice(5:n())

# Fit the reduced-data model once and reuse it for the plot and the summary;
# km1 is kept as an alias of the same fit for any code that refers to it.
qm <- lm(Price ~ Year, data = miniUSstamps)
km1 <- qm
ggplot(qm, aes(x = Year, y = Price)) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)
summary(qm)
##
## Call:
## lm(formula = Price ~ Year, data = miniUSstamps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9232 -0.9478 0.1195 1.1899 4.5325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.647e+03 4.686e+01 -35.15 <2e-16 ***
## Year 8.410e-01 2.357e-02 35.68 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.737 on 19 degrees of freedom
## Multiple R-squared: 0.9853, Adjusted R-squared: 0.9845
## F-statistic: 1273 on 1 and 19 DF, p-value: < 2.2e-16
\[ \widehat{\text{Price}} = -1647 + 0.841 \times \text{Year} \]
# Draw plot.lm panels 1 and 2 for the reduced-data stamp model, one at a time.
for (panel in c(1, 2)) {
  plot(qm, which = panel)
}
# Load the Pines data set from Stat2Data.
library(Stat2Data)
data("Pines")

# Simple linear regression of Hgt96 on Hgt90.
wm <- lm(formula = Hgt96 ~ Hgt90, data = Pines)

# Scatterplot of the two height variables, then the model summary.
ggplot(data = wm, mapping = aes(x = Hgt90, y = Hgt96)) +
  geom_point()
summary(wm)
##
## Call:
## lm(formula = Hgt96 ~ Hgt90, data = Pines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -275.293 -42.798 7.208 46.332 181.457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 241.2846 8.6209 27.99 < 2e-16 ***
## Hgt90 2.2504 0.4311 5.22 2.28e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 69.02 on 805 degrees of freedom
## (193 observations deleted due to missingness)
## Multiple R-squared: 0.03274, Adjusted R-squared: 0.03154
## F-statistic: 27.25 on 1 and 805 DF, p-value: 2.276e-07
# Scatterplot of Hgt96 against Hgt90 with the fitted lm line overlaid
# (se = FALSE suppresses the confidence band).
ggplot(data = wm, mapping = aes(x = Hgt90, y = Hgt96)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
\[ \widehat{\text{Hgt96}} = 241.2846 + 2.2504 \times \text{Hgt90} \]