This built-in dataset follows 5 different orange trees and records each tree's trunk circumference in millimeters at several ages, where age is measured in days since 12/31/1968.
Here we’ll see if we can use this data to create a linear model to predict the circumference based on the age.
# Load the built-in Orange growth data and preview its first six rows
data("Orange")
head(Orange, n = 6L)
## Tree age circumference
## 1 1 118 30
## 2 1 484 58
## 3 1 664 87
## 4 1 1004 115
## 5 1 1231 120
## 6 1 1372 142
# Scatter plot of circumference against age, to eyeball the relationship
# before fitting anything
ggplot(data = Orange, mapping = aes(x = age, y = circumference)) +
  geom_point() +
  theme_bw()
# Fit a simple linear regression of circumference on age
olm <- lm(formula = circumference ~ age, data = Orange)
# Print coefficient estimates, residual quantiles and fit statistics
summary(olm)
##
## Call:
## lm(formula = circumference ~ age, data = Orange)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.310 -14.946 -0.076 19.697 45.111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.399650 8.622660 2.018 0.0518 .
## age 0.106770 0.008277 12.900 1.93e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.74 on 33 degrees of freedom
## Multiple R-squared: 0.8345, Adjusted R-squared: 0.8295
## F-statistic: 166.4 on 1 and 33 DF, p-value: 1.931e-14
# Plot residuals against age to check the linear fit.
# Store the fitted values alongside the observations.
Orange$pred <- predict(olm, newdata = Orange)
# Residual = observed - fitted (the standard convention, and the sign used in
# the residual quantiles reported by summary(olm)). A positive residual means
# the model under-predicted the circumference.
Orange$resid <- Orange$circumference - Orange$pred
ggplot(Orange, aes(x = age, y = resid)) +
  geom_point() +
  theme_bw() +
  # Reference line at zero: points should scatter evenly around it
  geom_hline(yintercept = 0)