Oranges

This built-in dataset follows 5 different orange trees and records each tree's circumference (in millimeters) at several ages, where age is measured in days since 12/31/1968.

Here we’ll see if we can use this data to create a linear model to predict the circumference based on the age.

# Load the built-in Orange data set and preview its first six rows
data("Orange")
head(Orange, n = 6L)
##   Tree  age circumference
## 1    1  118            30
## 2    1  484            58
## 3    1  664            87
## 4    1 1004           115
## 5    1 1231           120
## 6    1 1372           142
# Scatter plot of circumference against age to eyeball the relationship
# before fitting a model
ggplot(data = Orange, mapping = aes(age, circumference)) +
  theme_bw() +
  geom_point()

# Fit a simple linear regression of circumference on age
olm <- lm(formula = circumference ~ age, data = Orange)

# Show coefficients, residual quantiles and goodness-of-fit statistics
print(summary(olm))
## 
## Call:
## lm(formula = circumference ~ age, data = Orange)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.310 -14.946  -0.076  19.697  45.111 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.399650   8.622660   2.018   0.0518 .  
## age          0.106770   0.008277  12.900 1.93e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.74 on 33 degrees of freedom
## Multiple R-squared:  0.8345, Adjusted R-squared:  0.8295 
## F-statistic: 166.4 on 1 and 33 DF,  p-value: 1.931e-14
# Compute fitted values and residuals for the residual plot.
# Residuals use the standard convention (observed - fitted), the same sign
# as residuals(olm): a positive value means the model under-predicts the
# circumference, a negative value means it over-predicts.
Orange$pred <- predict(olm, newdata = Orange)
Orange$resid <- Orange$circumference - Orange$pred

# Residuals against age; the horizontal line at zero marks a perfect fit
ggplot(data = Orange, mapping = aes(age, resid)) +
  theme_bw() +
  geom_point() +
  geom_hline(yintercept = 0)