library(s20x)
# Read the emu measurements from "emu.txt"; header = TRUE takes the column
# names (height, weight) from the first line of the file.
emus20x.df <- read.table("emu.txt", header = TRUE)

# Look at the first and last rows to check the data were read in as expected.
head(emus20x.df)
##     height   weight
## 1 1.102333 27.47661
## 2 1.560069 46.71037
## 3 1.548347 41.11160
## 4 1.561041 43.75183
## 5 1.774824 59.21378
## 6 1.576280 61.61217
tail(emus20x.df)
##      height   weight
## 13 1.254460 36.49033
## 14 1.831090 56.02050
## 15 1.263084 46.27767
## 16 1.753566 50.93048
## 17 1.257601 27.01878
## 18 1.240139 46.67213

# Scatterplot of the response (weight) against the explanatory variable
# (height).
plot(weight ~ height, data = emus20x.df)

# Five-number summary plus mean for every column of the data frame.
summary(emus20x.df)
##      height          weight      
##  Min.   :1.009   Min.   : 27.02  
##  1st Qu.:1.255   1st Qu.: 37.34  
##  Median :1.519   Median : 45.01  
##  Mean   :1.451   Mean   : 47.40  
##  3rd Qu.:1.594   3rd Qu.: 50.83  
##  Max.   :1.831   Max.   :108.86
# One boxplot per variable, each drawn on its own scale.
boxplot(emus20x.df[["height"]])

boxplot(emus20x.df[["weight"]])

# s20x summary statistics (centre, spread, skewness, n) for each variable.
summaryStats(emus20x.df[["height"]])
## Minimum value:           1.01 
## Maximum value:           1.83 
## Mean value:              1.45 
## Median:                  1.52 
## Upper quartile:          1.59 
## Lower quartile:          1.26 
## Variance:                0.06 
## Standard deviation:      0.24 
## Midspread (IQR):         0.34 
## Skewness:                -0.18 
## Number of data values:   18
summaryStats(emus20x.df[["weight"]])
## Minimum value:           27.02 
## Maximum value:           108.86 
## Mean value:              47.4 
## Median:                  45.01 
## Upper quartile:          50.83 
## Lower quartile:          37.34 
## Variance:                333.1 
## Standard deviation:      18.25 
## Midspread (IQR):         13.49 
## Skewness:                1.95 
## Number of data values:   18

**Exploratory Analysis** Height: - Centre: 1.52m - Spread: 0.82m - Skew: Approximately symmetrical. The height boxplot suggests no extreme outliers.

Weight: - Centre: 45.01kg - Spread: 81.84kg - Skew: Right-skewed (skewness 1.95). The weight boxplot confirms the presence of an outlier, which is also visible in the scatterplot.

# Simple linear regression of weight on height using every observation.
emu.lm <- lm(formula = weight ~ height, data = emus20x.df)

# s20x diagnostic plot for the fitted model.
eovcheck(emu.lm)

summary(emu.lm)
## 
## Call:
## lm(formula = weight ~ height, data = emus20x.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.022  -8.309  -3.633   3.284  53.970 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   -15.33      23.05  -0.665   0.5155  
## height         43.23      15.69   2.756   0.0141 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.49 on 16 degrees of freedom
## Multiple R-squared:  0.3219, Adjusted R-squared:  0.2796 
## F-statistic: 7.597 on 1 and 16 DF,  p-value: 0.01405
# Normal Q-Q plot of the model residuals with its reference line.
qqnorm(residuals(emu.lm))
qqline(residuals(emu.lm))

# Normality checks on the raw variables themselves (not on the residuals).
normcheck(emus20x.df[["height"]])

normcheck(emus20x.df[["weight"]])

# Refit the same regression with row 11 of the data frame left out.
# NOTE(review): row 11 is presumably the heavy-weight outlier seen in the
# boxplot and scatterplot; confirm against the data before relying on the
# hard-coded index.
emu.lm2 <- lm(formula = weight ~ height, data = emus20x.df[-11, ])
summary(emu.lm2)
## 
## Call:
## lm(formula = weight ~ height, data = emus20x.df[-11, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.769  -4.606  -1.195   4.491  13.390 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -3.386      9.907  -0.342 0.737215    
## height        32.741      6.786   4.825 0.000223 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.591 on 15 degrees of freedom
## Multiple R-squared:  0.6082, Adjusted R-squared:  0.582 
## F-statistic: 23.28 on 1 and 15 DF,  p-value: 0.0002227
##emu.lm <- lm(weight ~ height, data = emus20x.df)
# Normality check on the refitted model.
normcheck(emu.lm2)

# Histogram of the residuals from emu.lm, the model fitted to all rows.
# NOTE(review): the normality check just before this uses emu.lm2 (row 11
# removed); confirm that emu.lm, not emu.lm2, is the model intended here.
hist(emu.lm$residuals)

# Prediction interval for the weight of a single emu with height 1.5, taken
# from emu.lm. The recorded output below belongs to emu.lm.
# NOTE(review): emu.lm still includes row 11; confirm whether the prediction
# should come from emu.lm2 instead.
predict(emu.lm, newdata = data.frame(height = 1.5), interval = "prediction")
##        fit      lwr      upr
## 1 49.52039 15.74109 83.29969