library(s20x)
# Read the emu measurements from a whitespace-delimited text file whose first
# line holds the column names (`height`, `weight`).
emus20x.df <- read.table("emu.txt", header = TRUE)

# Look at the first and last six rows to confirm the file was parsed as expected.
head(emus20x.df)
## height weight
## 1 1.102333 27.47661
## 2 1.560069 46.71037
## 3 1.548347 41.11160
## 4 1.561041 43.75183
## 5 1.774824 59.21378
## 6 1.576280 61.61217
tail(emus20x.df)
## height weight
## 13 1.254460 36.49033
## 14 1.831090 56.02050
## 15 1.263084 46.27767
## 16 1.753566 50.93048
## 17 1.257601 27.01878
## 18 1.240139 46.67213

# Scatterplot of the response (weight) against the explanatory variable (height).
plot(weight ~ height, data = emus20x.df)

# Five-number summary plus mean for both columns.
summary(emus20x.df)
## height weight
## Min. :1.009 Min. : 27.02
## 1st Qu.:1.255 1st Qu.: 37.34
## Median :1.519 Median : 45.01
## Mean :1.451 Mean : 47.40
## 3rd Qu.:1.594 3rd Qu.: 50.83
## Max. :1.831 Max. :108.86

# One boxplot per variable, drawn separately because the two columns are on
# very different scales.
boxplot(emus20x.df$height)
boxplot(emus20x.df$weight)

# s20x::summaryStats() prints centre, spread and skewness for a numeric vector.
summaryStats(emus20x.df$height)
## Minimum value: 1.01
## Maximum value: 1.83
## Mean value: 1.45
## Median: 1.52
## Upper quartile: 1.59
## Lower quartile: 1.26
## Variance: 0.06
## Standard deviation: 0.24
## Midspread (IQR): 0.34
## Skewness: -0.18
## Number of data values: 18
summaryStats(emus20x.df$weight)
## Minimum value: 27.02
## Maximum value: 108.86
## Mean value: 47.4
## Median: 45.01
## Upper quartile: 50.83
## Lower quartile: 37.34
## Variance: 333.1
## Standard deviation: 18.25
## Midspread (IQR): 13.49
## Skewness: 1.95
## Number of data values: 18
**Exploratory Analysis** Height: - Centre: 1.52m (median) - Spread: 0.82m (range) - Skew: Approximately symmetrical. The height boxplot suggests no extreme outliers.
Weight: - Centre: 45.01kg (median) - Spread: 81.84kg (range) - Skew: Approximately symmetrical apart from one very high value (skewness 1.95). The weight boxplot confirms the presence of an outlier, which is also visible in the scatterplot.
# Simple linear regression of weight on height, fitted to every row of the data.
emu.lm <- lm(weight ~ height, data = emus20x.df)

# s20x::eovcheck() draws the residuals-versus-fitted plot used to judge the
# equality-of-variance assumption.
eovcheck(emu.lm)
summary(emu.lm)
##
## Call:
## lm(formula = weight ~ height, data = emus20x.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.022 -8.309 -3.633 3.284 53.970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.33 23.05 -0.665 0.5155
## height 43.23 15.69 2.756 0.0141 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.49 on 16 degrees of freedom
## Multiple R-squared: 0.3219, Adjusted R-squared: 0.2796
## F-statistic: 7.597 on 1 and 16 DF, p-value: 0.01405

# Normal Q-Q plot of the full-data residuals, with the reference line overlaid.
# residuals() is the documented accessor for a fitted lm object.
qqnorm(residuals(emu.lm))
qqline(residuals(emu.lm))

# Normality plots for the raw variables themselves.
# NOTE(review): the regression assumption concerns the residuals, not the raw
# columns; confirm these two plots are wanted in addition to the checks above.
normcheck(emus20x.df$height)
normcheck(emus20x.df$weight)

# Refit with row 11 dropped. Presumably row 11 is the 108.86 weight outlier seen
# in the exploratory plots (the largest residual falls from 53.97 to 13.39) --
# TODO confirm against the data file.
emu.lm2 <- lm(weight ~ height, data = emus20x.df[-11, ])
summary(emu.lm2)
##
## Call:
## lm(formula = weight ~ height, data = emus20x.df[-11, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.769 -4.606 -1.195 4.491 13.390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.386 9.907 -0.342 0.737215
## height 32.741 6.786 4.825 0.000223 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.591 on 15 degrees of freedom
## Multiple R-squared: 0.6082, Adjusted R-squared: 0.582
## F-statistic: 23.28 on 1 and 15 DF, p-value: 0.0002227

# Normality check on the residuals of the refitted model.
normcheck(emu.lm2)

# Histogram of the residuals from the ORIGINAL fit (all rows).
# NOTE(review): this and the prediction below use emu.lm, not emu.lm2, even
# though emu.lm2 is the model just diagnosed. The recorded output matches
# emu.lm, so the behaviour is kept; confirm which model is intended.
hist(residuals(emu.lm))

# 95% prediction interval (predict.lm's default level) for the weight of a
# single emu of height 1.5.
predict(emu.lm, newdata = data.frame(height = 1.5), interval = "prediction")
## fit lwr upr
## 1 49.52039 15.74109 83.29969