A. Use the default data set InsectSprays: The counts of insects in agricultural experimental units treated with different insecticides.
Column 1: Insect count
Column 2: The type of spray
# Put the InsectSprays columns on the search path so that `count` and `spray`
# can be referenced by their bare names.
attach(InsectSprays)
# List the column names of the data set.
names(InsectSprays)
## [1] "count" "spray"
\(H_0\): The mean insect count is the same for every type of spray.
# Show the last six rows of the data set.
tail(InsectSprays)
## count spray
## 67 13 F
## 68 10 F
## 69 26 F
## 70 26 F
## 71 24 F
## 72 13 F
# List the spray categories, i.e. the levels of the `spray` factor.
# (Asking for the names of the string "spray" itself always gives NULL.)
levels(InsectSprays$spray)
## [1] "A" "B" "C" "D" "E" "F"
# Print the full data set (72 rows).
InsectSprays
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
## 7 10 A
## 8 23 A
## 9 17 A
## 10 20 A
## 11 14 A
## 12 13 A
## 13 11 B
## 14 17 B
## 15 21 B
## 16 11 B
## 17 16 B
## 18 14 B
## 19 17 B
## 20 17 B
## 21 19 B
## 22 21 B
## 23 7 B
## 24 13 B
## 25 0 C
## 26 1 C
## 27 7 C
## 28 2 C
## 29 3 C
## 30 1 C
## 31 2 C
## 32 1 C
## 33 3 C
## 34 0 C
## 35 1 C
## 36 4 C
## 37 3 D
## 38 5 D
## 39 12 D
## 40 6 D
## 41 4 D
## 42 3 D
## 43 5 D
## 44 5 D
## 45 5 D
## 46 5 D
## 47 2 D
## 48 4 D
## 49 3 E
## 50 5 E
## 51 3 E
## 52 5 E
## 53 3 E
## 54 6 E
## 55 1 E
## 56 1 E
## 57 3 E
## 58 2 E
## 59 6 E
## 60 4 E
## 61 11 F
## 62 9 F
## 63 15 F
## 64 22 F
## 65 15 F
## 66 16 F
## 67 13 F
## 68 10 F
## 69 26 F
## 70 26 F
## 71 24 F
## 72 13 F
There are six different types of spray categories in the data set (Categories A through F).
# Split the data into one data frame per spray category (A through F).
# `spray` is already a factor with levels "A".."F" in the built-in data set,
# so no conversion or relabelling is needed before subsetting.
# NOTE: the name `F` is kept because later code refers to it, but it masks the
# `F` shorthand for FALSE in this session; always spell out TRUE/FALSE.
A <- subset(InsectSprays, spray == "A")
B <- subset(InsectSprays, spray == "B")
C <- subset(InsectSprays, spray == "C")
D <- subset(InsectSprays, spray == "D")
E <- subset(InsectSprays, spray == "E")
F <- subset(InsectSprays, spray == "F")
# Count the rows in each per-spray subset to check that the groups are the
# same size.
NROW(A)
## [1] 12
NROW(B)
## [1] 12
NROW(C)
## [1] 12
NROW(D)
## [1] 12
NROW(E)
## [1] 12
NROW(F)
## [1] 12
Every spray category has the same number of rows: 12 observations each.
# Mean insect count for spray A alone. The columns are looked up inside
# InsectSprays explicitly, so the result does not depend on attach().
with(InsectSprays, mean(count[spray == "A"]))
## [1] 14.5
# Mean insect count for every spray category.
with(InsectSprays, tapply(count, spray, mean))
## A B C D E F
## 14.500000 15.333333 2.083333 4.916667 3.500000 16.666667
# Variance of the insect count for every spray category.
with(InsectSprays, tapply(count, spray, var))
## A B C D E F
## 22.272727 18.242424 3.901515 6.265152 3.000000 38.606061
# Normal Q-Q plot (with reference line) of the insect counts for each spray
# category, one plot per category. The title names the category so that the
# six plots can be told apart.
for (spray_level in levels(InsectSprays$spray)) {
  level_counts <- InsectSprays$count[InsectSprays$spray == spray_level]
  qqnorm(level_counts, main = paste("Normal Q-Q Plot: spray", spray_level))
  qqline(level_counts)
}
# Bartlett test of homogeneity of variances of `count` across the spray groups.
bartlett.test(count ~ spray, data = InsectSprays)
##
## Bartlett test of homogeneity of variances
##
## data: count by spray
## Bartlett's K-squared = 25.96, df = 5, p-value = 9.085e-05
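The test rejects the null hypothesis of equal variances (p-value = 9.085e-05), so the variances of the insect counts are not homogeneous across the spray categories.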
# Fit the one-way ANOVA of insect count on spray type once, then reuse the
# fitted model for the printed fit, the ANOVA table and the Tukey comparisons
# instead of refitting it for every step.
spray_aov <- aov(count ~ spray, data = InsectSprays)
spray_aov
## Call:
## aov(formula = count ~ spray, data = InsectSprays)
##
## Terms:
## spray Residuals
## Sum of Squares 2668.833 1015.167
## Deg. of Freedom 5 66
##
## Residual standard error: 3.921902
## Estimated effects may be unbalanced
# ANOVA table: F test for a difference in mean count between the sprays.
summary(spray_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## spray 5 2669 533.8 34.7 <2e-16 ***
## Residuals 66 1015 15.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey multiple comparisons of the mean count for every pair of sprays.
TukeyHSD(spray_aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = count ~ spray, data = InsectSprays)
##
## $spray
## diff lwr upr p adj
## B-A 0.8333333 -3.866075 5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A 2.1666667 -2.532742 6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B 1.3333333 -3.366075 6.032742 0.9603075
## D-C 2.8333333 -1.866075 7.532742 0.4920707
## E-C 1.4166667 -3.282742 6.116075 0.9488669
## F-C 14.5833333 9.883925 19.282742 0.0000000
## E-D -1.4166667 -6.116075 3.282742 0.9488669
## F-D 11.7500000 7.050591 16.449409 0.0000000
## F-E 13.1666667 8.467258 17.866075 0.0000000
# Quick default boxplot of insect count by spray type.
boxplot(count ~ spray, data = InsectSprays)
# Labelled boxplot with one fill colour per spray category. Only the formula,
# the data and graphical parameters are passed; the per-spray data frames are
# not part of the plot specification and do not belong in this call.
boxplot(count ~ spray,
  data = InsectSprays,
  main = "Counts of insects treated with different insecticides",
  xlab = "The type of spray",
  ylab = "Insect Count",
  col = c("pink", "orange", "yellow", "red", "purple", "blue")
)
There are two outliers: one in spray C (a count of 7) and one in spray D (a count of 12).
library(ggplot2)
# Same comparison drawn with ggplot2: `spray` on the x axis, `count` on the
# y axis, and the boxes filled by spray category.
ggplot(InsectSprays, aes(x=spray, y=count, fill=spray)) + geom_boxplot()
B. Consider the default data set mtcars for the next two problems.
# Put the mtcars columns on the search path so they can be referenced by
# their bare names.
attach(mtcars)
# Print numbers with 4 significant digits from here on.
options(digits = 4)
# Correlation matrix of all mtcars columns.
cor(mtcars)
## mpg cyl disp hp drat wt qsec vs
## mpg 1.0000 -0.8522 -0.8476 -0.7762 0.68117 -0.8677 0.4187 0.6640
## cyl -0.8522 1.0000 0.9020 0.8324 -0.69994 0.7825 -0.5912 -0.8108
## disp -0.8476 0.9020 1.0000 0.7909 -0.71021 0.8880 -0.4337 -0.7104
## hp -0.7762 0.8324 0.7909 1.0000 -0.44876 0.6587 -0.7082 -0.7231
## drat 0.6812 -0.6999 -0.7102 -0.4488 1.00000 -0.7124 0.0912 0.4403
## wt -0.8677 0.7825 0.8880 0.6587 -0.71244 1.0000 -0.1747 -0.5549
## qsec 0.4187 -0.5912 -0.4337 -0.7082 0.09120 -0.1747 1.0000 0.7445
## vs 0.6640 -0.8108 -0.7104 -0.7231 0.44028 -0.5549 0.7445 1.0000
## am 0.5998 -0.5226 -0.5912 -0.2432 0.71271 -0.6925 -0.2299 0.1683
## gear 0.4803 -0.4927 -0.5556 -0.1257 0.69961 -0.5833 -0.2127 0.2060
## carb -0.5509 0.5270 0.3950 0.7498 -0.09079 0.4276 -0.6562 -0.5696
## am gear carb
## mpg 0.59983 0.4803 -0.55093
## cyl -0.52261 -0.4927 0.52699
## disp -0.59123 -0.5556 0.39498
## hp -0.24320 -0.1257 0.74981
## drat 0.71271 0.6996 -0.09079
## wt -0.69250 -0.5833 0.42761
## qsec -0.22986 -0.2127 -0.65625
## vs 0.16835 0.2060 -0.56961
## am 1.00000 0.7941 0.05753
## gear 0.79406 1.0000 0.27407
## carb 0.05753 0.2741 1.00000
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Pairs-panel plot of the mtcars columns, with "kendall" selected as the
# `method` argument.
pairs.panels(mtcars, method = "kendall")