A. Use the default data set InsectSprays: the counts of insects in agricultural experimental units treated with different insecticides. Column 1: insect count. Column 2: the type of spray. H0: The mean insect count is the same for every spray.
# Inspect the built-in InsectSprays data set before modelling.
# The data frame is not attach()ed: every statement in this section refers to
# it explicitly (InsectSprays$... or data = InsectSprays), and attaching would
# only add masking risk on the search path.
names(InsectSprays)
## [1] "count" "spray"
# View() opens a spreadsheet-style viewer, so only call it when a user is
# present; the rest of the script then also runs in batch mode.
if (interactive()) {
  View(InsectSprays)
}
str(InsectSprays)
## 'data.frame': 72 obs. of 2 variables:
## $ count: num 10 7 20 14 14 12 10 23 17 20 ...
## $ spray: Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(InsectSprays)
## count spray
## Min. : 0.00 A:12
## 1st Qu.: 3.00 B:12
## Median : 7.00 C:12
## Mean : 9.50 D:12
## 3rd Qu.:14.25 E:12
## Max. :26.00 F:12
# The grouping variable must be a factor for the one-way ANOVA below.
is.factor(InsectSprays$spray)
## [1] TRUE
Find the mean insect count for spray category A. (The result appears in the output of #3.)
Show the mean insect count for every spray category in a single row of output. You may use the function tapply().
# Mean insect count within each spray group.
with(InsectSprays, tapply(count, spray, mean))
## A B C D E F
## 14.500000 15.333333 2.083333 4.916667 3.500000 16.666667
# Sample variance of the insect count within each spray group.
with(InsectSprays, tapply(count, spray, var))
## A B C D E F
## 22.272727 18.242424 3.901515 6.265152 3.000000 38.606061
# Normal Q-Q plot of the insect counts for each spray, one panel per spray in
# a 2 x 3 grid, each with a reference line in its own colour.
spray_levels <- levels(InsectSprays$spray)
line_cols <- c("red", "orange", "brown", "purple", "yellow", "green")
# par() returns the previous settings; they are kept so the 2 x 3 layout does
# not leak into the plots drawn later in the script.
old_par <- par(mfrow = c(2, 3))
for (i in seq_along(spray_levels)) {
  spray_counts <- InsectSprays$count[InsectSprays$spray == spray_levels[i]]
  # Name the spray in the title; otherwise the six panels are indistinguishable.
  qqnorm(
    spray_counts,
    main = paste("Normal Q-Q Plot: spray", spray_levels[i])
  )
  qqline(spray_counts, col = line_cols[i])
}
par(old_par)
# Bartlett's test of the null hypothesis that the insect-count variance is the
# same in every spray group.
bartlett.test(formula = count ~ spray, data = InsectSprays)
##
## Bartlett test of homogeneity of variances
##
## data: count by spray
## Bartlett's K-squared = 25.96, df = 5, p-value = 9.085e-05
The variances are not homogeneous: Bartlett's test rejects the hypothesis of equal variances across sprays (p = 9.085e-05 < 0.05).
# One-way ANOVA of insect count on spray type; the fit is stored in `Anova`
# so that it can be summarised and reused afterwards.
Anova <- aov(count ~ spray, data = InsectSprays)
# Print the stored fit instead of fitting the identical model a second time.
print(Anova)
## Call:
## aov(formula = count ~ spray, data = InsectSprays)
##
## Terms:
## spray Residuals
## Sum of Squares 2668.833 1015.167
## Deg. of Freedom 5 66
##
## Residual standard error: 3.921902
## Estimated effects may be unbalanced
# ANOVA table: degrees of freedom, sums of squares, mean squares, F and p.
summary(Anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## spray 5 2669 533.8 34.7 <2e-16 ***
## Residuals 66 1015 15.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
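A. df = 5 (spray), 66 (residuals) B. Sums of squares = 2669 (spray), 1015 (residuals) C. Mean squares (variance estimates) = 533.8 (spray), 15.4 (residuals) D. F value = 34.7 E. Reject the null hypothesis (p < 2e-16).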
# Tukey's honest significant differences for every pair of sprays, with 95%
# family-wise confidence intervals, computed from the fitted model `Anova`.
TukeyHSD(Anova, which = "spray", ordered = FALSE, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = count ~ spray, data = InsectSprays)
##
## $spray
## diff lwr upr p adj
## B-A 0.8333333 -3.866075 5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A 2.1666667 -2.532742 6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B 1.3333333 -3.366075 6.032742 0.9603075
## D-C 2.8333333 -1.866075 7.532742 0.4920707
## E-C 1.4166667 -3.282742 6.116075 0.9488669
## F-C 14.5833333 9.883925 19.282742 0.0000000
## E-D -1.4166667 -6.116075 3.282742 0.9488669
## F-D 11.7500000 7.050591 16.449409 0.0000000
## F-E 13.1666667 8.467258 17.866075 0.0000000
# Box plots of insect count by spray. `spray` is already a factor, so it is
# used directly; wrapping it in as.factor() only turned the default x-axis
# label into "as.factor(spray)".
boxplot(count ~ spray, data = InsectSprays)
# The same plot with a labelled y-axis and one fill colour per spray.
boxplot(
  count ~ spray,
  data = InsectSprays,
  ylab = "count",
  col = c("blue", "green", "red", "orange", "purple", "brown")
)
library(ggplot2)
# ggplot2 version of the count-by-spray box plot: orange boxes with blue
# outlines and capitalised axis titles.
ggplot(data = InsectSprays, mapping = aes(x = spray, y = count)) +
  geom_boxplot(colour = "blue", fill = "orange") +
  scale_x_discrete() +
  labs(x = "Spray", y = "Count")
B. Consider the default data set mtcars for the next two problems.
# Pearson correlation matrix for every pair of variables in mtcars.
# mtcars is passed explicitly instead of being attach()ed: attaching it put an
# `mpg` object on the search path that masked ggplot2's `mpg` data set.
cor(mtcars, method = "pearson")
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
## qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
## vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
## am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
## gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
## carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
## cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
## hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
## drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
## wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
## qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
## am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
## gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
## carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Scatter-plot matrix of all mtcars variables drawn by psych::pairs.panels,
# with Kendall requested as the correlation method.
psych::pairs.panels(mtcars, method = "kendall")