A. Use the built-in data set InsectSprays: the counts of insects in agricultural experimental units treated with different insecticides. Column 1: insect count. Column 2: the type of spray.
# Load the built-in InsectSprays data frame into the workspace.
data("InsectSprays")
\(H_0\): The mean insect count is the same for every spray.
# Print the spray factor (the second column), selected by name.
InsectSprays[["spray"]]
## [1] A A A A A A A A A A A A B B B B B B B B B B B B C C C C C C C C C C C
## [36] C D D D D D D D D D D D D E E E E E E E E E E E E F F F F F F F F F F
## [71] F F
## Levels: A B C D E F
# Mean insect count for spray A, selected by factor level rather than by
# hard-coded row positions.
mean(InsectSprays$count[InsectSprays$spray == "A"])
## [1] 14.5
# Mean insect count within each spray group.
with(InsectSprays, tapply(count, spray, mean))
## A B C D E F
## 14.500000 15.333333 2.083333 4.916667 3.500000 16.666667
# Variance of the insect count within each spray group.
with(InsectSprays, tapply(count, spray, var))
## A B C D E F
## 22.272727 18.242424 3.901515 6.265152 3.000000 38.606061
# Split the counts into one numeric vector per spray, keyed by factor level,
# instead of relying on hard-coded row ranges (1:12, 13:24, ...).
counts_by_spray <- split(InsectSprays$count, InsectSprays$spray)
A <- counts_by_spray[["A"]]
B <- counts_by_spray[["B"]]
C <- counts_by_spray[["C"]]
D <- counts_by_spray[["D"]]
E <- counts_by_spray[["E"]]
# Spray F keeps the original lowercase name `f` (presumably chosen to avoid
# masking `F`, R's shorthand for FALSE).
f <- counts_by_spray[["F"]]

# One normal Q-Q plot per spray, in order A to F, to eyeball normality of the
# counts within each group.
for (spray_counts in list(A, B, C, D, E, f)) {
  qqnorm(spray_counts)
}
# Bartlett's test of equal variances across sprays. The counts must be grouped
# by the spray factor (count ~ spray); passing the whole data frame treats each
# column, including the factor itself, as a separate sample.
bartlett.test(count ~ spray, data = InsectSprays)
##
## Bartlett test of homogeneity of variances
##
## data: count by spray
## Bartlett's K-squared = 25.96, df = 5, p-value = 9.085e-05
# p < 0.05, so the variances are not homogeneous across the sprays.
# One-way ANOVA of insect count on spray type. The model is fitted once and
# reused for both the printed fit and the ANOVA table instead of being refitted
# for each call.
spray_fit <- aov(count ~ spray, data = InsectSprays)
spray_fit
## Call:
## aov(formula = count ~ spray, data = InsectSprays)
##
## Terms:
## spray Residuals
## Sum of Squares 2668.833 1015.167
## Deg. of Freedom 5 66
##
## Residual standard error: 3.921902
## Estimated effects may be unbalanced
summary(spray_fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## spray 5 2669 533.8 34.7 <2e-16 ***
## Residuals 66 1015 15.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# a
#DF_(within) <- 66
#DF_(between) <- 5
# b
#SS_(within) <- 1015.167
#SS_(between) <- 2668.833
# c
#MS_(within) <- 15.4
#MS_(between) <- 533.8
# d
#f <- 34.7
# e
#p < 2*10^-16
# Since p is far below 0.05, we reject the null hypothesis that all sprays have the same mean insect count.
# Tukey's honest significant differences for every pair of sprays, at the
# default 95% family-wise confidence level.
TukeyHSD(x = aov(formula = count ~ spray, data = InsectSprays), conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = count ~ spray, data = InsectSprays)
##
## $spray
## diff lwr upr p adj
## B-A 0.8333333 -3.866075 5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A 2.1666667 -2.532742 6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B 1.3333333 -3.366075 6.032742 0.9603075
## D-C 2.8333333 -1.866075 7.532742 0.4920707
## E-C 1.4166667 -3.282742 6.116075 0.9488669
## F-C 14.5833333 9.883925 19.282742 0.0000000
## E-D -1.4166667 -6.116075 3.282742 0.9488669
## F-D 11.7500000 7.050591 16.449409 0.0000000
## F-E 13.1666667 8.467258 17.866075 0.0000000
# Plain boxplot of insect count by spray.
boxplot(count ~ spray, data = InsectSprays)
# Same boxplot with axis titles and one fill colour per spray. The axis-title
# arguments are `xlab` and `ylab`; `xlabel`/`ylabel` are not boxplot arguments,
# so the custom titles were never applied.
boxplot(
  count ~ spray,
  data = InsectSprays,
  xlab = "Spray",
  ylab = "Insect count",
  col = c("Blue", "red", "yellow", "cyan", "purple", "green")
)
# there are outliers in sprays C and D.
11. Create the same boxplot using ggplot() and geom_boxplot(). Fill the boxplots with colors as well.
library(ggplot2)
# Boxplot of insect count by spray with one fill colour per spray. The fill is
# mapped to the spray factor and the colours are supplied through a manual
# scale (assigned in factor-level order), so the plot does not depend on a
# constant colour vector happening to match the number of boxes. The legend is
# suppressed because the x axis already labels each spray.
ggplot(InsectSprays, aes(x = spray, y = count, fill = spray)) +
  geom_boxplot(show.legend = FALSE) +
  scale_fill_manual(
    values = c("Blue", "red", "yellow", "cyan", "purple", "green")
  )
# Load mtcars and print the Pearson correlation matrix of all its columns.
# cor() has no `digits` argument, which is why passing one is reported as
# unused; to limit the decimals, round the result instead, e.g.
# round(cor(mtcars), digits = 4).
data("mtcars")
cor(x = mtcars, method = "pearson")
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
## qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
## vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
## am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
## gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
## carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
## cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
## hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
## drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
## wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
## qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
## am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
## gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
## carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# Scatterplot matrix of every pair of mtcars columns; this is the pairs() call
# that plot() dispatches to for a data frame with more than two columns.
pairs(data.matrix(mtcars))
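#ggplot(fitted(mtcars))
#ggplot(cor(mtcars, method="kendall"))
# Neither of these attempts produced a proper graph, and the document would not knit unless they were commented out.
# fitted() is meant for fitted model objects rather than a raw data frame, and cor() returns a matrix, whereas
# ggplot() expects a data frame together with aes() mappings and a geom layer.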