# Load the weight data set. The recorded output below shows two columns:
# Weight and Gender.
# NOTE(review): the path is absolute and machine-specific; the script only runs
# where this file exists at exactly this location.
data <- read.csv(file = "C:/Users/Lab pc/Downloads/weightdata1.csv")
View(data) # opens the interactive data viewer
names(data)
## [1] "Weight" "Gender"
# List the group labels. The data has no `feed` column (see names() above), so
# the labels are taken from Gender; factor() makes this work whether read.csv()
# returned the column as character or as factor.
levels(factor(data$Gender))
## [1] "female" "male"
# Side-by-side boxplots of weight for each gender (las = 1: horizontal tick
# labels on both axes).
boxplot(Weight ~ Gender, data = data, las = 1,
        ylab = "weight(g)", xlab = "Gender", main = "weight by gender")

# Mean weight within each gender, via logical subsetting
with(data, mean(Weight[Gender == "male"]))
## [1] 64.09091
with(data, mean(Weight[Gender == "female"]))
## [1] 60.83333
# Alternative: both group means in a single tapply() call
group.means <- tapply(data$Weight, data$Gender, mean)
group.means
## female male
## 60.83333 64.09091
# Absolute difference in group means
abs(diff(group.means))
## male
## 3.257576
# Group medians, computed the same way
group.medians <- tapply(data$Weight, data$Gender, median)
group.medians
## female male
## 60.5 59.0
# Absolute difference in group medians
abs(diff(group.medians))
## male
## 1.5
# Observed test statistic 2: absolute difference between the male and female
# median weights, stored for the bootstrap comparison further down.
male.median <- with(data, median(Weight[Gender == "male"]))
female.median <- with(data, median(Weight[Gender == "female"]))
test.stat2 <- abs(male.median - female.median)
test.stat2
## [1] 1.5
## Let's take a look at the 3 "classic" hypothesis tests we could consider,
## each of which comes with its own limitations.
# Independent 2-sample t-test (Welch version: variances not assumed equal).
# `paired` is deliberately not passed: an unpaired test is the default, and the
# formula method of t.test() rejects a `paired` argument in R >= 4.4.
# `var.equal` is spelled out instead of relying on partial matching of `var.eq`.
t.test(data$Weight ~ data$Gender, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: data$Weight by data$Gender
## t = -0.47651, df = 20.387, p-value = 0.6388
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -17.50067 10.98552
## sample estimates:
## mean in group female mean in group male
## 60.83333 64.09091
# Wilcoxon rank-sum test, aka Mann-Whitney U.
# H0: no location shift between the two groups (see the alternative hypothesis
# in the output below). As above, `paired` is omitted because the formula
# method does not accept it and unpaired is the default.
wilcox.test(data$Weight ~ data$Gender)
## Warning in wilcox.test.default(x = c(45L, 42L, 35L, 74L, 83L, 74L, 41L, : cannot
## compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: data$Weight by data$Gender
## W = 54.5, p-value = 0.4981
## alternative hypothesis: true location shift is not equal to 0
# Kolmogorov-Smirnov 2-sample test: compares the two empirical distributions.
# ks.test() has no `paired` argument, so none is passed.
ks.test(data$Weight[data$Gender == "male"],
        data$Weight[data$Gender == "female"])
## Warning in ks.test(data$Weight[data$Gender == "male"], data$Weight[data$Gender
## == : cannot compute exact p-value with ties
##
## Two-sample Kolmogorov-Smirnov test
##
## data: data$Weight[data$Gender == "male"] and data$Weight[data$Gender == "female"]
## D = 0.33333, p-value = 0.5465
## alternative hypothesis: two-sided
# Bootstrap test: resample the pooled weights with replacement, split every
# resample into two pseudo-groups of the observed group sizes, and recompute
# the test statistics on each split.
set.seed(112358) # for reproducibility
n <- length(data$Gender) # total number of observations
n
## [1] 23
B <- 10000 # number of bootstrap samples
variable <- data$Weight # variable we will resample from
# Draw all B bootstrap samples at once (no loop): one sample per column
BootstrapSamples <- matrix(sample(variable, size = n * B, replace = TRUE),
                           nrow = n, ncol = B)
dim(BootstrapSamples)
## [1] 23 10000
# Derive the row split from the observed group sizes instead of hard-coding
# 1:12 and 13:23. With 12 females and 11 males this gives exactly those rows.
group.sizes <- table(data$Gender)
stopifnot(length(group.sizes) == 2L, sum(group.sizes) == n)
group1.rows <- seq_len(group.sizes[[1]])
group2.rows <- setdiff(seq_len(n), group1.rows)
# Absolute difference of a summary statistic between the two pseudo-groups,
# evaluated column by column (one value per bootstrap sample).
boot.abs.diff <- function(stat) {
  abs(apply(BootstrapSamples[group1.rows, , drop = FALSE], 2, stat) -
        apply(BootstrapSamples[group2.rows, , drop = FALSE], 2, stat))
}
# Bootstrap test statistic 1: absolute difference in means
Boot.test.stat1 <- boot.abs.diff(mean)
# Bootstrap test statistic 2: absolute difference in medians
Boot.test.stat2 <- boot.abs.diff(median)
# First 20 BOOTSTRAP test statistics (these are not the observed statistics)
round(Boot.test.stat1[1:20], 1)
## [1] 1.5 5.2 13.8 1.5 6.4 9.1 0.1 3.4 0.4 6.7 3.2 1.1 0.2 8.0 13.4
## [16] 1.5 6.3 13.2 0.6 3.3
round(Boot.test.stat2[1:20], 1)
## [1] 10.0 13.5 26.0 1.0 9.0 19.0 0.0 19.0 1.0 2.0 8.0 3.5 5.0 18.5 28.5
## [16] 0.0 11.5 14.0 1.5 0.0
# Bootstrap p-value for the median-based statistic: the share of bootstrap
# statistics at least as large as the observed one (test.stat2).
# NOTE(review): Boot.test.stat1 has no observed counterpart in this section, so
# no p-value is computed for the mean-based statistic here.
mean(Boot.test.stat2 >= test.stat2)
## [1] 0.8927
# Number of observations in each gender group
table(data$Gender)
##
## female male
## 12 11
# Kernel density estimate of the bootstrap distribution of test statistic 2.
# Boot.test.stat2 holds absolute differences in group MEDIANS, so the axis is
# labelled accordingly (the earlier label described a difference of means).
plot(density(Boot.test.stat2),
     xlab = "bootstrap |difference in group medians|")
