Bootstrap

# Load the weight data set (columns Weight and Gender, per the names() output
# below).
data <- read.csv(file = "C:/Users/Lab pc/Downloads/weightdata1.csv")

# View() opens a spreadsheet-style viewer, so it is only called when the
# session is interactive.
if (interactive()) {
  View(data)
}

names(data)

## [1] "Weight" "Gender"

# Show the group labels of the grouping column. The data has no `feed` column,
# so Gender is used; factor() makes this work whether read.csv() returned
# Gender as character or as factor.
levels(factor(data$Gender))

## [1] "female" "male"

# Side-by-side boxplots of weight for each gender.
boxplot(Weight ~ Gender, data = data,
        main = "weight by gender", xlab = "Gender", ylab = "weight(g)",
        las = 1)

# Mean weight within each gender, one group at a time.
with(data, mean(Weight[Gender == "male"]))

## [1] 64.09091

with(data, mean(Weight[Gender == "female"]))

## [1] 60.83333

# The same group means from a single tapply() call.
tapply(data$Weight, data$Gender, mean)

##   female     male 
## 60.83333 64.09091

# Absolute difference between the two group means.
with(data, abs(diff(tapply(Weight, Gender, mean))))

##     male 
## 3.257576

# Median weight within each gender.
tapply(data$Weight, data$Gender, median)

## female   male 
##   60.5   59.0

# Absolute difference between the two group medians.
with(data, abs(diff(tapply(Weight, Gender, median))))

## male 
##  1.5

# Observed test statistic: the same absolute median difference, stored as an
# unnamed number in test.stat2.
test.stat2 <- with(
  data,
  abs(median(Weight[Gender == "male"]) - median(Weight[Gender == "female"]))
)
test.stat2

## [1] 1.5

## Let's take a look at the 3 "classic" hypothesis tests we could consider,
## each of which comes with its own limitations

# Independent two-sample t-test that does not assume equal variances (Welch).
# `var.equal` is written out in full instead of relying on partial argument
# matching. `paired` is not passed: an unpaired test is the default, and the
# formula method of recent R releases rejects a `paired` argument.
t.test(data$Weight ~ data$Gender, var.equal = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  data$Weight by data$Gender
## t = -0.47651, df = 20.387, p-value = 0.6388
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
##  -17.50067  10.98552
## sample estimates:
## mean in group female   mean in group male 
##             60.83333             64.09091

# Wilcoxon rank-sum test, also known as the Mann-Whitney U test.
# As the output below states, the alternative is a non-zero location shift
# between the two groups (often loosely described as "medians differ").
# `paired` is not passed: an unpaired test is the default, and the formula
# method of recent R releases rejects a `paired` argument.
wilcox.test(data$Weight ~ data$Gender)

## Warning in wilcox.test.default(x = c(45L, 42L, 35L, 74L, 83L, 74L, 41L, : cannot
## compute exact p-value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data$Weight by data$Gender
## W = 54.5, p-value = 0.4981
## alternative hypothesis: true location shift is not equal to 0

# Two-sample Kolmogorov-Smirnov test comparing the male and the female weight
# distributions. ks.test() has no `paired` argument, so none is passed.
ks.test(
  data$Weight[data$Gender == "male"],
  data$Weight[data$Gender == "female"]
)

## Warning in ks.test(data$Weight[data$Gender == "male"], data$Weight[data$Gender
## == : cannot compute exact p-value with ties

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  data$Weight[data$Gender == "male"] and data$Weight[data$Gender == "female"]
## D = 0.33333, p-value = 0.5465
## alternative hypothesis: two-sided

# Bootstrap test: every bootstrap sample is drawn with replacement from the
# pooled weights (ignoring gender) and then split into two pseudo-groups with
# the same sizes as the observed gender groups. The test statistics computed
# on these pseudo-groups are compared with the observed statistic.

set.seed(112358) # for reproducibility
n <- length(data$Gender)
n

## [1] 23

B <- 10000   # number of bootstrap samples
variable <- data$Weight   # variable we will resample from

# Rows belonging to each pseudo-group, derived from the observed group sizes
# instead of being hard-coded. The first group gets the first rows, the second
# group the remaining ones.
group_sizes <- table(data$Gender)
stopifnot(length(group_sizes) == 2L, sum(group_sizes) == n)
rows_group1 <- seq_len(group_sizes[[1]])
rows_group2 <- setdiff(seq_len(n), rows_group1)

# now get bootstrap samples (without loops!): one column per bootstrap sample
BootstrapSamples <- matrix(sample(variable, size = n * B, replace = TRUE),
                           nrow = n, ncol = B)
dim(BootstrapSamples)

## [1]    23 10000

# Split every bootstrap sample (column) into the two pseudo-groups.
# drop = FALSE keeps the matrix shape even if a group has a single row.
boot_group1 <- BootstrapSamples[rows_group1, , drop = FALSE]
boot_group2 <- BootstrapSamples[rows_group2, , drop = FALSE]

# Bootstrap test statistics, one per column:
#   Boot.test.stat1 = |difference in pseudo-group means|
#   Boot.test.stat2 = |difference in pseudo-group medians|
# apply() calls mean()/median() on each column, exactly as a per-column loop
# would.
Boot.test.stat1 <- abs(
  apply(boot_group1, 2L, mean) - apply(boot_group2, 2L, mean)
)
Boot.test.stat2 <- abs(
  apply(boot_group1, 2L, median) - apply(boot_group2, 2L, median)
)

# First 20 BOOTSTRAP test statistics (these are not the observed statistics)
round(head(Boot.test.stat1, 20), 1)

##  [1]  1.5  5.2 13.8  1.5  6.4  9.1  0.1  3.4  0.4  6.7  3.2  1.1  0.2  8.0 13.4
## [16]  1.5  6.3 13.2  0.6  3.3

round(head(Boot.test.stat2, 20), 1)

##  [1] 10.0 13.5 26.0  1.0  9.0 19.0  0.0 19.0  1.0  2.0  8.0  3.5  5.0 18.5 28.5
## [16]  0.0 11.5 14.0  1.5  0.0

# Bootstrap p-value for the median-based statistic: the proportion of
# bootstrap statistics at least as large as the observed test.stat2
mean(Boot.test.stat2 >= test.stat2)

## [1] 0.8927

# Number of observations recorded for each gender.
table(data[["Gender"]])

## 
## female   male 
##     12     11

# Kernel density estimate of the bootstrap distribution of the median-based
# test statistic. The axis label names medians because Boot.test.stat2 holds
# absolute differences in medians, not means.
plot(density(Boot.test.stat2),
     xlab = expression(group("|", median[female] - median[male], "|")))

Bootstrap

Aman Pandey

6/21/2022