# Part 1 setup: split the cat body-weight data by sex and run a two-sample
# t-test on the body weights.
# `cats` is presumably the MASS `cats` dataset (columns Sex, Bwt) -- it is
# loaded from MASS only when no object of that name is already available.
# attach() is intentionally not used: every column below is reached through
# its data frame, so attaching would only add masking risk on the search path.
if (!exists("cats")) {
  data(cats, package = "MASS")
}
male <- subset(cats, subset = (cats$Sex == "M"))
female <- subset(cats, subset = (cats$Sex == "F"))
# Part 1
# Hypothesis test
# H(o): mu1=mu2
# H(a): mu1!=mu2 (claim)
mu <- 0
alpha <- .05
# Two tailed test because the claim is about there being a difference
# The confidence level must be passed as `conf.level`; an argument spelled
# `confidence` does not match any t.test() parameter, so the default of 0.95
# would stay in force no matter what `alpha` is set to.
CVusingt.test <- t.test(male$Bwt, female$Bwt,
  alternative = "two.sided",
  mu = mu,
  conf.level = 1 - alpha
)
CVusingt.test
##
## Welch Two Sample t-test
##
## data: male$Bwt and female$Bwt
## t = 8.7095, df = 136.84, p-value = 8.831e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4177242 0.6631268
## sample estimates:
## mean of x mean of y
## 2.900000 2.359574
# Walk through the components stored in the htest object from Part 1.
attributes(CVusingt.test)
## $names
## [1] "statistic" "parameter" "p.value" "conf.int" "estimate"
## [6] "null.value" "stderr" "alternative" "method" "data.name"
##
## $class
## [1] "htest"
CVusingt.test[["statistic"]] # value of the t statistic
## t
## 8.709488
CVusingt.test[["parameter"]] # degrees of freedom used by the test
## df
## 136.8379
CVusingt.test[["p.value"]] # p-value of the test
## [1] 8.831034e-15
CVusingt.test[["conf.int"]] # confidence interval for the difference (2 numbers)
## [1] 0.4177242 0.6631268
## attr(,"conf.level")
## [1] 0.95
CVusingt.test[["estimate"]] # sample means of the two groups
## mean of x mean of y
## 2.900000 2.359574
CVusingt.test[["null.value"]] # hypothesized difference in means
## difference in means
## 0
CVusingt.test[["stderr"]] # standard error of the difference in means
## [1] 0.0620502
CVusingt.test[["alternative"]] # direction of the alternative hypothesis
## [1] "two.sided"
# Manual check of the Part 1 result, following the textbook steps.
# PRE WORK -- record the sample statistics for each group
# (male = sample 1, female = sample 2)
n1 <- length(male[["Bwt"]]) # sample size, males
n2 <- length(female[["Bwt"]]) # sample size, females
xbar1 <- mean(male[["Bwt"]]) # sample mean, males
xbar2 <- mean(female[["Bwt"]]) # sample mean, females
alpha <- 0.05 # significance level
s1 <- sd(male[["Bwt"]]) # sample standard deviation, males
s2 <- sd(female[["Bwt"]]) # sample standard deviation, females
df1 <- n1 - 1
df2 <- n2 - 1
# STEP 1: Hypotheses and Claims
# H(o): mu1=mu2
# H(a): mu1!=mu2 (claim)
mu1minusmu2 <- 0 # hypothesized difference between the population means
claim <- "There is a significant difference in the average cat bodyweight between male vs. female"
desiredaction <- "support"
alphastatement <- c("at alpha=", alpha, ".")
# STEP 2: Find critical values
# The population standard deviations are unknown, so the t distribution is used.
# The claim is "different", not "greater" or "less", so alpha is split
# across two tails.
tails <- 2
alphatouse <- alpha / tails
# Use the smaller of the two per-sample degrees of freedom
dftouse <- min(df1, df2)
dftouse
## [1] 46
# Upper critical value; the lower one is its negative
CV <- qt(1 - alphatouse, df = dftouse)
CV
## [1] 2.012896
# STEP 3: Compute the test value
# (computed before the curve is drawn so the x-axis can be sized to show it)
# t = ((xbar1 - xbar2) - mu1minusmu2) / sqrt((s1^2 / n1) + (s2^2 / n2))
t <- ((xbar1-xbar2)-(mu1minusmu2))/sqrt((s1^2/n1)+(s2^2/n2))
t
## [1] 8.709488
# STEP 2.1 -- DRAW THE CURVE (example using regular R plotting)
# Half-width of the x-axis: at least 6, widened whenever the test value would
# otherwise land outside the plotting region, where the line and label drawn
# for it in STEP 3.1 would be clipped and never appear.
xhalfwidth <- max(6, ceiling(abs(t)) + 1)
x <- seq(-xhalfwidth, xhalfwidth, length.out = 1000)
y <- dt(x, df = dftouse)
plot(x, y, type = "l", main = "Cat bwt Male vs. Female")
# Shade both rejection regions of the two-tailed test
polygon(c(x[x >= CV], max(x), CV), c(y[x >= CV], 0, 0), col = "red")
polygon(c(x[x <= -CV], -CV, min(x)), c(y[x <= -CV], 0, 0), col = "red")
cvtext <- paste("C.V.:", round(CV, digits = 3), sep = " ")
text(
  x = CV + 0.5,
  y = max(y) * .5,
  labels = cvtext,
  col = "red",
  cex = 2
)
alphatext <- paste("Alpha:", round(alpha, digits = 3), sep = " ")
text(
  x = CV + 0.25,
  y = max(y) * .75,
  labels = alphatext,
  col = "red",
  cex = 2
)
# Mark both critical values, matching the two shaded tails
abline(v = c(-CV, CV), col = "red")
# STEP 3.1 plot the test value
ttext <- paste("t:", round(t, digits = 3), sep = " ")
text(
  x = t - 0.4,
  y = max(y) * .35,
  labels = ttext,
  col = "green",
  cex = 2
)
abline(v = t, col = "green")
# STEP 4: Decide whether the null hypothesis is rejected
# Two-tailed rule: reject when the test value is further from zero than the
# critical value, i.e. when the difference of magnitudes is positive.
comparison <- abs(t) - abs(CV)
conclusion <- if (comparison > 0) {
  "Reject the Null Hypothesis."
} else {
  "Do Not Reject the Null Hypothesis."
}
conclusionstatement <- c(
  if (comparison > 0) "There is enough evidence to" else "There is not enough evidence to",
  desiredaction,
  "the following claim:"
)
# STEP 5: State the Conclusion
cat(conclusion, conclusionstatement, claim, alphastatement, sep = " ")
## Reject the Null Hypothesis. There is enough evidence to support the following claim: There is a significant difference in the average cat bodyweight between male vs. female at alpha= 0.05 .
# Part 2
# Paired (dependent-samples) t-test on ten subjects measured twice.
# STEP 1: Hypotheses and Claims
# NULL: muD = 0
# ALT: muD < 0
# CLAIM: muD < 0
muD <- 0 # hypothesized mean of the paired differences
alpha <- 0.05
SampleDataX1 <- c(4.6, 7.8, 9.1, 5.6, 6.9, 8.5, 5.3, 7.1, 3.2, 4.4)
SampleDataX2 <- c(6.6, 7.7, 9.0, 6.2, 7.8, 8.3, 5.9, 6.5, 5.8, 4.9)
# Left-tailed test on the differences X1 - X2
ttestoutput <- t.test(SampleDataX1, SampleDataX2,
  mu = muD, alternative = "less", paired = TRUE, conf.level = 1 - alpha
)
# Show the internal structure of the paired-test result
str(ttestoutput)
## List of 10
## $ statistic : Named num -1.95
## ..- attr(*, "names")= chr "t"
## $ parameter : Named num 9
## ..- attr(*, "names")= chr "df"
## $ p.value : num 0.0416
## $ conf.int : num [1:2] -Inf -0.0366
## ..- attr(*, "conf.level")= num 0.95
## $ estimate : Named num -0.62
## ..- attr(*, "names")= chr "mean difference"
## $ null.value : Named num 0
## ..- attr(*, "names")= chr "mean difference"
## $ stderr : num 0.318
## $ alternative: chr "less"
## $ method : chr "Paired t-test"
## $ data.name : chr "SampleDataX1 and SampleDataX2"
## - attr(*, "class")= chr "htest"
# Formatted test summary
print(ttestoutput)
##
## Paired t-test
##
## data: SampleDataX1 and SampleDataX2
## t = -1.9481, df = 9, p-value = 0.04161
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
## -Inf -0.03659503
## sample estimates:
## mean difference
## -0.62
# Component names and class of the result object
attributes(ttestoutput)
## $names
## [1] "statistic" "parameter" "p.value" "conf.int" "estimate"
## [6] "null.value" "stderr" "alternative" "method" "data.name"
##
## $class
## [1] "htest"
ttestoutput[["statistic"]] # value of the t statistic
## t
## -1.948098
ttestoutput[["parameter"]] # degrees of freedom
## df
## 9
ttestoutput[["p.value"]] # p-value of the test
## [1] 0.04161026
ttestoutput[["conf.int"]] # one-sided confidence interval (2 numbers)
## [1] -Inf -0.03659503
## attr(,"conf.level")
## [1] 0.95
ttestoutput[["estimate"]] # sample mean of the paired differences
## mean difference
## -0.62
ttestoutput[["null.value"]] # hypothesized mean difference
## mean difference
## 0
ttestoutput[["stderr"]] # standard error of the mean difference
## [1] 0.3182592
ttestoutput[["alternative"]] # direction of the alternative hypothesis
## [1] "less"
# Used the below method to check work.
# PRE WORK -- start from a clean plot
# The workspace is deliberately NOT wiped with rm(list = ls()): clearing the
# global environment would destroy unrelated objects belonging to whoever
# runs this file, and every value this section needs is defined here.
# Close the current graphics device only when one is actually open, because
# dev.off() raises an error when there is no device to shut down.
if (!is.null(dev.list())) {
  dev.off()
}
## null device
## 1
# PRE WORK -- document the given data#
#sample data
alpha <- 0.05 # given
SampleDataSubjects <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
SampleDataX1 <- c(4.6, 7.8, 9.1, 5.6, 6.9, 8.5, 5.3, 7.1, 3.2, 4.4)
SampleDataX2 <- c(6.6, 7.7, 9.0, 6.2, 7.8, 8.3, 5.9, 6.5, 5.8, 4.9)
SampleData <- data.frame(SampleDataX1, SampleDataX2, row.names = SampleDataSubjects)
SampleData
## SampleDataX1 SampleDataX2
## 1 4.6 6.6
## 2 7.8 7.7
## 3 9.1 9.0
## 4 5.6 6.2
## 5 6.9 7.8
## 6 8.5 8.3
## 7 5.3 5.9
## 8 7.1 6.5
## 9 3.2 5.8
## 10 4.4 4.9
# Paired differences (X1 - X2) and their squares, added with base R so that
# no add-on package has to be loaded for this step.
SampleData$D <- SampleData$SampleDataX1 - SampleData$SampleDataX2
SampleData$DSquared <- SampleData$D^2
SampleData
## SampleDataX1 SampleDataX2 D DSquared
## 1 4.6 6.6 -2.0 4.00
## 2 7.8 7.7 0.1 0.01
## 3 9.1 9.0 0.1 0.01
## 4 5.6 6.2 -0.6 0.36
## 5 6.9 7.8 -0.9 0.81
## 6 8.5 8.3 0.2 0.04
## 7 5.3 5.9 -0.6 0.36
## 8 7.1 6.5 0.6 0.36
## 9 3.2 5.8 -2.6 6.76
## 10 4.4 4.9 -0.5 0.25
str(SampleData)
## 'data.frame': 10 obs. of 4 variables:
## $ SampleDataX1: num 4.6 7.8 9.1 5.6 6.9 8.5 5.3 7.1 3.2 4.4
## $ SampleDataX2: num 6.6 7.7 9 6.2 7.8 8.3 5.9 6.5 5.8 4.9
## $ D : num -2 0.1 0.1 -0.6 -0.9 ...
## $ DSquared : num 4 0.01 0.01 0.36 0.81 ...
#calcs from sample data
n <- nrow(SampleData) # number of pairs
SumD <- sum(SampleData$D)
SumDSquared <- sum(SampleData$DSquared)
Dbar <- SumD / n # mean of the differences
SumD
## [1] -6.2
SumDSquared
## [1] 12.96
Dbar
## [1] -0.62
df <- n - 1
# Standard deviation of the differences via the computational formula
# sqrt((n * sum(D^2) - (sum(D))^2) / (n * (n - 1)))
SDofDnum <- (n * SumDSquared) - SumD^2
SDofDDenom <- n * (n - 1)
SDofD <- sqrt(SDofDnum / SDofDDenom)
# STEP 1: Hypotheses and Claims
# NULL: muD = 0
# ALT: muD < 0
# CLAIM: muD < 0
muD <- 0 # hypothesized mean of the paired differences
claim <- "There is an improvement in the average person's sleep quality after meditating"
desiredaction <- "support"
alphastatement <- c("at alpha=", alpha, ".")
# STEP 2: Find critical values
# t distribution; left-tailed, so all of alpha sits in the lower tail
alphatouse <- alpha
CV <- qt(alphatouse, df)
CV
## [1] -1.833113
# STEP 2.1 -- DRAW THE CURVE (example using regular R plotting)
x <- seq(-6, 6, length.out = 1000)
y <- dt(x, df)
plot(x, y, type = "l", main = "Dependent t-test")
# Shade the rejection region to the left of the critical value
polygon(
  c(x[x <= CV], CV, min(x)),
  c(y[x <= CV], 0, 0),
  col = "red"
)
cvtext <- paste("C.V.:", round(CV, 3))
text(CV + 0.5, 0.75 * max(y), cvtext, col = "red", cex = 2)
alphatext <- paste("Alpha:", round(alpha, 3))
text(CV + 0.25, 0.5 * max(y), alphatext, col = "red", cex = 2)
abline(v = CV, col = "red")
# STEP 3: Compute the test value
# t = (Dbar - muD) / (SDofD / sqrt(n))
t <- (Dbar - muD) / (SDofD / sqrt(n))
t
## [1] -1.948098
# STEP 3.1: Mark the test value on the curve
ttext <- paste("t:", round(t, 3))
text(t - 0.4, 0.65 * max(y), ttext, col = "green", cex = 2)
abline(v = t, col = "green")
# STEP 4: Make the decision to reject or not the null hypothesis
# This is a left-tailed test, so the rejection region is t < CV (CV is
# negative). Comparing absolute values would also reject for a large
# POSITIVE t, which is evidence against the alternative muD < 0, not for it.
# `comparison` is positive exactly when t lies in the rejection region.
comparison <- CV - t
comparison
## [1] 0.114985
if (comparison > 0) {
  conclusion <- ("Reject the Null Hypothesis.")
  conclusionstatement <- c("There is enough evidence to", desiredaction, "the following claim:")
} else {
  conclusion <- ("Do Not Reject the Null Hypothesis.")
  conclusionstatement <- c("There is not enough evidence to", desiredaction, "the following claim:")
}
# STEP 5: State the Conclusion
cat(conclusion, conclusionstatement, claim, alphastatement, sep = " ")
## Reject the Null Hypothesis. There is enough evidence to support the following claim: There is an improvement in the average person's sleep quality after meditating at alpha= 0.05 .
# References
# Kabacoff, R. I. (2015). R in Action (2nd ed.). Manning Publications.
# Bluman, A. G. (2018). Elementary statistics: A step by step approach (10th ed.). McGraw Hill.