R Practice Week 4

Part 1

Click to expand

# Load the cats data explicitly instead of attach()-ing it: attach() puts the
# columns on the search path (where they can be masked or go stale), and
# nothing below relies on bare column names anyway.
# NOTE(review): assumes `cats` is the data set shipped with MASS -- the sample
# sizes and means in the rendered output are consistent with it; confirm.
data(cats, package = "MASS")

# One data frame per sex; Bwt from each is compared in the tests below.
male <- subset(cats, Sex == "M")
female <- subset(cats, Sex == "F")


# Part 1

# Hypothesis test
# H(o): mu1=mu2
# H(a): mu1!=mu2 (claim)

mu <- 0       # hypothesized difference in means under H(o)
alpha <- .05  # significance level

# Two tailed test because the claim is about there being a difference.
# The confidence level must be passed as `conf.level`: an argument named
# `confidence` is not recognised by t.test(), is swallowed by `...`, and the
# default of 0.95 is used no matter what alpha is set to.
CVusingt.test <- t.test(male$Bwt, female$Bwt,
                        alternative = "two.sided",
                        mu = mu,
                        conf.level = 1 - alpha)
CVusingt.test

## 
##  Welch Two Sample t-test
## 
## data:  male$Bwt and female$Bwt
## t = 8.7095, df = 136.84, p-value = 8.831e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4177242 0.6631268
## sample estimates:
## mean of x mean of y 
##  2.900000  2.359574

# List the components stored in the "htest" object, then print each in turn.
attributes(CVusingt.test)

## $names
##  [1] "statistic"   "parameter"   "p.value"     "conf.int"    "estimate"   
##  [6] "null.value"  "stderr"      "alternative" "method"      "data.name"  
## 
## $class
## [1] "htest"

CVusingt.test[["statistic"]]   # t test statistic

##        t 
## 8.709488

CVusingt.test[["parameter"]]   # degrees of freedom used by the test

##       df 
## 136.8379

CVusingt.test[["p.value"]]     # p-value

## [1] 8.831034e-15

CVusingt.test[["conf.int"]]    # confidence interval (lower, upper) for the difference

## [1] 0.4177242 0.6631268
## attr(,"conf.level")
## [1] 0.95

CVusingt.test[["estimate"]]    # sample mean of each group

## mean of x mean of y 
##  2.900000  2.359574

CVusingt.test[["null.value"]]  # hypothesized difference in means

## difference in means 
##                   0

CVusingt.test[["stderr"]]      # standard error of the difference in means

## [1] 0.0620502

CVusingt.test[["alternative"]] # alternative: "two.sided", "less" or "greater"

## [1] "two.sided"

# Manual re-computation of the two-sample test, used to check the t.test()
# result above.

# PRE WORK -- summary statistics taken from the two samples
alpha <- 0.05                  # significance level

# Male sample
n1 <- nrow(male)               # sample size
xbar1 <- mean(male[["Bwt"]])   # sample mean
s1 <- sd(male[["Bwt"]])        # sample standard deviation
df1 <- n1 - 1

# Female sample
n2 <- nrow(female)             # sample size
xbar2 <- mean(female[["Bwt"]]) # sample mean
s2 <- sd(female[["Bwt"]])      # sample standard deviation
df2 <- n2 - 1

# STEP 1: Hypotheses and Claims
# H(o): mu1 = mu2
# H(a): mu1 != mu2 (claim)
mu1minusmu2 <- 0 # hypothesized difference between the two means
claim <- "There is a significant difference in the average cat bodyweight between male vs. female"
desiredaction <- "support"
alphastatement <- c("at alpha=", alpha, ".")

# STEP 2: Find critical values
# Sigma is unknown, so the t distribution is used. (The smaller sample has
# df = 46, so the samples here are not below 30 observations.)
# Two-tailed test because the claim is not about > or <, so alpha is split
# evenly between the two tails.
tails <- 2
alphatouse <- alpha / tails
dftouse <- min(df1, df2)   # conservative choice: the smaller of the two df's
dftouse

## [1] 46

# Upper critical value; the lower one is -CV by symmetry.
CV <- qt(1 - alphatouse, df = dftouse)
CV

## [1] 2.012896

# STEP 3: Compute the test value
# Done before drawing so the x-axis can be widened to contain it: on a fixed
# -6..6 axis a statistic such as 8.709 lies outside the plot region, and its
# marker line and label are clipped away.
# t = ((xbar1 - xbar2) - mu1minusmu2) / sqrt(s1^2/n1 + s2^2/n2)
t <- ((xbar1-xbar2)-(mu1minusmu2))/sqrt((s1^2/n1)+(s2^2/n2))
t

## [1] 8.709488

# STEP 2.1 -- DRAW THE CURVE (example using regular R plotting)
# Keep at least the usual -6..6 range, but stretch it when |t| is larger.
xlimit <- max(6, ceiling(abs(t)) + 1)
x <- seq(-xlimit, xlimit, length.out = 1000)
y <- dt(x, df = dftouse)
plot(x, y, type = "l", main = "Cat bwt Male vs. Female")
# Shade both rejection regions of the two-tailed test.
polygon(c(x[x >= CV], max(x), CV), c(y[x >= CV], 0, 0), col = "red")
polygon(c(x[x <= -CV], -CV, min(x)), c(y[x <= -CV], 0, 0), col = "red")
cvtext <- paste("C.V.:", round(CV, digits = 3), sep = " ")
text(
  x = CV + 0.5,
  y = max(y) * .5,
  labels = cvtext,
  col = "red",
  cex = 2
)
alphatext <- paste("Alpha:", round(alpha, digits = 3), sep = " ")
text(
  x = CV + 0.25,
  y = max(y) * .75,
  labels = alphatext,
  col = "red",
  cex = 2
)
abline(v = c(-CV, CV), col = "red") # mark both critical values

# STEP 3.1 plot the test value
ttext <- paste("t:", round(t, digits = 3), sep = " ")
text(
  x = t - 0.4,
  y = max(y) * .35,
  labels = ttext,
  col = "green",
  cex = 2
)

abline(v = t, col = "green")

# STEP 4:  Decide whether the null hypothesis is rejected
# Two-tailed test: reject when |t| is larger than the critical value.
comparison <- abs(t) - abs(CV)

if (comparison <= 0) {
  conclusion <- "Do Not Reject the Null Hypothesis."
  conclusionstatement <- c(
    "There is not enough evidence to", desiredaction, "the following claim:"
  )
} else {
  conclusion <- "Reject the Null Hypothesis."
  conclusionstatement <- c(
    "There is enough evidence to", desiredaction, "the following claim:"
  )
}

# STEP 5:  State the Conclusion
# All pieces are printed as one space-separated sentence.
cat(conclusion, conclusionstatement, claim, alphastatement, sep = " ")

## Reject the Null Hypothesis. There is enough evidence to support the following claim: There is a significant difference in the average cat bodyweight between male vs. female at alpha= 0.05 .

Part 2

Click to expand

# Part 2

# STEP 1: Hypotheses and Claims
# NULL:  muD = 0
# ALT:   muD < 0
# CLAIM: muD < 0
alpha <- .05 # significance level
muD <- 0     # hypothesized mean difference

# The two measurements, in the same subject order
SampleDataX1 <- c(4.6, 7.8, 9.1, 5.6, 6.9,
                  8.5, 5.3, 7.1, 3.2, 4.4)
SampleDataX2 <- c(6.6, 7.7, 9.0, 6.2, 7.8,
                  8.3, 5.9, 6.5, 5.8, 4.9)

# Paired, left-tailed t-test, then a look at the structure of the result
ttestoutput <- t.test(
  SampleDataX1,
  SampleDataX2,
  paired = TRUE,
  alternative = "less",
  mu = muD,
  conf.level = 1 - alpha
)
str(ttestoutput)

## List of 10
##  $ statistic  : Named num -1.95
##   ..- attr(*, "names")= chr "t"
##  $ parameter  : Named num 9
##   ..- attr(*, "names")= chr "df"
##  $ p.value    : num 0.0416
##  $ conf.int   : num [1:2] -Inf -0.0366
##   ..- attr(*, "conf.level")= num 0.95
##  $ estimate   : Named num -0.62
##   ..- attr(*, "names")= chr "mean difference"
##  $ null.value : Named num 0
##   ..- attr(*, "names")= chr "mean difference"
##  $ stderr     : num 0.318
##  $ alternative: chr "less"
##  $ method     : chr "Paired t-test"
##  $ data.name  : chr "SampleDataX1 and SampleDataX2"
##  - attr(*, "class")= chr "htest"

# Print the full test summary, then each stored component in turn.
ttestoutput

## 
##  Paired t-test
## 
## data:  SampleDataX1 and SampleDataX2
## t = -1.9481, df = 9, p-value = 0.04161
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
##         -Inf -0.03659503
## sample estimates:
## mean difference 
##           -0.62

attributes(ttestoutput)

## $names
##  [1] "statistic"   "parameter"   "p.value"     "conf.int"    "estimate"   
##  [6] "null.value"  "stderr"      "alternative" "method"      "data.name"  
## 
## $class
## [1] "htest"

ttestoutput[["statistic"]]   # t test statistic

##         t 
## -1.948098

ttestoutput[["parameter"]]   # degrees of freedom

## df 
##  9

ttestoutput[["p.value"]]     # p-value

## [1] 0.04161026

ttestoutput[["conf.int"]]    # confidence interval (lower, upper) for the mean difference

## [1]        -Inf -0.03659503
## attr(,"conf.level")
## [1] 0.95

ttestoutput[["estimate"]]    # estimated mean difference

## mean difference 
##           -0.62

ttestoutput[["null.value"]]  # hypothesized mean difference

## mean difference 
##               0

ttestoutput[["stderr"]]      # standard error of the mean difference

## [1] 0.3182592

ttestoutput[["alternative"]] # alternative: "two.sided", "less" or "greater"

## [1] "less"

# Used the below method to check work.


# PRE WORK -- close any plot left open by earlier code
# dev.off() raises an error when only the null device (number 1) is present,
# so the close is guarded. The workspace is deliberately NOT wiped with
# rm(list = ls()): every object the check below uses is assigned afresh, and
# wiping would also destroy unrelated objects in the caller's session.
if (dev.cur() > 1L) dev.off()

## null device 
##           1

# PRE WORK -- document the given data

# sample data
alpha <- 0.05  # given
SampleDataSubjects <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
SampleDataX1 <- c(4.6, 7.8, 9.1, 5.6, 6.9, 8.5, 5.3, 7.1, 3.2, 4.4)
SampleDataX2 <- c(6.6, 7.7, 9.0, 6.2, 7.8, 8.3, 5.9, 6.5, 5.8, 4.9)
# One row per subject, with the subject number as the row name
SampleData <- data.frame(SampleDataX1, SampleDataX2, row.names = SampleDataSubjects)
SampleData

##    SampleDataX1 SampleDataX2
## 1           4.6          6.6
## 2           7.8          7.7
## 3           9.1          9.0
## 4           5.6          6.2
## 5           6.9          7.8
## 6           8.5          8.3
## 7           5.3          5.9
## 8           7.1          6.5
## 9           3.2          5.8
## 10          4.4          4.9

# Add the paired difference D = X1 - X2 and its square.
# Plain column assignment is used so this step runs without dplyr attached.
SampleData$D <- SampleData$SampleDataX1 - SampleData$SampleDataX2
SampleData$DSquared <- SampleData$D^2
SampleData

##    SampleDataX1 SampleDataX2    D DSquared
## 1           4.6          6.6 -2.0     4.00
## 2           7.8          7.7  0.1     0.01
## 3           9.1          9.0  0.1     0.01
## 4           5.6          6.2 -0.6     0.36
## 5           6.9          7.8 -0.9     0.81
## 6           8.5          8.3  0.2     0.04
## 7           5.3          5.9 -0.6     0.36
## 8           7.1          6.5  0.6     0.36
## 9           3.2          5.8 -2.6     6.76
## 10          4.4          4.9 -0.5     0.25

str(SampleData)

## 'data.frame':    10 obs. of  4 variables:
##  $ SampleDataX1: num  4.6 7.8 9.1 5.6 6.9 8.5 5.3 7.1 3.2 4.4
##  $ SampleDataX2: num  6.6 7.7 9 6.2 7.8 8.3 5.9 6.5 5.8 4.9
##  $ D           : num  -2 0.1 0.1 -0.6 -0.9 ...
##  $ DSquared    : num  4 0.01 0.01 0.36 0.81 ...

# Calculations from the sample data: sums of D and D^2, and the mean difference
n <- nrow(SampleData)
SumD <- sum(SampleData[["D"]])
SumDSquared <- sum(SampleData[["DSquared"]])
Dbar <- SumD / n
SumD

## [1] -6.2

SumDSquared

## [1] 12.96

Dbar

## [1] -0.62

df <- n - 1

# Standard deviation of the differences via the shortcut formula
#   sqrt( (n * sum(D^2) - (sum D)^2) / (n * (n - 1)) )
SDofDnum <- n * SumDSquared - SumD^2
SDofDDenom <- n * (n - 1)
SDofD <- sqrt(SDofDnum / SDofDDenom)


# STEP 1: Hypotheses and Claims
# NULL:  muD = 0
# ALT:   muD < 0
# CLAIM: muD < 0
muD <- 0 # hypothesized mean difference
claim <- "There is an improvement in the average person's sleep quality after meditating"
desiredaction <- "support"
alphastatement <- c("at alpha=", alpha, ".")

# STEP 2: Find critical values
# Sigma is unknown and n < 30, so the t distribution is used.
# One-tailed (left side) test: all of alpha sits in the lower tail, so the
# critical value is the alpha quantile itself.
alphatouse <- alpha
CV <- qt(alphatouse, df = df)
CV

## [1] -1.833113

# STEP 2.1 -- Draw the t density and shade the left-tail rejection region
x <- seq(-6, 6, length.out = 1000)
y <- dt(x, df = df)
plot(x, y, type = "l", main = "Dependent t-test")

# Region to the left of the critical value, closed along the x-axis
polygon(
  c(x[x <= CV], CV, min(x)),
  c(y[x <= CV], 0, 0),
  col = "red"
)

# Label the critical value ...
cvtext <- paste("C.V.:", round(CV, 3))
text(CV + 0.5, max(y) * .75, labels = cvtext, col = "red", cex = 2)

# ... and the significance level
alphatext <- paste("Alpha:", round(alpha, 3))
text(CV + 0.25, max(y) * .5, labels = alphatext, col = "red", cex = 2)

# Vertical marker at the critical value
abline(v = CV, col = "red")


# STEP 3: Compute the test value
# t = (Dbar - muD) / (SDofD / sqrt(n))
t <- (Dbar - muD) / (SDofD / sqrt(n))
t

## [1] -1.948098

# STEP 3.1: Add the test value to the existing plot (label plus marker line)
ttext <- paste("t:", round(t, 3))
text(t - 0.4, max(y) * .65, labels = ttext, col = "green", cex = 2)
abline(v = t, col = "green")



# STEP 4:  Make the decision to reject or not the null hypothesis
# Left-tailed test: the rejection region is t < CV (CV is negative), so the
# signed values are compared. Comparing absolute values would also "reject"
# for a large positive t, which contradicts ALT: muD < 0.
comparison <- CV - t # positive only when t falls in the left-tail rejection region
comparison

## [1] 0.114985

if (comparison > 0) {
  conclusion <- "Reject the Null Hypothesis."
  conclusionstatement <- c("There is enough evidence to", desiredaction, "the following claim:")
} else {
  conclusion <- "Do Not Reject the Null Hypothesis."
  conclusionstatement <- c("There is not enough evidence to", desiredaction, "the following claim:")
}

# STEP 5:  State the Conclusion
# All pieces are printed as one space-separated sentence.
cat(conclusion, conclusionstatement, claim, alphastatement, sep = " ")

## Reject the Null Hypothesis. There is enough evidence to support the following claim: There is an improvement in the average person's sleep quality after meditating at alpha= 0.05 .

Works Cited

Kabacoff, R. I. (2015). R in Action (2nd ed.). Manning Publications.

Bluman, A. G. (2018). Elementary statistics: A step by step approach (10th ed.). McGraw Hill.

R Practice Week 4

Michael

2024-03-15

Part 1

Part 2

Works Cited