Ph.D. Course Work - 2024 on Quantitative Methods

Day 8 material - One-sample t-test case study

———————————————————————–

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)

# P(Z <= 1) where Z ~ N(0, 1).
# pnorm() defaults to mean = 0, sd = 1 and the lower tail.
pnorm(1)

## [1] 0.8413447

# P(Z > 1) where Z ~ N(0, 1): same call, but asking for the upper tail.
pnorm(1, lower.tail = FALSE)

## [1] 0.1586553

# Find z such that P(Z <= z) = 0.95 where Z ~ N(0, 1).
# qnorm() is the inverse of pnorm() and shares its defaults.
qnorm(0.95)

## [1] 1.644854

# Find z such that P(Z > z) = 0.95 where Z ~ N(0, 1) (upper-tail quantile).
qnorm(0.95, lower.tail = FALSE)

## [1] -1.644854

# Load ozone data.
# The file is read through an explicit path instead of calling setwd(), so the
# session's working directory is left untouched. Point `data_dir` at the folder
# that holds ozone.csv on your machine.
data_dir <- "D:\\D Drive\\Ph.D. Course Work\\Ph.D. 2024\\Data"
ozone <- read.csv(file.path(data_dir, "ozone.csv"))

# Calculate the descriptive statistics of the Ozone column: sample mean (xbar),
# sample standard deviation (s) and number of observations (n).
stat <- ozone %>%
  summarise(
    xbar = mean(Ozone),
    s = sd(Ozone),
    n = n()
  )

# View the descriptive statistics
stat

##   xbar        s  n
## 1 69.3 11.67444 20

# Boxplot of the Ozone values with a dashed red horizontal line at the
# hypothesized mean (75) and a red text label placed just above that line.
ggplot(data = ozone) +
  geom_boxplot(mapping = aes(y = Ozone)) +
  geom_hline(
    mapping = aes(yintercept = 75),
    color = "red",
    linetype = "dashed"
  ) +
  annotate(
    geom = "text",
    x = 0,
    y = 76,
    label = "Hypothesized mean",
    colour = "red"
  )

# Perform the one-sample t test by hand (H0: mu = 75, two-sided alternative).
# The inputs come from the `stat` summary of the ozone data.
xbar <- stat$xbar
s <- stat$s
n <- stat$n
# Degrees of freedom of the one-sample t statistic.
df <- n - 1
# Effect: distance of the sample mean from the hypothesized mean.
effect <- xbar - 75
# Noise: standard error of the sample mean.
noise <- s / sqrt(n)
# t-value
t <- effect / noise
# Two-tailed p-value. Using -abs(t) always doubles the smaller tail, so the
# result is valid whether the sample mean is below or above 75 (2 * pt(t, df)
# would exceed 1 for a positive t). The computed `df` is used rather than a
# hard-coded 19 so the calculation follows the sample size.
pval <- 2 * pt(-abs(t), df)
# display p-value
pval

## [1] 0.04174488

# display the result
cat(sprintf("mean=%f\nstandard deviation=%f\nsample size=%d\ndf=%d\nt-val=%f\np-val=%f",xbar,s,n,df,t,pval))

## mean=69.300000
## standard deviation=11.674444
## sample size=20
## df=19
## t-val=-2.183502
## p-val=0.041745

# The same one-sample t test in a single call. The alternative and confidence
# level are written out explicitly; both are the t.test() defaults.
t.test(
  x = ozone$Ozone,
  alternative = "two.sided",
  mu = 75,
  conf.level = 0.95
)

## 
##  One Sample t-test
## 
## data:  ozone$Ozone
## t = -2.1835, df = 19, p-value = 0.04174
## alternative hypothesis: true mean is not equal to 75
## 95 percent confidence interval:
##  63.83619 74.76381
## sample estimates:
## mean of x 
##      69.3

# If we assume a significance level of 0.05 for our test, then the above result (p-value < 0.05) shows that the t value lies in the rejection region of the test. Therefore, we reject the null hypothesis and conclude that the true mean of the population is not equal to 75.