Two-sample t-test case study in R

Ph.D. Course Work - Computer Application

Part 3

———————————————————————–

# Start from an empty global environment.
# NOTE(review): rm(list = ls()) and setwd() alter global session state and tie
# the script to one machine; consider a project-relative path instead.
rm(list = ls())
# Attach the packages used for data wrangling and plotting
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Make the course folder the working directory
setwd("D:\\R Course")
# Read the ozone concentration data file into a data frame
ozone <- read.csv("ozone.csv")
# Show the structure of the loaded data
glimpse(ozone)
## Rows: 20
## Columns: 3
## $ Ozone           <dbl> 61.7, 64.0, 72.4, 56.8, 52.4, 44.8, 70.4, 67.6, 68.8, …
## $ Garden.location <chr> "West", "West", "West", "West", "West", "West", "West"…
## $ Garden.ID       <chr> "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8", "G9", …
# Sample statistics: mean and standard deviation of ozone per garden location
ozone %>%
  select(Ozone, Garden.location) %>%
  group_by(Garden.location) %>%
  summarise(
    mean_oz = mean(Ozone),
    sd_oz = sd(Ozone)
  )
## # A tibble: 2 × 3
##   Garden.location mean_oz sd_oz
##   <chr>             <dbl> <dbl>
## 1 East               77.3  7.87
## 2 West               61.3  9.06
# Histogram of ozone, one panel per garden location (stacked vertically)
ggplot(data = ozone, mapping = aes(x = Ozone)) +
  geom_histogram(binwidth = 10) +
  facet_wrap(~Garden.location, ncol = 1) +
  theme_bw()

# Boxplot of ozone by garden location with the raw observations overlaid
ggplot(
  data = ozone,
  mapping = aes(x = Garden.location, y = Ozone, fill = Garden.location)
) +
  geom_boxplot() +
  geom_point(size = 2) +
  theme_bw()

R functions for the normal distribution
# Density (pdf) of a normal distribution with mean = 5 and sd = 2
x <- seq(from = -10, to = 15, by = 0.01) # grid of evaluation points
y <- dnorm(x, mean = 5, sd = 2)
plot(x, y, col = "blue")

# Density (pdf) of the standard normal distribution (mean = 0, sd = 1)
x <- seq(from = -5, to = 5, by = 0.01)
y <- dnorm(x, mean = 0, sd = 1)
plot(x, y, col = "blue")

# Cumulative distribution function (cdf) of the standard normal on the same grid
yp <- pnorm(x, mean = 0, sd = 1)
plot(x, yp, col = "red")

# Two-sided critical value at significance level (alpha) = .05
qnorm(0.975, mean = 0, sd = 1)
## [1] 1.959964
# Critical region = (-inf, -1.959964] U [1.959964, inf)
# Draw a random sample of 10 values from the standard normal distribution
rnorm(n = 10, mean = 0, sd = 1)
##  [1]  0.7887246 -0.6651198 -1.3749193 -0.5141969  0.7073569  0.5582830
##  [7]  1.0217419 -0.1607195  0.8515551  0.8895515
R functions for the t distribution
# Compare the standard normal density with Student t densities for several
# degrees of freedom (2, 6 and 12), all evaluated on the same grid.

x <- seq(-4, 4, 0.01) # grid of evaluation points
y <- dnorm(x, 0, 1) # standard normal density
y1 <- dt(x, df = 2) # t density, 2 degrees of freedom
y2 <- dt(x, df = 6) # t density, 6 degrees of freedom
y3 <- dt(x, df = 12) # t density, 12 degrees of freedom
# The object name `df` is kept unchanged so any later code that refers to it
# still works.
df <- data.frame(x, y, y1, y2, y3)
# The legend labels state the actual degrees of freedom of each curve (the
# earlier "df:1", "df:2", "df:3" labels did not match the plotted values).
# The explicit limits keep the legend in numeric order; without them "df: 12"
# would sort before "df: 2".
ggplot(df, aes(x)) +
  geom_line(aes(y = y, linetype = "Normal"), colour = "black") +
  geom_line(aes(y = y1, linetype = "df: 2"), colour = "blue") +
  geom_line(aes(y = y2, linetype = "df: 6"), colour = "red") +
  geom_line(aes(y = y3, linetype = "df: 12"), colour = "green") +
  scale_linetype_discrete(limits = c("df: 2", "df: 6", "df: 12", "Normal"))

# Cumulative distribution function (cdf) of the t distribution with df = 10,
# evaluated on the existing grid x
yp <- pt(x, df = 10)
plot(x, yp, col = "red")

# Two-sided critical value at significance level (alpha) = .05 with df = 10
qt(p = 0.975, df = 10, lower.tail = TRUE)
## [1] 2.228139
# Critical region = (-inf, -2.228139] U [2.228139, inf)
# Draw a random sample of 20 values from the t distribution with df = 10
rt(n = 20, df = 10)
##  [1] -0.5305557 -0.2590192  1.4495357  2.3360976  0.1844444  1.8614789
##  [7]  1.9481677  0.1666295 -1.6344156 -0.2585406  0.6736808  1.0026141
## [13] -1.1425539 -0.4980540 -0.1355924  1.1389694  0.4894910 -0.7760534
## [19]  1.9114064  0.3717847

# Two-sample t-test of ozone concentration by garden location
# (t.test() does not assume equal variances by default, i.e. Welch's test)

t.test(Ozone ~ Garden.location, data = ozone)
## 
##  Welch Two Sample t-test
## 
## data:  Ozone by Garden.location
## t = 4.2363, df = 17.656, p-value = 0.0005159
## alternative hypothesis: true difference in means between group East and group West is not equal to 0
## 95 percent confidence interval:
##   8.094171 24.065829
## sample estimates:
## mean in group East mean in group West 
##              77.34              61.26
The output shows that the p-value is very low. Even at a significance level of 0.001, the null hypothesis can be rejected, so we conclude that the population means differ between the two groups. One more point to note is that R, by default, assumes the variances of the groups are different; therefore Welch's two-sample t-test has been performed. To check whether the variances of the groups really differ, we can run a variance (F) test.
# F test comparing the ozone variances of the two garden locations
var.test(Ozone ~ Garden.location, data = ozone)
## 
##  F test to compare two variances
## 
## data:  Ozone by Garden.location
## F = 0.75503, num df = 9, denom df = 9, p-value = 0.6823
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1875386 3.0397437
## sample estimates:
## ratio of variances 
##          0.7550293
The F test gives no evidence that the variances differ (p-value = 0.6823), so we can explicitly request the classical pooled-variance two-sample t-test (var.equal = TRUE) instead of Welch's two-sample t-test.
# Two-sample t-test with the equal-variance (pooled) assumption switched on
t.test(Ozone ~ Garden.location, data = ozone, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  Ozone by Garden.location
## t = 4.2363, df = 18, p-value = 0.0004966
## alternative hypothesis: true difference in means between group East and group West is not equal to 0
## 95 percent confidence interval:
##   8.105323 24.054677
## sample estimates:
## mean in group East mean in group West 
##              77.34              61.26