# Start from a clean workspace: delete every (non-hidden) object in the
# current environment. Note this does not unload packages or reset options.
rm(list = ls())
#Load Packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Make the course folder the working directory so the relative file name
# below resolves inside it
setwd("D:\\R Course")
# Read the ozone concentration measurements from CSV into a data frame
ozone <- read.csv(file = "ozone.csv")
# Compact, column-by-column preview of what was loaded
glimpse(ozone)
## Rows: 20
## Columns: 3
## $ Ozone <dbl> 61.7, 64.0, 72.4, 56.8, 52.4, 44.8, 70.4, 67.6, 68.8, …
## $ Garden.location <chr> "West", "West", "West", "West", "West", "West", "West"…
## $ Garden.ID <chr> "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8", "G9", …
# Per-location sample statistics: mean and standard deviation of Ozone
ozone %>%
  group_by(Garden.location) %>%
  summarise(
    mean_oz = mean(Ozone),
    sd_oz = sd(Ozone)
  )
## # A tibble: 2 × 3
## Garden.location mean_oz sd_oz
## <chr> <dbl> <dbl>
## 1 East 77.3 7.87
## 2 West 61.3 9.06
# Histogram of ozone readings (bins 10 units wide), drawn as one panel per
# garden location stacked in a single column
ggplot(data = ozone, mapping = aes(x = Ozone)) +
  geom_histogram(binwidth = 10) +
  facet_wrap(~Garden.location, ncol = 1) +
  theme_bw()
# Boxplot of ozone by garden location, with the raw observations overlaid
# as points; boxes are filled by location
ggplot(
  data = ozone,
  mapping = aes(x = Garden.location, y = Ozone, fill = Garden.location)
) +
  geom_boxplot() +
  geom_point(size = 2) +
  theme_bw()
# Density (pdf) of a normal distribution with mean = 5 and sd = 2
x <- seq(from = -10, to = 15, by = 0.01) # evaluation grid
y <- dnorm(x, mean = 5, sd = 2)
plot(x, y, col = "blue")
# Density (pdf) of the standard normal distribution (mean = 0, sd = 1)
x <- seq(from = -5, to = 5, by = 0.01)
y <- dnorm(x, mean = 0, sd = 1)
plot(x, y, col = "blue")
# Cumulative distribution function (cdf) of the standard normal on the same grid
yp <- pnorm(x, mean = 0, sd = 1)
plot(x, yp, col = "red")
# Two-sided test at significance level alpha = 0.05: the upper critical
# value is the 0.975 quantile of the standard normal distribution
qnorm(0.975, mean = 0, sd = 1)
## [1] 1.959964
# Critical region: (-Inf, -1.959964] U [1.959964, Inf)
# Draw a random sample of size 10 from the standard normal distribution
# (no seed is set, so the values differ from run to run)
rnorm(n = 10, mean = 0, sd = 1)
## [1] 0.7887246 -0.6651198 -1.3749193 -0.5141969 0.7073569 0.5582830
## [7] 1.0217419 -0.1607195 0.8515551 0.8895515
# Compare Student's t densities for several degrees of freedom (2, 6, 12)
# against the standard normal density on a common grid.
x <- seq(-4, 4, 0.01) # evaluation grid
y <- dnorm(x, 0, 1) # standard normal reference curve
y1 <- dt(x, df = 2)
y2 <- dt(x, df = 6)
y3 <- dt(x, df = 12)
df <- data.frame(x, y, y1, y2, y3)
# Each curve is its own layer; the linetype label becomes the legend entry,
# so it must state the degrees of freedom actually passed to dt() above.
ggplot(df, aes(x)) +
  geom_line(aes(y = y, linetype = "Normal"), colour = "black") +
  geom_line(aes(y = y1, linetype = "df:2"), colour = "blue") +
  geom_line(aes(y = y2, linetype = "df:6"), colour = "red") +
  geom_line(aes(y = y3, linetype = "df:12"), colour = "green")
# Cumulative distribution function (cdf) of Student's t with 10 degrees of
# freedom, evaluated on the grid x defined earlier in the script
yp <- pt(q = x, df = 10)
plot(x, yp, col = "red")
# Two-sided test at significance level alpha = 0.05 with df = 10: the upper
# critical value is the 0.975 quantile of the t distribution
qt(0.975, df = 10)
## [1] 2.228139
# Critical region: (-Inf, -2.228139] U [2.228139, Inf)
# Draw a random sample of size 20 from the t distribution with df = 10
# (no seed is set, so the values differ from run to run)
rt(n = 20, df = 10)
## [1] -0.5305557 -0.2590192 1.4495357 2.3360976 0.1844444 1.8614789
## [7] 1.9481677 0.1666295 -1.6344156 -0.2585406 0.6736808 1.0026141
## [13] -1.1425539 -0.4980540 -0.1355924 1.1389694 0.4894910 -0.7760534
## [19] 1.9114064 0.3717847
# Two-sample t-test of Ozone between garden locations. With the default
# settings this is the Welch test (equal variances are not assumed).
t.test(Ozone ~ Garden.location, data = ozone)
##
## Welch Two Sample t-test
##
## data: Ozone by Garden.location
## t = 4.2363, df = 17.656, p-value = 0.0005159
## alternative hypothesis: true difference in means between group East and group West is not equal to 0
## 95 percent confidence interval:
## 8.094171 24.065829
## sample estimates:
## mean in group East mean in group West
## 77.34 61.26
# F test comparing the Ozone variances of the two garden locations
var.test(Ozone ~ Garden.location, data = ozone)
##
## F test to compare two variances
##
## data: Ozone by Garden.location
## F = 0.75503, num df = 9, denom df = 9, p-value = 0.6823
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1875386 3.0397437
## sample estimates:
## ratio of variances
## 0.7550293
# Two-sample t-test assuming equal variances (pooled-variance version)
t.test(Ozone ~ Garden.location, data = ozone, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Ozone by Garden.location
## t = 4.2363, df = 18, p-value = 0.0004966
## alternative hypothesis: true difference in means between group East and group West is not equal to 0
## 95 percent confidence interval:
## 8.105323 24.054677
## sample estimates:
## mean in group East mean in group West
## 77.34 61.26