Data
iris
Shapiro-Wilk test
H0: the variable follows a normal distribution
H1: the variable does NOT follow a normal distribution
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
# Shapiro-Wilk test on two iris measurements; the result is a tibble with one
# row per variable holding the test statistic and its p-value.
shapiro_test(iris, Sepal.Length, Petal.Width)
## # A tibble: 2 × 3
## variable statistic p
## <chr> <dbl> <dbl>
## 1 Petal.Width 0.902 0.0000000168
## 2 Sepal.Length 0.976 0.0102
Petal.Width & Sepal.Length:
p-value < 0.05: reject H0. The p-values are statistically significant (the variables do NOT follow a normal distribution).
Q-Q plot using ggplot2
library(ggplot2)
library(qqplotr)
##
## Attaching package: 'qqplotr'
## The following objects are masked from 'package:ggplot2':
##
## stat_qq_line, StatQqLine
# Q-Q plot per variable. Layers are added in drawing order: the band first,
# then the reference line, then the sample points on top.
p1 <- ggplot(iris, aes(sample = Sepal.Length)) +
  stat_qq_band(fill = "Purple", alpha = 0.15) +
  stat_qq_line(colour = "Purple") +
  stat_qq_point(colour = "black", size = 1) +
  labs(title = "Sepal.Length") +
  theme_bw()

p2 <- ggplot(iris, aes(sample = Petal.Width)) +
  stat_qq_band(fill = "blue", alpha = 0.15) +
  stat_qq_line(colour = "blue") +
  stat_qq_point(colour = "black", size = 1) +
  labs(title = "Petal.Width") +
  theme_bw()
Arrange in a grid using package gridExtra
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Draw the two Q-Q plots side by side (two columns, one row).
# Disabled alternative: p1 + p2 (presumably patchwork-style composition, which
# would need an additional package -- confirm before enabling).
grid.arrange(grobs = list(p1, p2), ncol = 2)
Density plot using ggplot2
# Density curve for each variable, filled with a light translucent colour.
p3 <- ggplot(iris, aes(x = Sepal.Length)) +
  geom_density(fill = "Purple", alpha = 0.15) +
  labs(title = "Sepal.Length") +
  theme_bw()

p4 <- ggplot(iris, aes(x = Petal.Width)) +
  geom_density(fill = "blue", alpha = 0.15) +
  labs(title = "Petal.Width") +
  theme_bw()

# Draw both density plots side by side (two columns, one row).
# Disabled alternative: p3 + p4 (presumably patchwork-style composition).
grid.arrange(grobs = list(p3, p4), ncol = 2)
Histogram plot using ggplot2
# Histograms drawn on the density scale, each overlaid with the normal density
# curve that has the variable's own sample mean and standard deviation.
p5 <- ggplot(iris, aes(x = Sepal.Length)) +
  # after_stat(density) replaces the deprecated `..density..` notation; the x
  # mapping is inherited from the plot-level aes().
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 50,
    fill = "Purple",
    alpha = 0.15
  ) +
  # `size` is kept for the curve width: `linewidth` needs ggplot2 >= 3.4.0,
  # newer than the 3.3.5 release this report was rendered with.
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(iris$Sepal.Length),
      sd = sd(iris$Sepal.Length)
    ),
    colour = "Purple",
    size = 1
  ) +
  labs(title = "Sepal.Length") +
  theme_bw()

p6 <- ggplot(iris, aes(x = Petal.Width)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 50,
    fill = "blue",
    alpha = 0.15
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(iris$Petal.Width),
      sd = sd(iris$Petal.Width)
    ),
    colour = "blue",
    size = 1
  ) +
  labs(title = "Petal.Width") +
  theme_bw()

# Draw both histograms side by side (two columns, one row).
# Disabled alternative: p5 + p6 (presumably patchwork-style composition).
grid.arrange(p5, p6, ncol = 2)