Multiple Plots & Splitting the data using R

Alok Pratap Singh

October, 2020

Box and Whisker plot using `base-R`

# Keep a working copy of the built-in iris data for the plots below.
dataset <- iris

# Preview the first six rows as a formatted table.
kableExtra::kable(head(iris, n = 6L))

Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
5.1	3.5	1.4	0.2	setosa
4.9	3.0	1.4	0.2	setosa
4.7	3.2	1.3	0.2	setosa
4.6	3.1	1.5	0.2	setosa
5.0	3.6	1.4	0.2	setosa
5.4	3.9	1.7	0.4	setosa

# Split the data into the four measurement columns (x) and the class
# label column (y); both are reused by later chunks.
x <- dataset[, 1:4]
y <- dataset[, 5]

# Boxplot for each attribute on one image: a 1 x 4 panel layout on a black
# background. par() returns the previous settings, which are kept so the
# graphics state can be restored once the panels are drawn.
old_par <- par(
  mfrow = c(1, 4),
  col = "steelblue",
  lwd = 5,
  bg = "black",
  fg = "white"
)

for (i in seq_along(x)) {
  boxplot(
    x[[i]],
    main = names(x)[i],  # title taken from the object actually plotted
    col = 0,
    col.main = "tomato2",
    col.axis = "gold",
    cex.axis = 1.5,
    cex.main = 2
  )
}

# Undo the layout/colour changes so later base-graphics plots are unaffected.
par(old_par)

Box and Whisker plot using `caret package`

# Box plots of the predictors in x, drawn against the class label y.
caret::featurePlot(
  x = x,
  y = y,
  plot = "box"
)

Density plot using `caret package`

# Density plots for each attribute by class value.
library(caret)

# Both axes use relation = "free" in the scales specification.
scales <- list(
  x = list(relation = "free"),
  y = list(relation = "free")
)

featurePlot(x = x, y = y, plot = "density", scales = scales)

Pair plot using `ggplot2` and `GGally`

# Reload iris and attach the plotting packages used by the pair plots.
data(iris)
library(ggplot2)
library(ggthemes)
library(GGally)
theme_set(theme_bw())

# Pair plot of columns 2 to 5, coloured by species, with the colour-blind
# palettes from ggthemes applied to both fill and colour.
# NOTE(review): columns 2:5 leaves out Sepal.Length (column 1) - confirm
# that this is intended.
ggpairs(
  iris,
  mapping = aes(colour = Species, alpha = 0.9),
  columns = 2:5
) +
  scale_fill_colorblind() +
  scale_color_colorblind() +
  labs(title = "Machine Learning Project")

# Pair plot of all five iris columns, coloured by species, using the
# default palette.
data(iris)
ggpairs(
  iris,
  mapping = aes(colour = Species),
  columns = 1:5
)

Pair plot using `psych package`

# Pair plot of all five iris columns with psych::pairs.panels().
data(iris)

# One label per plotted column. The original call supplied four labels for
# five columns, leaving the Species panel without a label.
# NOTE(review): `labels` is not a named pairs.panels() argument here; it is
# presumably forwarded through `...` to graphics::pairs() - confirm against
# the installed psych version.
panel_labels <- c(
  "Sepal Length", "Sepal Width",
  "Petal Length", " Petal Width",
  "Species"
)

psych::pairs.panels(
  iris[1:5],
  hist.col = c(
    "tomato2", "steelblue", "gold",
    "palegreen4", "orange", "lightblue"
  ),
  breaks = 10,
  lwd = 2,
  labels = panel_labels
)

Plotting manually using `ggplot2` and `gridExtra`

data(iris)
library(ggplot2)
library(ggthemes)
theme_set(theme_bw())

# Build one species-coloured scatter plot of iris with an ellipse per species.
#
# x_var, y_var    Unquoted iris column names for the x and y axes.
# ellipse_legend  Whether the ellipse layer contributes a legend. The point
#                 layer never does.
#
# Returns the ggplot object without drawing it.
species_scatter <- function(x_var, y_var, ellipse_legend = FALSE) {
  ggplot(iris, aes({{ x_var }}, {{ y_var }}, col = Species)) +
    geom_point(size = 2, show.legend = FALSE) +
    stat_ellipse(size = 1.3, linetype = 1, show.legend = ellipse_legend) +
    scale_color_calc()
}

# The six pairwise combinations of the four measurements. Only the last
# panel carries a legend, so the grid shows it once.
# The object names a-f are kept from the original script; `c` holds a plot
# here, but calls such as c(1, 2) still resolve to the base function.
a <- species_scatter(Sepal.Length, Petal.Length)
b <- species_scatter(Sepal.Length, Sepal.Width)
c <- species_scatter(Sepal.Length, Petal.Width)
d <- species_scatter(Petal.Length, Sepal.Width)
e <- species_scatter(Petal.Length, Petal.Width)
f <- species_scatter(Sepal.Width, Petal.Width, ellipse_legend = TRUE)

# Switch the global theme before drawing: none of the plots sets its own
# theme, so theme_base() is what the grid is rendered with.
library(gridExtra)
theme_set(theme_base())
grid.arrange(a, b, c, d, e, f)

Ellipse plot using `caret package`

# Scatter-plot matrix of the four measurements with an ellipse per species.
data(iris)
caret::featurePlot(
  x = iris[, 1:4],
  y = iris[, 5],
  plot = "ellipse",
  lwd = 3,
  main = "Machine Learning Exercise"  # title spelling corrected
)

Sample split method 1 (R base)

# Base-R split (left commented out; shown for reference only).
# Draw a random sample of row positions covering 75% of the rows, keep those
# rows as the training set and use the remaining rows as the test set.
# data(iris)
# split_positions <- sample(1:nrow(iris), size= .75 * nrow(iris))
# split_positions
# train <- iris[split_positions,]
# test <- iris[-split_positions,]

Sample split method 2 (Package: caret)

# caret split (left commented out; shown for reference only).
# The result of createDataPartition() must be stored in split_position,
# because the two indexing lines below read that name.
# NOTE(review): createDataPartition() presumably stratifies on its first
# argument, so passing iris$Species instead of the row indices may be what
# is wanted - confirm.
# data(iris)
# split_position <- caret::createDataPartition(1:nrow(iris), p= .75, list= F)
# train <- iris[split_position,]
# test <- iris[-split_position,]

Sample split method 3 (Package: caTools)

# caTools split (left commented out; shown for reference only).
# sample.split() is called with SplitRatio = .75 and its result is stored in
# split_tag, which is then compared with T / F to select rows.
# Two ways of building the sets are shown: bracket indexing and subset().
# The subset() pair assigns to the same names and so overwrites the first pair.
# data(iris)
# caTools::sample.split(iris$Sepal.Length, SplitRatio = .75)-> split_tag
# train <- iris[split_tag==T,]
# test <- iris[split_tag==F,]
# train <- subset(iris, split_tag==T)
# test <- subset(iris, split_tag==F)

# Compare the sizes of the two sets.
# dim(train);dim(test)

Sample split method 4 (Package: dplyr)

library(dplyr)
data(iris)

# Give every row a unique id, moved to the first column, so the training
# rows can be identified afterwards. This overwrites the workspace copy of
# iris.
iris <- relocate(mutate(iris, id = row_number()), id)

# Randomly draw 75% of the rows for training; the test set is every row
# whose id does not appear in the training set.
train <- sample_frac(iris, .75)
test <- anti_join(iris, train, by = "id")

dim(train)
dim(test)

## [1] 112   6

## [1] 38  6

# Show the first six training rows as a formatted table.
pander::pander(head(train, n = 6L))

id	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
92	6.1	3	4.6	1.4	versicolor
16	5.7	4.4	1.5	0.4	setosa
37	5.5	3.5	1.3	0.2	setosa
85	5.4	3	4.5	1.5	versicolor
52	6.4	3.2	4.5	1.5	versicolor
133	6.4	2.8	5.6	2.2	virginica

Regards

Please visit my profile

Alok Pratap Singh (Research Scholar)

Linkedin (Open in New TAB)

Department of Psychology

apsingh@alluniv.ac.in

University of Allahabad

Without data you’re just another person with an opinion

Multiple Plots & Splitting the data using R

Box and Whisker plot using base-R

Box and Whisker plot using caret package

Density plot using caret package

Pair plot using ggplot2 and GGally

Pair plot using psych package

Plotting manually using ggplot2 and gridExtra

ellipse plot using caret package

Sample split method 1 (R base)

Sample split method 2 (Package: caret)

Sample split method 3 (Package: catools)

Sample split method 4 (Package: dplyr)

Box and Whisker plot using `base-R`

Box and Whisker plot using `caret package`

Density plot using `caret package`

Pair plot using `ggplot2` and `GGally`

Pair plot using `psych package`

Plotting manually using `ggplot2` and `gridExtra`

ellipse plot using `caret package`