Multiple Plots & Splitting the data using R

Alok Pratap Singh

October, 2020

Box and Whisker plot using base-R

# Keep a working copy of the built-in iris data, then render its first six
# rows as a formatted table.
dataset <- iris
kableExtra::kable(utils::head(iris, n = 6L))
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3.0 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5.0 3.6 1.4 0.2 setosa
5.4 3.9 1.7 0.4 setosa
# Split the data into the four numeric measurements (x) and the class
# label (y).
x <- dataset[, 1:4]
y <- dataset[, 5]

# Box plot for each attribute on one image.
# par() returns the previous values of the settings it changes; keep them so
# the 1 x 4 layout, colours and line width do not leak into later
# base-graphics plots.
old_par <- par(
  mfrow = c(1, 4),
  col = "steelblue",
  lwd = 5,
  bg = "black",
  fg = "white"
)

for (i in seq_along(x)) {
  boxplot(
    x[[i]],
    # Title each panel from the data actually being plotted.
    main = names(x)[i],
    col = 0,
    col.main = "tomato2",
    col.axis = "gold",
    cex.axis = 1.5,
    cex.main = 2
  )
}

# Restore the graphics settings that were in effect before this section.
par(old_par)

Box and Whisker plot using caret package

# Box-and-whisker panels for the measurements in x, grouped by the class
# label y.
caret::featurePlot(
  x = x,
  y = y,
  plot = "box"
)

Density plot using caret package

# Density plots for each attribute, split by class value.
library(caret)

# Every panel gets its own x and y axis range instead of a shared one.
free_axis <- list(relation = "free")
scales <- list(x = free_axis, y = free_axis)

featurePlot(x = x, y = y, plot = "density", scales = scales)

Pair plot using ggplot2 and GGally

# Pair plot of iris columns 2 to 5, coloured by species with the
# colour-blind-friendly palettes from ggthemes.
data(iris)
library(ggplot2)
library(ggthemes)
library(GGally)
theme_set(theme_bw())

species_mapping <- aes(col = Species, alpha = .9)
pair_plot <- ggpairs(data = iris, columns = 2:5, mapping = species_mapping)

pair_plot +
  scale_fill_colorblind() +
  scale_color_colorblind() +
  labs(title = "Machine Learning Project")

or

# Pair plot of all five iris columns, coloured by species, using the default
# colour scales.
data(iris)
ggpairs(
  data = iris,
  columns = 1:5,
  mapping = aes(col = Species)
)

Pair plot using psych package

# Scatter-plot matrix of all five iris columns drawn with psych.
data(iris)
psych::pairs.panels(
  iris[1:5],
  hist.col = c(
    "tomato2", "steelblue", "gold",
    "palegreen4", "orange", "lightblue"
  ),
  breaks = 10,
  lwd = 2,
  # One label per plotted column: five columns are passed in, so the
  # Species column needs a label too.
  labels = c(
    "Sepal Length", "Sepal Width",
    "Petal Length", "Petal Width",
    "Species"
  )
)

Plotting manually using ggplot2 and gridExtra

# Six species-coloured scatter plots with ellipses, arranged on one page.
data(iris)
library(ggplot2)
library(ggthemes)
theme_set(theme_bw())

# Layers shared by every panel: points, an ellipse per species, and the
# ggthemes "calc" colour scale. Returned as a list so it can be added to a
# ggplot object with `+`.
#
# show_ellipse_legend: whether the ellipse layer draws a legend. Only one
#   panel turns this on, so the arranged page carries a single legend.
species_layers <- function(show_ellipse_legend = FALSE) {
  list(
    geom_point(size = 2, show.legend = FALSE),
    stat_ellipse(size = 1.3, linetype = 1, show.legend = show_ellipse_legend),
    scale_color_calc()
  )
}

# Descriptive names replace the single letters a-f; in particular a variable
# called `c` would mask base::c() for anyone reading or extending the script.
sepal_length_petal_length <-
  ggplot(iris, aes(Sepal.Length, Petal.Length, col = Species)) +
  species_layers()

sepal_length_sepal_width <-
  ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
  species_layers()

sepal_length_petal_width <-
  ggplot(iris, aes(Sepal.Length, Petal.Width, col = Species)) +
  species_layers()

petal_length_sepal_width <-
  ggplot(iris, aes(Petal.Length, Sepal.Width, col = Species)) +
  species_layers()

petal_length_petal_width <-
  ggplot(iris, aes(Petal.Length, Petal.Width, col = Species)) +
  species_layers()

sepal_width_petal_width <-
  ggplot(iris, aes(Sepal.Width, Petal.Width, col = Species)) +
  species_layers(show_ellipse_legend = TRUE)


library(gridExtra)
# The default theme is switched before drawing, exactly as in the original
# ordering of these statements.
theme_set(theme_base())
grid.arrange(
  sepal_length_petal_length,
  sepal_length_sepal_width,
  sepal_length_petal_width,
  petal_length_sepal_width,
  petal_length_petal_width,
  sepal_width_petal_width
)

Ellipse plot using caret package

# Scatter-plot matrix of the four measurements with an ellipse per species,
# drawn by caret.
data(iris)
caret::featurePlot(
  x = iris[, 1:4],
  y = iris[, 5],
  plot = "ellipse",
  lwd = 3,
  # Title spelling corrected ("Exercise").
  main = "Machine Learning Exercise"
)

Sample split method 1 (R base)

# data(iris)
# split_positions <- sample(1:nrow(iris), size= .75 * nrow(iris))
# split_positions
# train <- iris[split_positions,]
# test <- iris[-split_positions,]

Sample split method 2 (Package: caret)

# data(iris)
# split_position <- caret::createDataPartition(1:nrow(iris), p= .75, list= F)
# train <- iris[split_position,]
# test <- iris[-split_position,]

Sample split method 3 (Package: caTools)

# data(iris)
# caTools::sample.split(iris$Sepal.Length, SplitRatio = .75)-> split_tag
# train <- iris[split_tag==T,]
# test <- iris[split_tag==F,]
# train <- subset(iris, split_tag==T)
# test <- subset(iris, split_tag==F)

# dim(train);dim(test)

Sample split method 4 (Package: dplyr)

# 75/25 train/test split with dplyr, using a row id to separate the sets.
library(dplyr)
data(iris)

# Add the row id to a separate object instead of overwriting `iris`, so code
# that indexes iris columns by position still sees the original five columns
# if it is re-run in the same session.
iris_with_id <- iris %>%
  mutate(id = row_number()) %>%
  relocate(id)

# Draw 75% of the rows at random for training; no seed is set, so the rows
# chosen differ from run to run.
train <- iris_with_id %>%
  sample_frac(.75)

# Every row whose id was not sampled into `train` goes to the test set.
test <- anti_join(iris_with_id, train, by = "id")

dim(train)
dim(test)
## [1] 112   6
## [1] 38  6
# Render the first six rows of the training set as a formatted table.
pander::pander(utils::head(train, n = 6L))
id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
92 6.1 3 4.6 1.4 versicolor
16 5.7 4.4 1.5 0.4 setosa
37 5.5 3.5 1.3 0.2 setosa
85 5.4 3 4.5 1.5 versicolor
52 6.4 3.2 4.5 1.5 versicolor
133 6.4 2.8 5.6 2.2 virginica

Regards

Please visit my profile

Alok Pratap Singh (Research Scholar)

LinkedIn (open in new tab)

Department of Psychology

University of Allahabad

Without data you’re just another person with an opinion