Part I
The Grammar of Graphics - ggplot2
Elements
|
Description
|
Data
|
The dataset being plotted
|
Aesthetics
|
Scale onto which we map our data
|
Geometries
|
Visual elements used for our data
|
Themes
|
All non-data ink
|
Statistics
|
Representation of our data to aid understanding
|
Coordinates
|
The space on which the data will be plotted
|
Sample dataset of MPG
Dataset of automobile - MPG
manufacturer
|
model
|
displ
|
year
|
cyl
|
trans
|
drv
|
cty
|
hwy
|
fl
|
class
|
audi
|
a4
|
1.8
|
1999
|
4
|
auto(l5)
|
f
|
18
|
29
|
p
|
compact
|
audi
|
a4
|
1.8
|
1999
|
4
|
manual(m5)
|
f
|
21
|
29
|
p
|
compact
|
audi
|
a4
|
2.0
|
2008
|
4
|
manual(m6)
|
f
|
20
|
31
|
p
|
compact
|
audi
|
a4
|
2.0
|
2008
|
4
|
auto(av)
|
f
|
21
|
30
|
p
|
compact
|
audi
|
a4
|
2.8
|
1999
|
6
|
auto(l5)
|
f
|
16
|
26
|
p
|
compact
|
How to create scatterplot
# Basic scatterplot of displ (x) against hwy (y)
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

# Same scatterplot, with each point coloured by its class and a plot title
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  labs(title = "Displ Vs Hwy")

# geom_jitter() nudges every point by a small random amount, so points that
# would otherwise sit exactly on top of each other become visible
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_jitter() +
  labs(title = "Displ Vs Hwy - Jitterred")

Position adjustment
# Basic bar chart: geom_bar() counts the rows for each value of cyl
ggplot(mpg, aes(x = cyl)) +
  geom_bar() +
  labs(title = "Number of Cyl")

# Pre-compute the number of rows per class/cyl combination so that each bar
# can be split by class; the count column is renamed from n to freq_2
t1 <- count(x = mpg, class, cyl) %>%
  rename(freq_2 = n)

# Stacked bar chart: stat = "identity" uses freq_2 directly as the bar height,
# and coord_flip() swaps the axes so the bars run horizontally
ggplot(t1, mapping = aes(x = cyl, y = freq_2, fill = class)) +
  geom_bar(stat = "identity") +
  ggtitle(label = "A stacked bar chart") +
  coord_flip()

# Dodged bar chart: position = "dodge" places the class bars side by side
# instead of stacking them
ggplot(t1, mapping = aes(x = cyl, y = freq_2, fill = class)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle(label = "A dodged bar chart")

How to add customized colors in scatterplot
# Scatterplot coloured by class using the ColorBrewer "Dark2" palette
ggplot(mpg, aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  # override.aes enlarges the legend keys only (size 4); the plotted points
  # keep their default size
  guides(color = guide_legend(override.aes = list(size = 4))) +
  scale_color_brewer(palette = "Dark2") +
  labs(title = "Hwy vs Displ")

Creating boxplot
# One colour code per row of mpg: a translucent blue where hwy is above 25,
# a translucent red otherwise
mycol <- if_else(
  mpg$hwy > 25,
  rgb(0.1, 0.1, 0.7, 0.5),
  rgb(0.8, 0.1, 0.3, 0.6)
)

# mycol already contains literal colour codes. Without scale_color_identity()
# ggplot2 treats those strings as an ordinary categorical variable and draws
# its default palette instead of the colours chosen above.
ggplot(mpg, aes(x = class, y = hwy, color = mycol)) +
  geom_boxplot() +
  scale_color_identity()

Usage of facet wraps for categorical variables
# One panel per class; nrow = 3 lays the panels out in three rows
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_line() +
  facet_wrap(~class, nrow = 3)

Create scatterplot through filtering “subcompact”
# The point layer receives only the rows whose class is "subcompact", so
# nothing else from mpg is drawn
ggplot(mpg, aes(x = displ, y = hwy, color = manufacturer)) +
  geom_point(data = filter(mpg, class == "subcompact"))

Create Scatter plot - Cyl as a categorical variable
# factor(cyl) makes cyl a categorical (discrete) x axis
ggplot(mpg, aes(x = factor(cyl), y = hwy, color = class)) +
  geom_point()

Part II - Datacamp
Creating basic scatterplot
# wt against mpg, coloured by disp, with a smoothed trend line on top
ggplot(mtcars, aes(x = wt, y = mpg, color = disp)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Two categorical variables: cyl on the fill, am on the outline colour.
# shape = 21 is a point shape with both an outline and an interior; the
# default point shape has no interior, so the fill mapping would not be drawn.
ggplot(mtcars, aes(wt, mpg, fill = factor(cyl), color = factor(am))) +
  geom_point(shape = 21)

# Draw the cyl value as a text label at each (wt, mpg) position instead of a
# point
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_text(aes(label = factor(cyl)))

Create scatter plot in Diamonds dataset - Include Themes
# carat against price, coloured by clarity; alpha = 0.4 makes the points
# translucent, and a smoother is added on top
ggplot(diamonds, aes(x = carat, y = price, color = clarity)) +
  geom_point(alpha = 0.4) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# cyl is wrapped in factor() so it is treated as categorical; points are drawn
# as open circles (shape 1) of size 4 and the axes are swapped by coord_flip()
ggplot(mtcars, aes(x = mpg, y = factor(cyl))) +
  geom_point(shape = 1, size = 4) +
  coord_flip()

# Label every car with its row name; the fixed color = "red" on the layer
# takes precedence over the color mapping declared in ggplot()
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_text(label = row.names(mtcars), color = "red")

Position function in ggplot2
# Names of the position adjustments, numbered 1 to 7
positions <- c(
  "identity", "Dodge", "Stack", "fill",
  "jitter", "jitterdodge", "nudge"
)
number <- seq_len(7)
df2 <- data.frame(positions, number)

# Move the number column to the front, then render the seven rows as a styled
# table
df2 %>%
  select(number, everything()) %>%
  head(n = 7) %>%
  kable() %>%
  kable_styling()
number
|
positions
|
1
|
identity
|
2
|
Dodge
|
3
|
Stack
|
4
|
fill
|
5
|
jitter
|
6
|
jitterdodge
|
7
|
nudge
|
# Create barchart with categorical variable
# am is turned into a labelled factor in one step: its two levels, in sorted
# order, are labelled "Automatic" and "Manual"
mtcars$fam <- factor(mtcars$am, labels = c("Automatic", "Manual"))

# Named colour vector so that each fam level gets a fixed fill colour
colorr <- c(Automatic = "#377EB8", Manual = "#E41A1C")

# Stacked bars (the default position) with the manual fill scale
ggplot(mtcars, aes(factor(cyl), fill = fam)) +
  geom_bar() +
  labs(x = "number of cylinders", y = "Count") +
  scale_fill_manual("Transmissions", values = colorr)

# Same chart with the bars placed side by side
ggplot(mtcars, aes(factor(cyl), fill = fam)) +
  geom_bar(position = "dodge") +
  labs(x = "number of cylinders", y = "Count") +
  scale_fill_manual("Transmissions", values = colorr)

# To see the distribution of a single column, plot it against a constant y
# and jitter the points; xlim() zooms in on a specific range of the data
# (values outside the limits are dropped, hence the warning below)
ggplot(mtcars, aes(mpg, 0)) +
  geom_point(position = "jitter", color = "purple") +
  xlim(c(15, 35))
## Warning: Removed 5 rows containing missing values (geom_point).

How to overcome overplotting
# Plain scatterplot of carat against price
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()

# Two ways to deal with overplotting.
# Option 1: jitter the points explicitly through the position argument
ggplot(diamonds, aes(x = carat, y = price, color = color)) +
  geom_point(position = position_jitter(width = 0.5))

# Option 2: geom_jitter() with alpha = 0.2, so the points are translucent and
# overlapping points build up to a darker colour
ggplot(diamonds, aes(x = carat, y = price, color = color)) +
  geom_jitter(alpha = 0.2)

Histogram - for continuous x axis
# ..density.. puts the computed density on the y axis instead of the raw
# count; the histogram uses 30 bins
ggplot(iris, aes(Sepal.Width, ..density..)) +
  geom_histogram(color = "red", bins = 30, fill = "#51A8C9")

# Same histogram with bins that are 1 unit wide. binwidth overrides bins, so
# only binwidth is passed here.
ggplot(iris, aes(Sepal.Width, ..density..)) +
  geom_histogram(color = "red", binwidth = 1)

# Positions on barplot
# Default position: the bars of the two am groups are stacked
ggplot(mtcars, aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1)

# "dodge": the two groups are drawn side by side within each bin
ggplot(mtcars, aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "dodge")

# "fill": each bin is scaled to the same total height, showing proportions
ggplot(mtcars, aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "fill")
## Warning: Removed 16 rows containing missing values (geom_bar).

# "identity": the groups are drawn over each other, made translucent with
# alpha = 0.4 so both remain visible
ggplot(mtcars, aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.4)

Barplot - for categorical x axis using “geom_bar and geom_count”
# Count of cars per cyl, with the bars split by am (stacked by default)
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar()

# A dodge width of 0.2 leaves the two groups partly overlapping, and
# alpha = 0.6 keeps the overlapped bar visible
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = position_dodge(width = 0.2), alpha = 0.6)

# Proportions within each cyl value, coloured with the default brewer palette
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "fill") +
  scale_fill_brewer()

Line plot using ggplot2 - Useful for time series data
#ggplot(fish.tidy, aes(Year, Capture, color = Species)) + geom_line()
# Line plot of demand over Time from the BOD data set, on a minimal theme
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line(color = "Purple") +
  theme_minimal()
Themes
# legend.position = "none" removes the legend from the plot
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "fill") +
  scale_fill_brewer() +
  theme(legend.position = "none")

# Draw the axis lines as red dashed lines
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line(color = "Purple") +
  theme(axis.line = element_line(color = "red", linetype = "dashed"))

# Manual theme
g <- ggplot(BOD, aes(Time, demand))+geom_line(color="Purple")
manual_theme <- theme(
text= element_text(family="serif", size= 14),
rect= element_blank(),
panel.grid = element_blank(),
title= element_text(color= "#8b0000"),
axis.line= element_line(color="black")
)
g + manual_theme

# ggplot2's built-in themes
# The base plot gets its own name. Assigning it to `mtcars` would replace the
# mtcars data frame with a plot object, so any later code that reads mtcars as
# data would fail.
mtcars_plot <- ggplot(mtcars, aes(mpg, wt, color = disp)) +
  geom_jitter()
mtcars_plot + theme_classic()

# ggthemes
library(ggthemes)
mtcars_plot + theme_fivethirtyeight()

mtcars_plot + theme_tufte()

# Add a thick segment from each point to x = 25 at the same wt, and print the
# disp value in small white text at each point
mtcars_plot +
  geom_segment(aes(xend = 25, yend = wt), size = 2) +
  geom_text(aes(label = disp), color = "white", size = 1.5)

# Custom theme built on theme_classic(): no y axis line or ticks, black axis
# text, no axis titles and no legend
theme2 <- theme_classic() +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text = element_text(color = "black"),
    axis.title = element_blank(),
    legend.position = "none"
  )
mtcars_plot + theme2
