Part I

The Grammar of Graphics - ggplot2

Elements Description
Data The dataset being plotted
Aesthetics Scale onto which we map our data
Geometries Visual elements used for our data
Themes All non-data ink
Statistics Representation of our data to aid understanding
Coordinates The space on which the data will be plotted

Sample dataset of MPG

Dataset of automobile - MPG
manufacturer model displ year cyl trans drv cty hwy fl class
audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
audi a4 2.0 2008 4 auto(av) f 21 30 p compact
audi a4 2.8 1999 6 auto(l5) f 16 26 p compact

How to create scatterplot

# Basic scatterplot: displ on the x axis, hwy on the y axis.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

# Same scatterplot with the points coloured by class, plus a plot title.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  labs(title = "Displ Vs Hwy")

# geom_jitter() adds a small random offset to every point so that
# observations sharing the same coordinates no longer hide each other.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_jitter() +
  labs(title = "Displ Vs Hwy - Jitterred")

Position adjustment

# Bar chart of the number of rows for each value of cyl.
ggplot(data = mpg, mapping = aes(x = cyl)) +
  geom_bar() +
  labs(title = "Number of Cyl")

# Count the rows for every class/cyl combination and store the count in a
# column called freq_2 (renamed from count()'s default n).
t1 <- mpg %>%
  count(class, cyl) %>%
  rename(freq_2 = n)

# Stacked bars: freq_2 already holds the bar heights, hence stat = "identity".
# Each bar is split by class and the axes are flipped.
ggplot(data = t1, mapping = aes(x = cyl, y = freq_2, fill = class)) +
  geom_bar(stat = "identity") +
  ggtitle(label = "A stacked bar chart") +
  coord_flip()

# Dodged bar chart: position = "dodge" places the class bars side by side
# within each cyl value instead of stacking them. freq_2 in t1 already holds
# the bar heights, hence stat = "identity".
ggplot(data = t1, mapping = aes(x = cyl, y = freq_2, fill = class)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle(label = "A dodged bar chart")

How to add customized colors in scatterplot

# Scatterplot coloured by class with the ColorBrewer "Dark2" palette.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point() +
  # override.aes enlarges the legend keys only (size 4); the plotted points
  # keep their default size.
  guides(color = guide_legend(override.aes = list(size = 4))) +
  # Replace the default discrete colours with the "Dark2" palette.
  scale_color_brewer(palette = "Dark2") +
  labs(title = "Hwy vs Displ")

Creating boxplot

# One outline colour per row of mpg: translucent blue when hwy is above 25,
# translucent red otherwise. rgb() returns the colours as hex strings.
mycol <- if_else(
  mpg$hwy > 25,
  rgb(0.1, 0.1, 0.7, 0.5),
  rgb(0.8, 0.1, 0.3, 0.6)
)

# mycol holds literal colour values, so scale_color_identity() is needed for
# them to be drawn as given. Without it ggplot2 treats the hex strings as
# category labels and substitutes its default palette.
# Colour is a discrete mapping, so a class with rows on both sides of the
# threshold gets one box per colour.
ggplot(data = mpg, mapping = aes(x = class, y = hwy, color = mycol)) +
  geom_boxplot() +
  scale_color_identity()

Usage of facet wraps for categorical variables

# One panel per class; nrow = 3 lays the panels out in three rows (it sets
# the number of rows, not the number of panels per row).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_line() +
  facet_wrap(~class, nrow = 3)

Create scatterplot through filtering “subcompact”

# The point layer is given its own data: only the rows of mpg whose class is
# "subcompact". Points are coloured by manufacturer.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = manufacturer)) +
  geom_point(data = filter(mpg, class == "subcompact"))

Create Scatter plot - Cyl as a categorical variable

# factor(cyl) puts cyl on a discrete x axis, i.e. it is treated as a
# categorical variable; points are coloured by class.
ggplot(data = mpg, mapping = aes(x = factor(cyl), y = hwy, color = class)) +
  geom_point()

Part II - Datacamp

Creating basic scatterplot

# wt against mpg, points coloured by disp, with a smoothed trend line on top
# (geom_smooth() picks its smoothing method automatically).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = disp)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Two categorical variables on one scatterplot: cyl is mapped to the fill and
# am to the outline colour. shape = 21 is a fillable circle; the default point
# shape ignores the fill aesthetic, so without it the cyl mapping would not be
# visible on the points.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, fill = factor(cyl), color = factor(am))
) +
  geom_point(shape = 21)

# Draw text instead of points: each observation is shown as its cyl value,
# placed at its (wt, mpg) position.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_text(mapping = aes(label = factor(cyl)))

Create scatter plot in Diamonds dataset - Include Themes

# carat against price, coloured by clarity. alpha = 0.4 makes the points
# semi-transparent; a smoothed trend is drawn on top.
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = clarity)) +
  geom_point(alpha = 0.4) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# cyl is wrapped in factor() so it is treated as categorical. Points are drawn
# as hollow circles (shape 1) at size 4, and the axes are then flipped.
ggplot(data = mtcars, mapping = aes(x = mpg, y = factor(cyl))) +
  geom_point(shape = 1, size = 4) +
  coord_flip()

# Label every observation with its row name. The constant color = "red" set on
# the text layer takes precedence over the colour mapped in aes(), so all
# labels are drawn in red.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_text(label = row.names(mtcars), color = "red")

Position function in ggplot2

# Names of the position adjustments, in the order they are numbered below.
positions <- c(
  "identity", "Dodge", "Stack", "fill", "jitter", "jitterdodge", "nudge"
)
# seq_along() numbers the entries 1..length(positions), so the numbering stays
# in step with the vector if entries are added or removed.
number <- seq_along(positions)

# Two-column lookup table (positions, number).
df2 <- data.frame(positions, number)
# Render df2 as a styled table: number column first, then the remaining
# columns, limited to the first 7 rows.
df2 %>%
  select(number, everything()) %>%
  head(n = 7) %>%
  kable() %>%
  kable_styling()
number positions
1 identity
2 Dodge
3 Stack
4 fill
5 jitter
6 jitterdodge
7 nudge
# Create barchart with categorical variable
# fam is am stored as a labelled factor. factor() attaches the labels to the
# sorted unique values of am, so the smaller code becomes "Automatic" and the
# larger one "Manual". The labelled factor is built in a single step.
mtcars$fam <- factor(mtcars$am, labels = c("Automatic", "Manual"))

# Manual fill scale: one named colour per level of fam.
colorr <- c(Automatic = "#377EB8", Manual = "#E41A1C")

# Stacked bars (the default position) of counts per cyl, split by fam.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), fill = fam)) +
  geom_bar() +
  labs(x = "number of cylinders", y = "Count") +
  scale_fill_manual("Transmissions", values = colorr)

# The same chart with the fam bars placed side by side.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), fill = fam)) +
  geom_bar(position = "dodge") +
  labs(x = "number of cylinders", y = "Count") +
  scale_fill_manual("Transmissions", values = colorr)

# Distribution of a single column: every point sits at y = 0 and is jittered
# so the observations do not overlap. xlim() restricts the x axis to the range
# 15-35; observations outside that range are dropped, not just hidden.
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_point(position = "jitter", color = "purple") +
  xlim(c(15, 35))
## Warning: Removed 5 rows containing missing values (geom_point).

How to overcome overplotting

# Basic scatterplot; many of the points are drawn on top of each other.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()

# Two ways of dealing with overplotting.
# 1. Jitter the points explicitly, here with a horizontal spread of 0.5.
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = color)) +
  geom_point(position = position_jitter(width = 0.5))

# 2. Use geom_jitter() and make the points semi-transparent (alpha = 0.2), so
#    regions with many overlapping points appear darker.
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = color)) +
  geom_jitter(alpha = 0.2)

Barplot - for continuous x axis

# Histogram of Sepal.Width in 30 bins. Mapping y to ..density.. plots the
# density of each bin (bar areas sum to 1) rather than the raw count.
ggplot(data = iris, mapping = aes(x = Sepal.Width, y = ..density..)) +
  geom_histogram(color = "red", bins = 30, fill = "#51A8C9")

# The same histogram with bins one unit wide. bins is not passed here because
# binwidth takes precedence over it when both are supplied.
ggplot(data = iris, mapping = aes(x = Sepal.Width, y = ..density..)) +
  geom_histogram(color = "red", binwidth = 1)

# Histograms of mpg filled by am, one for each position adjustment.

# Default position: the bars of the two am groups are stacked.
ggplot(data = mtcars, mapping = aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1)

# "dodge": the bars of the two groups are placed side by side.
ggplot(data = mtcars, mapping = aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "dodge")

# "fill": every bin is stretched to full height, showing proportions.
ggplot(data = mtcars, mapping = aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "fill")
## Warning: Removed 16 rows containing missing values (geom_bar).

# "identity": the bars are drawn on top of each other; alpha = 0.4 keeps the
# overlapped bars visible.
ggplot(data = mtcars, mapping = aes(x = mpg, fill = factor(am))) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.4)

Barplot - for categorical x axis using “geom_bar and geom_count”

# Counts per cyl, with each bar split by am (stacked by default).
ggplot(data = mtcars, mapping = aes(x = cyl, fill = factor(am))) +
  geom_bar()

# Dodge width 0.2 shifts the two am bars only slightly apart, so they
# overlap; alpha = 0.6 keeps both visible.
ggplot(data = mtcars, mapping = aes(x = cyl, fill = factor(am))) +
  geom_bar(position = position_dodge(width = 0.2), alpha = 0.6)

# Proportions within each cyl value, using the default brewer fill palette.
ggplot(data = mtcars, mapping = aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "fill") +
  scale_fill_brewer()

Line plot using ggplot2 - Useful for time series data

#ggplot(fish.tidy, aes(Year, Capture, color = Species)) +  geom_line()

# Line plot of demand over Time, drawn in purple with the minimal theme.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line(color = "Purple") +
  theme_minimal()

Themes

# legend.position = "none" removes the legend from the plot.
ggplot(data = mtcars, mapping = aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "fill") +
  scale_fill_brewer() +
  theme(legend.position = "none")

# Draw the axis lines as red dashed lines.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line(color = "Purple") +
  theme(axis.line = element_line(color = "red", linetype = "dashed"))

# Hand-built theme: serif text at size 14, no background rectangles, no grid
# lines, dark red titles and black axis lines.
manual_theme <- theme(
  text = element_text(family = "serif", size = 14),
  rect = element_blank(),
  panel.grid = element_blank(),
  title = element_text(color = "#8b0000"),
  axis.line = element_line(color = "black")
)

# Base line plot that the theme is applied to.
g <- ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line(color = "Purple")

g + manual_theme

# ggplot2's built-in themes

# The base plot is stored under its own name. Assigning it to `mtcars` would
# replace the mtcars data frame with a plot object for the rest of the
# session, breaking any later code that expects the dataset.
mtcars_plot <- ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = wt, color = disp)
) +
  geom_jitter()
mtcars_plot + theme_classic()

# ggthemes

library(ggthemes)
mtcars_plot + theme_fivethirtyeight()

mtcars_plot + theme_tufte()

# Add a horizontal segment from each point to x = 25 and print disp on top in
# small white text.
mtcars_plot +
  geom_segment(aes(xend = 25, yend = wt), size = 2) +
  geom_text(aes(label = disp), color = "white", size = 1.5)

# theme_classic() with the y axis line and ticks removed, black axis text, no
# axis titles and no legend.
theme2 <- theme_classic() +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text = element_text(color = "black"),
    axis.title = element_blank(),
    legend.position = "none"
  )
mtcars_plot + theme2