# Load the ggplot2 library. If it is not installed, install it with install.packages("ggplot2").
library(ggplot2)# Ggplot2 library
library(ggplot2)
# Preview the first rows of the mtcars dataset used by the bar charts below.
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

# Basic barplot: one bar per cylinder count, with explicit axis titles.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl))) +
  geom_bar() +
  labs(x = "Number of Cylinders", y = "Count")

# 1: Uniform color. `colour` is the bar border, `fill` is the inside.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl))) +
  geom_bar(
    fill = rgb(red = 0.1, green = 0.4, blue = 0.5, alpha = 0.7),
    colour = "blue"
  )

# 2: Fill mapped to the cylinder factor, colored with a hue scale.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  scale_fill_hue(c = 40)

# 3: Same mapping, colored with an RColorBrewer palette.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1")

# 4: Same mapping, colored with a greyscale.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  scale_fill_grey(end = 0.75, start = 0.25)

# 5: Same mapping, colors set manually.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  scale_fill_manual(values = c("red", "green", "blue"))

# 6: Horizontal barplot (axes flipped).
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar() +
  coord_flip()

# 7: Custom bar width.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), fill = as.factor(cyl))) +
  geom_bar(width = 0.4)

# library
library(ggplot2)
# Create a dataset: four species, each measured under three conditions.
# `value` comes from rnorm() with no seed set, so the numbers (and the preview
# printed below) differ from run to run.
specie <- rep(c("apple", "mango", "banana", "peach"), each = 3)
condition <- rep(c("normal", "stress", "Nitrogen"), 4)
value <- abs(rnorm(12, 0, 15))
data <- data.frame(specie, condition, value)
head(data)
## specie condition value
## 1 apple normal 21.217761
## 2 apple stress 18.149166
## 3 apple Nitrogen 26.155190
## 4 mango normal 15.827057
## 5 mango stress 4.615567
## 6 mango Nitrogen 4.948118

# geom_col() is used throughout: it draws bars whose heights are the y values,
# which is what geom_bar(stat = "identity") did.

# Grouped: bars of each condition side by side within a species.
ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_col(position = "dodge")

# Stacked: conditions stacked on top of each other.
ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_col()

# Stacked percent: each stack rescaled to the same total height.
ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_col(position = "fill")

# Stacked percent, colored with an RColorBrewer palette.
ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_col(position = "fill") +
  scale_fill_brewer(palette = "Set1")

# Faceting: one panel per condition, bars colored by species.
ggplot(data, aes(y = value, x = specie, color = specie, fill = specie)) +
  geom_col() +
  facet_wrap(~condition)

# library
library(ggplot2)
# Dataset: 10000 draws from rnorm() in a single column named `value`.
# No seed is set, so the preview printed below differs from run to run.
data <- data.frame(value = rnorm(10000))
head(data)
## value
## 1 -1.9866024
## 2 0.5871844
## 3 -0.2282852
## 4 0.3897893
## 5 0.7204838
## 6 0.3417003

# Basic histogram (default binning).
ggplot(data, aes(x = value)) +
  geom_histogram()

# Custom binning: give the width of each bin directly.
ggplot(data, aes(x = value)) +
  geom_histogram(binwidth = 0.05)

# Uniform color: white borders, semi-transparent green fill.
ggplot(data, aes(x = value)) +
  geom_histogram(binwidth = 0.2, color = "white", fill = rgb(0.2, 0.7, 0.1, 0.4))

# Proportional color: fill each bar according to its computed count.
# after_stat(count) replaces the deprecated `..count..` notation.
ggplot(data, aes(x = value)) +
  geom_histogram(binwidth = 0.2, aes(fill = after_stat(count)))

# library
library(ggplot2)
# The iris dataset is provided by R.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

# Basic scatterplot.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# Use options! Shape 21 is a filled circle, so both the border color and the
# fill can be set.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(
    color = "black",
    fill = "blue",
    shape = 21,
    alpha = 0.5,
    size = 2,
    stroke = 2
  )

# Color and shape depend on a factor (categorical variable).
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species, shape = Species)) +
  geom_point(size = 6, alpha = 0.6)

# Color and size depend on a continuous variable (Petal.Length).
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Petal.Length, size = Petal.Length)) +
  geom_point(alpha = 0.6)

# Simulated data for the trend-line examples: two noisy increasing series.
# `cond` has only 20 values (10 per condition); data.frame() recycles it to
# fill the 100 rows. rnorm() is unseeded, so the preview below varies per run.
data <- data.frame(
  cond = rep(c("condition_1", "condition_2"), each = 10),
  my_x = 1:100 + rnorm(100, sd = 9),
  my_y = 1:100 + rnorm(100, sd = 16)
)
head(data)
## cond my_x my_y
## 1 condition_1 -7.5574224 -2.438493
## 2 condition_1 -0.6726812 -3.488034
## 3 condition_1 6.6869147 -14.094291
## 4 condition_1 7.4804401 16.536489
## 5 condition_1 -4.1656649 -1.750653
## 6 condition_1 15.3304614 -14.875692

# Add a linear trend: linear regression line only.
ggplot(data, aes(x = my_x, y = my_y)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm, color = "red", se = FALSE)

# Add a linear trend: linear regression line with its confidence interval.
ggplot(data, aes(x = my_x, y = my_y)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm, color = "red", se = TRUE)

# library
library(ggplot2)
# The mtcars dataset is provided by R; keep at most its first 30 rows.
data <- head(mtcars, 30)
head(data)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

# Add text with geom_text; nudge_x / nudge_y shift the labels away from the
# points. The row names of `data` (car models) are used as labels.
ggplot(data, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_text(label = rownames(data), nudge_x = 0.25, nudge_y = 0.25, check_overlap = TRUE)

# To improve readability, use geom_label.
ggplot(data, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_label(label = rownames(data), nudge_x = 0.25, nudge_y = 0.2)

# Customize geom_label like any other geom: fill mapped to cyl, white text.
ggplot(data, aes(x = wt, y = mpg, fill = cyl)) +
  geom_label(label = rownames(data), color = "white", size = 5)

# library
library(ggplot2)
# Preview the first rows of the mtcars dataset.
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

# A really basic boxplot: mpg distribution for each cylinder count.
ggplot(data = mtcars, mapping = aes(x = as.factor(cyl), y = mpg)) +
  geom_boxplot(alpha = 0.2, fill = "slateblue") +
  labs(x = "cyl")

# Preview the first rows of the mpg dataset (printed as a tibble below).
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p
## # ... with 1 more variables: class <chr>

# Set a different color for each group; the legend is hidden.
ggplot(data = mpg, mapping = aes(x = class, y = hwy, fill = class)) +
  geom_boxplot(alpha = 0.3) +
  theme(legend.position = "none")

# create a data frame
# Seven varieties (A-G) with 40 observations each. `treatment` has only 40
# values (20 "high" then 20 "low"); data.frame() recycles it to fill the 280
# rows. `note` adds a random integer to the row index; sample() is unseeded,
# so the preview below varies per run.
variety <- rep(LETTERS[1:7], each = 40)
treatment <- rep(c("high", "low"), each = 20)
note <- seq_len(280) + sample(1:150, 280, replace = TRUE)
data <- data.frame(variety, treatment, note)
head(data)
## variety treatment note
## 1 A high 98
## 2 A high 90
## 3 A high 100
## 4 A high 148
## 5 A high 98
## 6 A high 70

# Grouped boxplot: treatments side by side within each variety.
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
  geom_boxplot()

# One panel per treatment.
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
  geom_boxplot() +
  facet_wrap(~treatment)

# One panel per variety, each with its own axis ranges.
# The argument is spelled `scales`; the original `scale=` relied on partial
# argument matching.
ggplot(data, aes(x = variety, y = note, fill = treatment)) +
  geom_boxplot() +
  facet_wrap(~variety, scales = "free")

library(ggplot2)
# Create data: four groups of unequal size (20, 8, 30 and 80 observations),
# each sampled with replacement from its own integer range. sample() is
# unseeded, so the preview below varies per run.
names <- c(rep("A", 20), rep("B", 8), rep("C", 30), rep("D", 80))
value <- c(
  sample(2:5, 20, replace = TRUE),
  sample(4:10, 8, replace = TRUE),
  sample(1:7, 30, replace = TRUE),
  sample(3:8, 80, replace = TRUE)
)
data <- data.frame(names, value)
head(data)
## names value
## 1 A 2
## 2 A 4
## 3 A 2
## 4 A 2
## 5 A 5
## 6 A 3

# Plot: one box per group, with the group mean overlaid as a large red point.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
ggplot(data, aes(x = names, y = value, fill = names)) +
  geom_boxplot(alpha = 0.4) +
  stat_summary(fun = mean, geom = "point", shape = 20, size = 10, color = "red", fill = "red") +
  theme(legend.position = "none") +
  scale_fill_brewer(palette = "Set3")

# ggplot2 library
library(ggplot2)
# Load the diamonds dataset and preview its first rows.
data("diamonds")
head(diamonds, n = 6L)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48

# Plot 1: density of price, one curve per cut of the diamond.
ggplot(diamonds, mapping = aes(x = price, fill = cut, group = cut)) +
  geom_density(adjust = 1.5)

# Plot 2: same densities drawn with transparency (the alpha argument).
ggplot(diamonds, mapping = aes(x = price, fill = cut, group = cut)) +
  geom_density(alpha = 0.2, adjust = 1.5)

# install.packages("GGally")
library("GGally")
# Prepare some data: keep columns 1 and 3-7 of mtcars
# (mpg, disp, hp, drat, wt, qsec, as shown in the preview below).
df <- mtcars[, c(1, 3, 4, 5, 6, 7)]
head(df)
## mpg disp hp drat wt qsec
## Mazda RX4 21.0 160 110 3.90 2.620 16.46
## Mazda RX4 Wag 21.0 160 110 3.90 2.875 17.02
## Datsun 710 22.8 108 93 3.85 2.320 18.61
## Hornet 4 Drive 21.4 258 110 3.08 3.215 19.44
## Hornet Sportabout 18.7 360 175 3.15 3.440 17.02
## Valiant 18.1 225 105 2.76 3.460 20.22

# Correlation plot 1: labelled correlation matrix with the "RdBu" palette.
ggcorr(df, palette = "RdBu", label = TRUE)

# Correlation plot 2: pairwise plot matrix of all columns.
ggpairs(df)

# These are some of the plots you will need for exploratory data analysis.
# If you wish to learn more, I am including a few resources. Please check
# them out.
# Advanced Plots documentation · ggplot2 cheatsheet · ggplot2 book