ggplot2 package: 1.1 Using geom_histogram to plot one numeric variable
library(caret)
## Warning: package 'caret' was built under R version 3.2.5
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
library(ggplot2)
# Trying various ways of plotting one numeric variable.
# Note: geom_histogram() is the geom-side counterpart of stat_bin(); both bin
# a continuous variable and draw the bin counts as bars.
# Columns are referenced by bare name inside aes(): writing `iris$...` there
# bypasses the `data` argument and is discouraged by ggplot2.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(color = 3, fill = 6) +
  theme_bw() +
  ggtitle("Histogram Plot") +
  ylab("Count") +
  xlab("Sepal Length") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
1.2 Using geom_bar
# Bar plot: one bar per distinct sepal-length value, bar height = number of
# rows with that value. The column is referenced by bare name inside aes()
# so it is looked up in `data` rather than in the global `iris` object.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_bar(color = 3, fill = 4) +
  theme_bw() +
  xlab("Sepal Length") +
  ylab("Frequency") +
  ggtitle("Bar Plot") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
1.3 Using stat_ function
# stat_bin() is similar to geom_histogram(): it bins sepal length and draws
# the counts as bars. The column is referenced by bare name inside aes()
# instead of `iris$...`, which ggplot2 discourages.
ggplot(data = iris, aes(x = Sepal.Length)) +
  stat_bin(color = 3, fill = 2) +
  theme_bw() +
  ylab("Count") +
  xlab("Sepal Length") +
  ggtitle("Histogram Plot") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
1.4 Using some other geom functions
# Scatter plot of sepal width against sepal length, coloured by species.
# `iris` ships with base R (package `datasets`); it is not provided by caret.
# Two sets of linear fits are overlaid:
#   * one overall regression line across all species (fixed color 4), and
#   * one regression line per species (color mapped to Species).
# `se = FALSE` (spelled out, since `F` can be reassigned) hides the
# confidence bands.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  ggtitle("Scatter Plot") +
  geom_smooth(method = "lm", se = FALSE, color = 4) +
  geom_smooth(aes(color = Species), method = "lm", se = FALSE) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
1.5 Using stat_summary to plot error bar graph
# Error-bar plot: per-species mean sepal length drawn as a point, with error
# bars spanning mean +/- 1 standard deviation (mean_sdl with mult = 1).
# The y column is referenced by bare name inside aes() instead of `iris$...`.
# NOTE(review): mean_sdl() relies on the Hmisc package being installed, and
# newer ggplot2 releases rename `fun.y` to `fun` -- switch once the minimum
# supported ggplot2 version allows it.
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "errorbar",
    width = 0.2
  ) +
  xlab("Species") +
  ylab("Sepal Length") +
  ggtitle("Error Bar Plot") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
Another way to plot the above graph, but not recommended
# Same summary drawn as bars: bar height is the per-species mean sepal
# length, with red (color 2) error bars spanning mean +/- 1 standard
# deviation. The y column is referenced by bare name inside aes() instead of
# `iris$...`.
# NOTE(review): mean_sdl() relies on the Hmisc package being installed, and
# newer ggplot2 releases rename `fun.y` to `fun`.
ggplot(data = iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.data = mean_sdl,
    geom = "errorbar",
    fun.args = list(mult = 1),
    width = 0.2,
    color = 2
  ) +
  theme_bw() +
  ylab("Sepal Length") +
  ggtitle("Error Bar Plot") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
1.6 Check normal distribution of numeric data variables
library(MASS)
## Warning: package 'MASS' was built under R version 3.2.5
# Preview the first six rows of the mammals data (columns `body` and `brain`).
head(mammals, n = 6L)
## body brain
## Arctic fox 3.385 44.5
## Owl monkey 0.480 15.5
## Mountain beaver 1.350 8.1
## Cow 465.000 423.0
## Grey wolf 36.330 119.5
## Goat 27.660 115.0
# Work on the log10 scale of body weight before checking normality.
animal.new <- data.frame(body = log10(mammals$body))

# Histogram on the density scale with a rug of the raw observations, overlaid
# with the normal density that has the sample's mean and standard deviation.
# The dnorm() arguments are passed by name so the call does not depend on
# dnorm's positional argument order.
# NOTE(review): `..density..` is kept for the ggplot2 version this file
# targets; newer releases prefer `after_stat(density)`.
ggplot(data = animal.new, aes(x = body)) +
  geom_histogram(aes(y = ..density..)) +
  geom_rug() +
  stat_function(
    fun = dnorm,
    color = 4,
    args = list(mean = mean(animal.new$body), sd = sd(animal.new$body))
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Using qqplot to check normal distribution
# Reference line for the normal Q-Q plot: the straight line through the first
# and third quartiles of the sample versus those of the standard normal.
# The slope and intercept are still stored as (constant) columns so any later
# code that reads animal.new$slope / animal.new$int keeps working.
animal.new$slope <- diff(quantile(animal.new$body, c(0.25, 0.75))) /
  diff(qnorm(c(0.25, 0.75)))
animal.new$int <- quantile(animal.new$body, 0.25) -
  animal.new$slope * qnorm(0.25)

# The line is a single constant, so it is passed to geom_abline() as fixed
# parameters. Mapping the recycled columns through aes() would draw one
# identical line per data row on top of each other.
ggplot(data = animal.new, aes(sample = body)) +
  stat_qq() +
  geom_abline(
    slope = unname(animal.new$slope[1]),
    intercept = unname(animal.new$int[1]),
    color = "blue"
  )
1.7 Zooming in on a certain range using the functions below
# Scatter plot restricted to sepal lengths between 4 and 8 via scale limits.
# Columns are referenced by bare name inside aes() instead of `iris$...`.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  ggtitle("Scatter Plot") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_continuous(limits = c(4, 8))
# We can also use `coord_cartesian(xlim = c(4, 8))`. Scale limits drop any
# observations outside the range before statistics are computed, whereas
# coord_cartesian() only zooms the view and keeps all the data.
1.8 Boxplot
# Box plot of sepal length per species with the individual observations
# jittered on top (alpha = 0.7 so overlapping points stay visible).
# The y column is referenced by bare name inside aes() instead of `iris$...`.
# geom_jitter() inherits color = Species from the top-level aes(), so it
# does not need to be mapped again.
ggplot(data = iris, aes(x = Species, y = Sepal.Length, color = Species)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.7) +
  ylab("Sepal Length") +
  ggtitle("Boxplot")
1.9 Using facets
# Scatter plot split into one panel per species.
# Columns are referenced by bare name inside aes(): `iris$...` bypasses the
# plot data and is unsafe once the data are split into facets.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  facet_grid(. ~ Species)
# The facet_grid() formula reads rows ~ columns; `.` means "no faceting on
# this dimension", so `. ~ Species` lays the species out as columns.
2.0 Themes layer
The theme() layer has the following arguments: theme(text=, title=,plot.title=,legend.text=,legend.title=,axis.title=,axis.title.x=,axis.text=,axis.title.y=,axis.text.x=,axis.text.y=,legend.position="bottom")
# Faceted scatter plot using the classic theme without a facet-strip
# background.
# theme_classic() is a complete theme and replaces every theme element, so it
# must be added BEFORE the theme() tweak; in the reverse order the
# strip.background override is silently discarded.
# Columns are referenced by bare name inside aes() instead of `iris$...`.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  facet_grid(. ~ Species) +
  theme_classic() +
  theme(strip.background = element_blank())
2.1 Using heat map plot
# Create color palette: nine shades from the sequential "Reds" scheme.
library(RColorBrewer)
myColors <- brewer.pal(9, "Reds")

# Build the heat map from scratch: one tile per (year, variety) filled by
# yield, with one panel per site stacked in a single column.
# `barley` is a lattice dataset; it is referenced as lattice::barley so the
# plot does not depend on lattice having been attached as a side effect of
# loading another package.
ggplot(lattice::barley, aes(x = year, y = variety, fill = yield)) +
  geom_tile() +
  facet_wrap(~ site, ncol = 1) +
  scale_fill_gradientn(colors = myColors)