1. Different ways of representing data

1.1 Using geom_histogram to plot one numeric variable

library(caret)
## Warning: package 'caret' was built under R version 3.2.5
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
library(ggplot2)

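# Trying various ways of plotting data. Note: every geom_*() has a default stat and every stat_*() has a default geom, so some geom_/stat_ pairs (e.g. geom_histogram() and stat_bin()) draw the same plot.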

# Histogram of sepal length using the default binning (30 bins).
# Columns are referenced by bare name inside aes() so they are looked up in
# `data`; writing `iris$Sepal.Length` bypasses the layer data and is
# discouraged by ggplot2.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(color = 3, fill = 6) +
  theme_bw() +
  ggtitle("Histogram Plot") +
  ylab("Count") +
  xlab("Sepal Length") +
  theme(plot.title = element_text(hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

1.2 Using geom_bar

# Bar plot of sepal length: geom_bar() counts the rows for each distinct
# x value (no binning), so every unique length gets its own bar.
# The column is referenced by bare name so aes() resolves it from `data`.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_bar(color = 3, fill = 4) +
  theme_bw() +
  xlab("Sepal Length") +
  ylab("Frequency") +
  ggtitle("Bar Plot") +
  theme(plot.title = element_text(hjust = 0.5))

1.3 Using stat_ function

# stat_bin() with its default geom draws the same histogram as
# geom_histogram(): both bin the x variable and plot the counts as bars.
# The column is referenced by bare name so aes() resolves it from `data`.
ggplot(data = iris, aes(x = Sepal.Length)) +
  stat_bin(color = 3, fill = 2) +
  theme_bw() +
  ylab("Count") +
  xlab("Sepal Length") +
  ggtitle("Histogram Plot") +
  theme(plot.title = element_text(hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

1.4 Using some other geom functions

# `iris` ships with base R's `datasets` package (it is not provided by `caret`).

# Scatter plot of sepal width against sepal length, coloured by species, with
# one overall linear fit (color 4) plus one linear fit per species.
# Columns are referenced by bare name so aes() resolves them from `data`, and
# `se = FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  ggtitle("Scatter Plot") +
  geom_smooth(method = "lm", se = FALSE, color = 4) +
  geom_smooth(aes(color = Species), method = "lm", se = FALSE) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

1.5 Using stat_summary to plot error bar graph

# Mean sepal length per species (point) with error bars of +/- 1 standard
# deviation: mean_sdl() with mult = 1 returns mean - sd and mean + sd.
# mean_sdl() is a wrapper around Hmisc, so the Hmisc package must be installed.
# Columns are referenced by bare name so aes() resolves them from `data`.
# NOTE(review): `fun.y` was renamed to `fun` in ggplot2 3.3.0; it is kept here
# for compatibility with the older ggplot2 this document was built with.
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "errorbar",
    width = 0.2
  ) +
  xlab("Species") +
  ylab("Sepal Length") +
  ggtitle("Error Bar Plot") +
  theme(plot.title = element_text(hjust = 0.5))

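Another way to plot the above graph, using bars for the means. It is not recommended, because bars hide the spread of the data and force the axis to start at zero.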

# Same summary as a bar chart: bar height is the mean sepal length per
# species, and the error bars (color 2) span mean +/- 1 standard deviation.
# mean_sdl() is a wrapper around Hmisc, so the Hmisc package must be installed.
# Columns are referenced by bare name so aes() resolves them from `data`.
# NOTE(review): `fun.y` was renamed to `fun` in ggplot2 3.3.0; it is kept here
# for compatibility with the older ggplot2 this document was built with.
ggplot(data = iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.data = mean_sdl,
    geom = "errorbar",
    fun.args = list(mult = 1),
    width = 0.2,
    color = 2
  ) +
  theme_bw() +
  ylab("Sepal Length") +
  ggtitle("Error Bar Plot") +
  theme(plot.title = element_text(hjust = 0.5))

1.6 Check normal distribution of numeric data variables

library(MASS)
## Warning: package 'MASS' was built under R version 3.2.5
# Preview the first rows of `mammals`; the printed output shows two numeric
# columns, `body` and `brain`, with the animal names as row names.
head(mammals)
##                    body brain
## Arctic fox        3.385  44.5
## Owl monkey        0.480  15.5
## Mountain beaver   1.350   8.1
## Cow             465.000 423.0
## Grey wolf        36.330 119.5
## Goat             27.660 115.0
# Work on the log10 scale of the `body` column of `mammals`.
animal.new <- data.frame(body = log10(mammals$body))

# Density-scaled histogram with a rug of the individual observations, overlaid
# with the normal density that has the sample mean and standard deviation.
ggplot(animal.new, aes(x = body)) +
  geom_histogram(aes(y = ..density..)) +
  geom_rug() +
  stat_function(
    fun = dnorm,
    colour = 4,
    args = list(mean = mean(animal.new$body), sd = sd(animal.new$body))
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Using a Q-Q plot to check for a normal distribution

# Reference line for the Q-Q plot: the straight line through the first and
# third quartiles of the sample plotted against the same quartiles of the
# standard normal distribution.
qq_probs <- c(0.25, 0.75)
qq_slope <- unname(
  diff(quantile(animal.new$body, qq_probs)) / diff(qnorm(qq_probs))
)
qq_int <- unname(quantile(animal.new$body, qq_probs[1])) -
  qq_slope * qnorm(qq_probs[1])

# Kept as columns for any later code that reads them; the value is constant
# across rows.
animal.new$slope <- qq_slope
animal.new$int <- qq_int

# Slope and intercept are passed as fixed parameters so a single reference
# line is drawn; mapping the per-row columns through aes() would overplot one
# identical line for every row of the data.
ggplot(data = animal.new, aes(sample = body)) +
  stat_qq() +
  geom_abline(slope = qq_slope, intercept = qq_int, colour = "blue")

1.7 Zooming in on a certain range using the functions below

# Restrict the x axis to [4, 8] through the scale. Scale limits remove any
# observations outside the range before statistics are computed.
# Columns are referenced by bare name so aes() resolves them from `data`.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  ggtitle("Scatter Plot") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_continuous(limits = c(4, 8))

# Alternative: `coord_cartesian(xlim = c(4, 8))` zooms the view without
# dropping any data, so fitted statistics are unaffected.

1.8 Boxplot

# Box plot of sepal length per species with the individual observations
# jittered on top (semi-transparent).
# Columns are referenced by bare name so aes() resolves them from `data`;
# geom_jitter() inherits `color = Species` from the plot-level mapping, so it
# does not need to be repeated.
ggplot(data = iris, aes(x = Species, y = Sepal.Length, color = Species)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.7) +
  ylab("Sepal Length") +
  ggtitle("Boxplot")

1.9 Using facets

# One scatter panel per species, laid out side by side.
# Columns are referenced by bare name so aes() resolves them from `data`;
# this matters with facets, because `iris$...` vectors are not tied to the
# rows that ggplot2 assigns to each panel.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  facet_grid(. ~ Species)

# The facet_grid() formula reads rows ~ columns: `. ~ Species` means no row
# facets and one column of panels per species.

2.0 Themes layer

The theme() layer accepts, among others, the following arguments: theme(text=, title=, plot.title=, legend.text=, legend.title=, axis.title=, axis.title.x=, axis.text=, axis.title.y=, axis.text.x=, axis.text.y=, legend.position="bottom")

# Faceted scatter plot with the classic theme and no strip background.
# theme_classic() is a complete theme and replaces every earlier theme
# setting, so it must come BEFORE the theme() tweak; in the opposite order the
# strip.background override is silently discarded.
# Columns are referenced by bare name so aes() resolves them from `data`.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species)) +
  ylab("Sepal Width") +
  xlab("Sepal Length") +
  facet_grid(. ~ Species) +
  theme_classic() +
  theme(strip.background = element_blank())

2.1 Using heat map plot

# Create color palette
library(RColorBrewer)

# Nine-step "Reds" palette from RColorBrewer, used as the fill gradient below.
myColors <- brewer.pal(n = 9, name = "Reds")

# Heat map: one tile per year/variety combination filled by yield, with one
# panel per site stacked in a single column.
# NOTE(review): `barley` presumably comes from lattice, which is attached as a
# dependency of caret earlier in this document; confirm before reusing alone.
ggplot(data = barley, mapping = aes(x = year, y = variety, fill = yield)) +
  geom_tile() +
  scale_fill_gradientn(colours = myColors) +
  facet_wrap(~ site, ncol = 1)