Introduction
This article attempts to give a basic idea of how beautiful visualizations can be made in R using base graphics and the ggplot2 package.
This is my first document on RPubs.
Loading packages needed
The ggplot2 package must be installed once (for example with install.packages("ggplot2")) before it can be loaded
require(ggplot2)
library(ggplot2)
Visualizations available with R Base package
plot
Several types of plots can be obtained using this function
Command
plot(x,y,main="heading",sub="Sub Heading",xlab="X-Axis label",ylab="Y Axis label",type="desired type",asp=n)
where desired type includes
“p” for points,
“l” for lines,
“b” for both,
“c” for the lines part alone of “b”,
“o” for both ‘overplotted’,
“h” for ‘histogram’ like (or ‘high-density’) vertical lines,
“s” for stair steps,
“S” for other steps (the line moves vertically first and then horizontally, the reverse of “s”),
“n” for no plotting
The following charts are useful in identifying the relationship between two quantitative variables.
## Scatter Plot (2d)
plot(mtcars$wt,mtcars$mpg)Line Chart
plot(pressure$temperature, pressure$pressure,type="l")Points
plot(pressure$temperature,pressure$pressure,type="p",col="orange")# color based on grouping
plot(x = iris$Sepal.Length, y = iris$Sepal.Width, frame = FALSE,
xlab = "Sepal Length", ylab = "Sepal Width",
pch = 3, col = iris$Species)Both
plot(pressure$temperature,pressure$pressure,type="b",col="red")Separate slashes
plot(pressure$temperature,pressure$pressure,type="c")Continuous lines joining data points with points in it
plot(pressure$temperature,pressure$pressure,type="o")
We can also build a plot by adding different layers on top of each other
plot(mtcars$wt,mtcars$mpg,type="l")
points(mtcars$wt,mtcars$mpg,col="orange")
The following are used to find the distribution of quantitative data
high density lines
plot(pressure$temperature,pressure$pressure,type="h")Stair steps
plot(pressure$temperature,pressure$pressure,type="s")Other steps
plot(pressure$temperature,pressure$pressure,type="S")No plot
This is the basic layer of plotting, used to inspect the scaling and basic layout of the graph
plot(pressure$temperature,pressure$pressure,type="n")Scatterplot for large data
smoothScatter(x, y = NULL, nbin = 128, bandwidth,
colramp = colorRampPalette(c("white", blues9)),
nrpoints = 100, ret.selection = FALSE,
pch = ".", cex = 1, col = "black",
transformation = function(x) x^.25,
postPlotHook = box,
xlab = NULL, ylab = NULL, xlim, ylim,
xaxs = par("xaxs"), yaxs = par("yaxs"), ...)
smoothScatter(mtcars$wt,mtcars$mpg,colramp = colorRampPalette(c("White","green")),col = "red")Bar graph
General Syntax
barplot(height, ...)
## method 1
barplot(height, width = 1, space = NULL,
names.arg = NULL, legend.text = NULL, beside = FALSE,
horiz = FALSE, density = NULL, angle = 45,
col = NULL, border = par("fg"),
main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
xlim = NULL, ylim = NULL, xpd = TRUE, log = "",
axes = TRUE, axisnames = TRUE,
cex.axis = par("cex.axis"), cex.names = par("cex.axis"),
inside = TRUE, plot = TRUE, axis.lty = 0, offset = 0,
add = FALSE, ann = !add && par("ann"), args.legend = NULL, ...)
## method 2
barplot(formula, data, subset, na.action,
horiz = FALSE, xlab = NULL, ylab = NULL, ...)
Used to understand Categorical data.
It is of two types
Simple
barplot(table(mtcars$gear),main = "Gear plot",xlab="gears",ylab="no_of_vehicles")barplot(table(mtcars$gear),main = "Gear plot",xlab="gears",ylab="no_of_vehicles",horiz=T)Changing color
barplot(table(mtcars$gear),main = "Gear plot",xlab="gears",ylab="no_of_vehicles",horiz=T,col="gold")barplot(table(mtcars$gear),main = "Gear plot",xlab="gears",ylab="no_of_vehicles",border = "red" ,col="gold")Stacked
barplot(cbind(Employed, Unemployed) ~ Year, data = longley,col=c("red","blue"))Pie Diagram
This is also used to represent categorical data and is effective for seeing which category contributes more and which contributes less
Syntax
pie(x, labels = names(x), edges = 200, radius = .8,
density = NULL, angle = 45, col = NULL, border = NULL,
lty = NULL, main = NULL, ...)
pie(table(iris$Species) ,
density = NULL ,col=c("darkgreen","darkred","darkviolet"),border='yellow')Histogram
It is used to find the distribution of quantitative data
Syntax
hist(x, ...)
hist(mtcars$mpg)
We can specify the (approximate) number of bins using the following command; R treats a single number given to breaks as a suggestion
hist(mtcars$mpg,breaks = 15)#grouping by a categorical variable
hist(mtcars$mpg,breaks = 15,col=mtcars$cyl)Density
d=density(mtcars$mpg)
plot(d,col="gold")Boxplot
boxplot(iris$Sepal.Length)
boxplot(iris$Sepal.Length,col="blue")
Categorical vs Numerical data
This can be done in 2 ways
If the variables are supplied as separate vectors
plot(iris$Species,iris$Sepal.Length)
If they are columns of the same data frame (formula interface)
boxplot(len ~ supp, data = ToothGrowth)we can also take interaction into consideration
boxplot(len ~ supp + dose, data = ToothGrowth)Plotting a function curve
curve(x^10 - 786*x, from=-4, to=4)Plotting multiple plots in single view
par(mfrow=c(1,2))
hist(mtcars$mpg,breaks = 15,col=mtcars$cyl)
hist(mtcars$mpg,breaks = 15,col="gold")ggplot2
Some simple plots using the ggplot2 package
## General Syntax
ggplot(data=data name,aes(x=var 1 ,y=var 2,colour(or)col =variable by which we wish to categorise results,shape="qualitative variable with which we decide shape of points plotted"))+geom_typeofplot(size=n,shape=m,col="colour of points",fill="colour within structure of plot")
Scatter plot using ggplot2 Package
qplot(mtcars$wt,mtcars$mpg)We can get Weighted Scatter plots as follows
ggplot(mtcars,aes(x=wt,y=mpg,size= carb))+geom_point(shape=21,col="red",fill="yellow")Line graph Using ggplot2
The following are two ways of drawing a line plot with colours based on a particular variable
# 1
ggplot(iris,aes(Sepal.Length,Sepal.Width,colour=Species))+geom_line()# 2
qplot(temperature,pressure,data=pressure, geom="line",col=temperature)We can adjust line width as follows
p<-ggplot(iris,aes(Sepal.Length,Sepal.Width,col=Species))+geom_line(size=1.05)
p
For adjusting labels we can use the following command
p1<-p+labs(title="Relationship between Sepal length and width")+labs(x="len",y="wid")
p1
For changing the colour palette
p2 <- p1 + scale_colour_manual(values = c("green","yellow","violet"))+theme_bw()
p2p3=p2+theme(legend.position = "left",
legend.direction = "vertical",
legend.title = element_blank())
p3Bar graph
qplot(mpg$drv,geom = "bar")ggplot(iris,aes(Sepal.Width,Sepal.Length,fill=Species))+geom_col()ggplot(iris,aes(Sepal.Length,Sepal.Width,fill=Species))+geom_col()Histogram
Used to find distribution of a variable
ggplot(airquality, aes(x = Ozone)) + geom_histogram()ggplot(airquality, aes(x = Ozone)) + geom_histogram(aes(y=..density..))+
stat_function(fun = dnorm, colour = "red",
args = list(mean = mean(airquality$Ozone, na.rm = TRUE),
sd = sd(airquality$Ozone, na.rm = TRUE)))ggplot(airquality, aes(x = Ozone)) +
geom_histogram(aes(fill = ..count..), binwidth = 5)+
scale_x_continuous(name = "Mean ozone in\nparts per billion",
breaks = seq(0, 175, 25), limits = c(0, 175)) +
scale_y_continuous(name = "Count") +
labs(title = "Frequency histogram of mean ozone",
subtitle = "Source: New York State Department of Conservation")Density plot
ggplot(airquality,aes(x=Ozone))+geom_density()+xlab("mean o2 parts")ggplot(airquality, aes(x = Ozone)) +
geom_density(fill = "gold1", colour ="goldenrod2") +
scale_x_continuous(name = "Mean ozone in\nparts per billion",
breaks = seq(0, 200, 25), limits = c(0, 200)) +
scale_y_continuous(name = "Density") +
labs(title = "Frequency histogram of mean ozone",
subtitle = "Source: New York State Department of Conservation")Boxplot
ggplot(iris,aes(Species,Sepal.Length))+geom_boxplot(fill="red",colour="goldenrod")ggplot(mtcars,aes(x=factor(gear),y=mpg,fill=factor(cyl)))+geom_boxplot()+theme_bw() Pie Diagram
ggplot(mtcars,
aes(x = factor(""), fill = cyl) ) +
geom_bar() +
coord_polar(theta = "y")
These are just the minimum options; with R’s help function one can explore many more options for plotting.