The ggplot2 package is based on the Grammar of Graphics. It has two main functions for data visualization: qplot and ggplot. The functionality of qplot is a subset of the functionality of ggplot. It is good to learn qplot, but sooner or later everyone moves completely to ggplot.
Data : The data (dataframe) that is being visualized.
Aesthetic Mappings : Mappings between variables in the data and components of the chart, i.e. how data are mapped to color, size, etc.
Geometric Objects (geom) : The geometric objects that are used to display the data. For example, scatter plots use geom_point, bar plots use geom_bar, and line plots use geom_line.
The above three components are mandatory for a basic visualization. Further components are as below:
Scales : Scales control how variables are mapped to aesthetics, i.e. which scale an aesthetic mapping uses (e.g. male=red, female=green).
Coordinates : Coordinates describe how data is mapped to the plot, e.g. simple Cartesian coordinates with coord_cartesian, polar coordinates with coord_polar, or geographic projections with coord_map.
Statistical Transformations (stat) : Statistical transformations applied to the data to summarize it, e.g. boxplots use stat_boxplot, lines use stat_abline, and histograms use stat_bin.
Facets : Describe how the data is partitioned into subsets and how these different subsets are plotted.
Positional adjustments : Provide fine-grained control of where data is plotted.
The ggplot2 package works on data frames.
kind : str. ‘line’ : line plot (default); ‘bar’ : vertical bar plot; ‘barh’ : horizontal bar plot; ‘hist’ : histogram; ‘box’ : boxplot; ‘kde’ : Kernel Density Estimation plot; ‘density’ : same as ‘kde’; ‘area’ : area plot; ‘pie’ : pie plot; ‘scatter’ : scatter plot; ‘hexbin’ : hexbin plot.
#install.packages("ggplot2") #uncomment this if package not already installed
library(ggplot2)
library(dplyr)
library(gcookbook) #for datasets from the book ggplot2 cookbook
# Common theme tweaks. Every line below modifies an existing ggplot object `g`,
# so a small titled base plot is built first to keep the snippet runnable.
g <- ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point() +
  ggtitle("mpg vs wt")
# Black and white theme. theme_bw() is a complete theme: adding it replaces
# every theme setting made so far, so it goes before the individual tweaks.
g <- g + theme_bw()
# Horizontally centre the title
g <- g + theme(plot.title = element_text(hjust = 0.5))
# Remove legends
g <- g + theme(legend.position = "none")
# Load every example dataset used below in a single call
data(list = c("mtcars", "diamonds", "iris", "cabbage_exp"))
# Preview: first 6 rows of mtcars
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Preview: first 6 rows of diamonds
head(diamonds, n = 6L)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.230 Ideal E SI2 61.5 55. 326 3.95 3.98 2.43
## 2 0.210 Premium E SI1 59.8 61. 326 3.89 3.84 2.31
## 3 0.230 Good E VS1 56.9 65. 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58. 334 4.20 4.23 2.63
## 5 0.310 Good J SI2 63.3 58. 335 4.34 4.35 2.75
## 6 0.240 Very Good J VVS2 62.8 57. 336 3.94 3.96 2.48
# Preview: first 6 rows of iris
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Preview: first 6 rows of cabbage_exp
head(cabbage_exp, n = 6L)
## Cultivar Date Weight sd n se
## 1 c39 d16 3.18 0.9566144 10 0.30250803
## 2 c39 d20 2.80 0.2788867 10 0.08819171
## 3 c39 d21 2.74 0.9834181 10 0.31098410
## 4 c52 d16 2.26 0.4452215 10 0.14079141
## 5 c52 d20 3.11 0.7908505 10 0.25008887
## 6 c52 d21 1.47 0.2110819 10 0.06674995
Summary Statistics
# Compact structure of mtcars: one line per column with its type and first values
str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Scatter plot built from the three mandatory pieces: data, aes() and a geom
g <- ggplot(mtcars, aes(x = wt, y = mpg)) + # data and aesthetic mapping (aes)
  geom_point()                              # geometric object (geom)
g
# summary(g)
# Same scatter plot, with points coloured by cylinder count
g <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  # layer-level mapping: cyl is converted to a factor so it is treated as categorical
  geom_point(aes(col = factor(cyl)))
g
# Scatter plot split into one panel per cylinder count (panels side by side)
g <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(. ~ cyl)
g
# Coloured points and facets combined: colour and panel both follow cyl
g <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(col = factor(cyl))) +
  facet_grid(. ~ cyl)
g
library("ggplot2")
# Histogram (30 bins) of 1000 draws from a standard normal distribution
e <- data.frame(f = rnorm(n = 1000))
g <- ggplot(e, aes(x = f)) +
  geom_histogram(bins = 30)
g
# Histogram with coloured fill and borders: black bars outlined in red
g <- ggplot(e, aes(x = f)) +
  geom_histogram(bins = 30, fill = "black", col = "red")
g
# Histogram of a bare numeric vector.
# ggplot2 looks up aes() variables in the plot data, so the vector is wrapped
# in a one-column data frame rather than passing data = NULL and relying on
# aes() finding `df` in the global environment. The column keeps the name
# `df`, so the x-axis label is unchanged.
df <- rnorm(2000)
g <- ggplot(data = data.frame(df = df), mapping = aes(x = df))
g <- g + geom_histogram(bins = 30, fill = "white", col = "red")
g
# Histogram of iris sepal lengths; reload the data and peek at it first
data("iris")
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# 30 bins, blue bars with black outlines
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(bins = 30, fill = "blue", col = "black")
#, eval=FALSE, include=FALSE
# Density curve of 1000 fresh standard-normal draws
e <- data.frame(f = rnorm(n = 1000))
# qplot version, kept for reference:
# thehistogram <- qplot(x=f, data=e, geom="density")
# summary(thehistogram)
g <- ggplot(e, aes(x = f)) +
  geom_density()
g
# Reload mtcars and preview its first 6 rows
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Count cars per cylinder value once, then reuse the table twice:
# a quick base-graphics bar plot and a data frame (columns Var1, Freq) for ggplot
cyl_table <- table(mtcars$cyl)
barplot(cyl_table)
no_cyl <- as.data.frame(cyl_table)
# Bar plot of the number of cars per cylinder value.
# no_cyl already holds one row per cylinder value with its count, so
# stat = "identity" draws each bar at the height given by Freq.
g <- ggplot(no_cyl, aes(x = Var1, y = Freq, fill = Var1))
g <- g + geom_bar(stat = "identity")
# labs() is keyed by aesthetic name, so the y-axis label must be passed as `y`;
# a `ylab = ` argument matches no aesthetic and leaves the default "Freq" label.
g <- g + labs(x = "No. of Cylinders", y = "Frequency", title = "Bar Plot")
g
# Same bar plot, this time with a two-line title centred over the panel.
# The axis label and title describe the data actually plotted (cars per
# cylinder value) rather than an unrelated simulation figure.
g <- ggplot(no_cyl, aes(x = Var1, y = Freq, fill = Var1))
g <- g + geom_bar(stat = "identity")
g <- g + labs(x = "No. of Cylinders", y = "Frequency")
# "\n" inside the title string starts the second title line
g <- g + ggtitle("Figure 2 \n Number of Cars by Cylinder Count")
# hjust = 0.5 centres the title horizontally
g <- g + theme(plot.title = element_text(hjust = 0.5))
g
# Share of cars per cylinder value, expressed in percent
100 * prop.table(table(mtcars$cyl))
##
## 4 6 8
## 34.375 21.875 43.750
# Attach gcookbook and show the whole cabbage_exp table
library("gcookbook")
cabbage_exp
## Cultivar Date Weight sd n se
## 1 c39 d16 3.18 0.9566144 10 0.30250803
## 2 c39 d20 2.80 0.2788867 10 0.08819171
## 3 c39 d21 2.74 0.9834181 10 0.31098410
## 4 c52 d16 2.26 0.4452215 10 0.14079141
## 5 c52 d20 3.11 0.7908505 10 0.25008887
## 6 c52 d21 1.47 0.2110819 10 0.06674995
# Stacked bars (the default position): cultivars piled on top of each other per date
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "stack")
# Dodged bars: cultivars placed side by side per date
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge")
#Kernel : https://www.kaggle.com/thie1e/rossmann-store-sales/exploratory-analysis-rossmann
# train <- read.csv("train.csv")
# train$Date <- as.Date(train$Date)
# train$Month <- month(train$Date)
# train$Day <- day(train$Date)
# train$Year <- year(train$Date)
#
# #Day wise Sales
# day_sale <- aggregate(Sales~Day,data = train, sum)
# day_sale
#
# library(ggplot2)
# d <- ggplot(day_sale, aes(factor(Day), Sales))
# d <- d + geom_bar(stat = "identity")
# d
#
# d1 <- ggplot(train, aes(factor(Day))) +
# geom_bar() +
# stat_summary_bin(aes(y = Sales), fun.y = "sum", geom = "bar")
# d1
#
# library(gridExtra)
# grid.arrange(d,d1,ncol=2)
# Bar plot of the diamonds data: peek at the first 6 rows before plotting
library("ggplot2")
head(diamonds, n = 6L)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.230 Ideal E SI2 61.5 55. 326 3.95 3.98 2.43
## 2 0.210 Premium E SI1 59.8 61. 326 3.89 3.84 2.31
## 3 0.230 Good E VS1 56.9 65. 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58. 334 4.20 4.23 2.63
## 5 0.310 Good J SI2 63.3 58. 335 4.34 4.35 2.75
## 6 0.240 Very Good J VVS2 62.8 57. 336 3.94 3.96 2.48
# Count diamonds per colour, then draw one bar per colour with its count printed on it
color_count <- diamonds %>%
  group_by(color) %>%
  summarize(count = n())
g <- ggplot(color_count, aes(x = color, y = count, fill = color)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Diamond Color",
    y = "Diamond Count",
    title = "Bar Plot for Color Diamonds"
  ) +
  # labels are anchored at the bar top; vjust = 2 shifts them down into the bar
  geom_text(aes(label = count), hjust = 0.5, vjust = 2)
g
# Horizontal bar chart of diamond counts per colour, each bar annotated with
# "(count)"; black-and-white theme, legend hidden.
ggplot(color_count, aes(x = color, y = count)) +
  geom_bar(stat = "identity", colour = "white", aes(fill = color)) +
  # Labels are anchored at y = 1 (the base of each bar) and left-aligned.
  # paste0() has no `sep` argument -- anything passed as `sep` is simply pasted
  # on as one more string -- so it is not supplied here.
  geom_text(aes(x = color, y = 1, label = paste0("(", count, ")")),
            hjust = 0, vjust = 0.5, size = 4, colour = "black",
            fontface = "bold") +
  labs(x = "Color",
       y = "Count",
       title = "Count of Colours") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")
# Horizontal version of the diamond colour bar plot; counts are printed at the base of each bar
g <- ggplot(color_count, aes(x = color, y = count, fill = color)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Diamond Color",
    y = "Diamond Count",
    title = "Bar Plot for Color Diamonds"
  ) +
  # y = 1 anchors every label at the bar base; the leading space pads it off the axis
  geom_text(
    aes(x = color, y = 1, label = paste0(" ", count)),
    hjust = 0, vjust = 0, size = 4
  ) +
  coord_flip()
g
# Toy dataset: 4 year labels x 4 categories, one frequency per combination
Year <- rep(c("2006-07", "2007-08", "2008-09", "2009-10"), each = 4)
Category <- rep(LETTERS[1:4], times = 4)
Frequency <- c(
  168, 259, 226, 340, # 2006-07
  216, 431, 319, 368, # 2007-08
  423, 645, 234, 685, # 2008-09
  166, 467, 274, 251  # 2009-10
)
Data <- data.frame(Year = Year, Category = Category, Frequency = Frequency)
# Stacked bars per year; each segment's frequency is printed at its vertical centre
ggplot(
  data = Data,
  mapping = aes(
    x = Year,
    y = Frequency,
    fill = Category,
    label = Frequency
  )
) +
  geom_bar(stat = "identity") +
  # position_stack(vjust = 0.5) places each label halfway up its own segment
  geom_text(size = 3, position = position_stack(vjust = 0.5))
# g = ggplot(block_count, aes(blocks, busses, fill=blocks))
# g = g + geom_bar(stat = "identity")
# g = g + geom_text(aes(label = busses), hjust = 0.5, vjust = 2)
# g
# Faceting with facet_grid and facet_wrap
# Baseline: all cars in a single panel
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# facet_grid: one panel per cylinder count, laid out as columns
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(. ~ cyl)
# facet_wrap: one panel per cylinder count, wrapped into a single column
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  facet_wrap(~ cyl, ncol = 1)
# Same two facet layouts on the mpg data, one panel per value of `class`
g <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))
# alpha = 1/3 draws the points semi-transparent
g +
  geom_point(alpha = 1 / 3) +
  facet_grid(. ~ class)
g +
  geom_point(alpha = 1 / 3) +
  facet_wrap(~ class, ncol = 1)
# Paired plot of the sleep data: one line per ID joining its values across `group`
library("ggplot2")
data("sleep")
g <- ggplot(
  data = sleep,
  mapping = aes(x = group, y = extra, group = factor(ID))
) +
  # lines are coloured per ID
  geom_line(aes(colour = ID), size = 1) +
  # large, half-transparent filled circles mark each observation
  geom_point(size = 10, pch = 21, fill = "salmon", alpha = 0.5)
g