I am not just reading the R Graph Cookbook; I am also working through the examples as I read the book.

Scatter Plot

Scatter plot using built in data set cars

# Preview the first rows of the built-in cars data set (columns: speed, dist).
head(cars)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
# Scatter plot of dist against speed using the formula interface (y ~ x).
# No title or axis labels are supplied here, so the plot uses its defaults.
plot(cars$dist ~ cars$speed)

plot of chunk unnamed-chunk-1

Adding more parameters to make graph better

# Same scatter plot with a title, axis labels, fixed axis ranges and styled
# points. In the built-in cars data, speed is in mph and dist is the stopping
# distance in feet, so the y-axis label states feet rather than miles.
plot(
  cars$dist ~ cars$speed,
  main = "Relationship between car distance and speed", # Plot title
  xlab = "Speed (miles per hour)",
  ylab = "Stopping distance (feet)",
  xlim = c(0, 30),
  ylim = c(0, 140),
  yaxs = "i", # "i": the axis covers exactly ylim, without extra padding
  xaxs = "i", # same for the x axis
  col = "red",
  pch = 19 # solid circle
)

plot of chunk unnamed-chunk-2

Creating line graphs

Creating a line graph from sales data that contains just the date of sale and the number of units sold.

# Load the daily sales data: one row per day with the sale date and units sold.
sales <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/dailysales.csv")
# Checking data
head(sales)
##         date units
## 1 01/01/2010  5064
## 2 02/01/2010  6115
## 3 03/01/2010  5305
## 4 04/01/2010  3185
## 5 05/01/2010  4182
## 6 06/01/2010  5816
# Line plot of the sales data.
# The dates carry a four-digit year (e.g. 01/01/2010), so the format must use
# %Y. With %y only the first two digits ("20") are read as the year, which
# would silently place every observation in 2020 instead of 2010.
plot(
  sales$units ~ as.Date(sales$date, "%d/%m/%Y"),
  type = "l", # Specify type of plot is line plot
  main = "Unit sales in the month of Jan 2010",
  xlab = "Date",
  ylab = "Number of Units sold",
  col = "blue"
)

plot of chunk unnamed-chunk-3

If you want to plot another line on the same graph, the lines() function can be used, for example: lines(sales$units2 ~ as.Date(sales$date, "%d/%m/%Y"), col = "red")

Creating bar charts

Bar plots are useful for visualizing summary data across various categories. The following example shows the sales of Product A across different cities.

# Read the per-city sales figures (one row per city, one column per product).
sales <- read.csv(
  "/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/citysales.csv"
)
head(sales)
##      City ProductA ProductB ProductC
## 1 Seattle       23       11       12
## 2  London       89        6       56
## 3   Tokyo       24        7       13
## 4  Berlin       36       34       44
## 5  Mumbai        3       78       14
# Vertical bars: one blue bar per city, labelled with the city name.
barplot(
  height = sales$ProductA,
  col = "blue",
  names.arg = sales$City
)

plot of chunk unnamed-chunk-4

Changing the orientation of the bars from vertical to horizontal

# Same Product A bars as before, drawn horizontally and in black.
barplot(
  height = sales$ProductA,
  col = "black",
  horiz = TRUE,
  names.arg = sales$City
)

plot of chunk unnamed-chunk-5

Bar plots are often used to compare the values of groups of values across categories. For example, we can plot the sales in different cities for more than one product using the beside argument

# Grouped bars: the three product columns form the groups and, with
# beside = TRUE, each city gets its own bar inside every group.
# The argument is spelled legend.text in full; `legend =` only works through
# partial argument matching.
barplot(
  as.matrix(sales[, 2:4]),
  beside = TRUE,
  legend.text = sales$City,
  col = heat.colors(5), # one colour per city
  border = "white"
)

plot of chunk unnamed-chunk-6

# Horizontal version of the grouped bar plot (horiz = TRUE).
# The argument is spelled legend.text in full; `legend =` only works through
# partial argument matching.
barplot(
  as.matrix(sales[, 2:4]),
  beside = TRUE,
  legend.text = sales$City,
  col = heat.colors(5), # one colour per city
  border = "white",
  horiz = TRUE
)

plot of chunk unnamed-chunk-6

# Numeric matrix of the three product columns. as.matrix() takes no dimnames
# argument, so the row and column names are assigned in a separate step.
mat <- as.matrix(sales[, 2:4])
# Setting up dimension's label to plot one way or other:
# rows are named after the cities, columns after the products.
dimnames(mat) <- list(as.character(sales[, 1]), names(sales)[2:4])

# Now it can be plotted in any way
barplot(
  mat,
  beside = TRUE,
  legend.text = sales$City, # full argument name instead of partial `legend`
  col = heat.colors(5),
  border = "white"
)

plot of chunk unnamed-chunk-6

# t() function is used to transpose matrix: after transposing, the cities
# form the groups and the three products are the bars within each group.
barplot(
  t(mat),
  beside = TRUE,
  legend.text = names(sales)[2:4], # full argument name, not partial `legend`
  col = heat.colors(3), # one colour per product
  border = "white"
)

plot of chunk unnamed-chunk-6

Histograms and Density Plot

This is useful to look at the distribution of values in a data set. Univariate.

A quick way to demonstrate this is, generate normal distribution and plot histogram

# Histogram of 1000 random normal draws. No seed is set, so the exact shape
# differs from run to run.
hist(rnorm(1000))

plot of chunk unnamed-chunk-7

# Preview the built-in islands data (a named numeric vector).
head(islands)
##       Africa   Antarctica         Asia    Australia Axel Heiberg 
##        11506         5500        16988         2968           16 
##       Baffin 
##          184
# Histogram of the island values with all default settings.
hist(islands)

plot of chunk unnamed-chunk-8

A brief look at density plots; more on them in the coming chapters.

# Kernel density estimate of 1000 random normal draws, plotted as a curve.
plot(density(rnorm(1000)))

plot of chunk unnamed-chunk-9

# Kernel density estimate of the built-in islands data.
plot(density(islands))

plot of chunk unnamed-chunk-9

Box plot

Comparing the spread of values in different measurements. It is mainly used with numerical values

# Read the metal concentration measurements; the first column is the site.
metals <- read.csv(
  "/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/metals.csv"
)
head(metals)
##   Source      As    Ba        Cd    Cr     Cu     Mn     Mo     Ni     Pb
## 1  Site1  0.7017 76.18  0.081646 -3.47 19.700  3.082 0.7453 -1.862  7.283
## 2  Site2 -1.9193 41.01 -0.003449 30.69 10.643  4.513 0.9386  7.227  4.803
## 3  Site1  0.7394 71.91  0.654753 51.51 33.792 14.816 3.4670 17.957 50.666
## 4  Site2 -2.5964 41.88  0.082761 22.84  5.353  4.546 0.5725  5.640  4.723
## 5  Site2  0.9386 79.95  0.105580 42.08 19.890  6.281 2.2800 16.368  8.973
## 6  Site2  2.7134 83.47  0.717621 27.58 26.966 27.521 2.9449  9.069 38.654
##      Sb     Sr      V     Zn
## 1 4.699 0.7425 -8.716  52.88
## 2 2.502 0.6389 24.412  34.03
## 3 7.755 2.2014 47.181  88.26
## 4 1.353 0.6486 17.085  44.92
## 5 4.666 1.2388 36.515  86.47
## 6 7.911 0.8395 30.041 117.81
# One box per metal: drop the Source column and plot every remaining column.
boxplot(
  metals[, -1],
  main = "Atmospheric Metal concentrations in London",
  xlab = "Metals",
  ylab = "Atmospheric Concentration in ng per cubic metre"
)

plot of chunk unnamed-chunk-10

# Read the copper-only measurements (site and Cu concentration per row).
copper <- read.csv(
  "/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/copper_site.csv"
)
head(copper)
##   Source     Cu
## 1  Site1 19.700
## 2  Site2 10.643
## 3  Site1 33.792
## 4  Site2  5.353
## 5  Site2 19.890
## 6  Site2 26.966
# A single box summarising all copper readings, regardless of site.
boxplot(
  copper$Cu,
  main = "Atmospheric Metal concentrations in London",
  xlab = "Metals",
  ylab = "Atmospheric Concentration in ng per cubic metre"
)

plot of chunk unnamed-chunk-11

It is also possible to draw a separate box plot for each site:

# Copper readings split by measurement site: the formula Cu ~ Source draws
# one box per distinct value of Source.
boxplot(
  Cu ~ Source,
  data = copper,
  main = "Atmospheric Metal concentrations in London",
  xlab = "Measurement Site",
  ylab = "Atmospheric Concentration in ng per cubic metre"
)

plot of chunk unnamed-chunk-12

Heat Maps

# Heat map of the built-in mtcars data: one row per car model, one column per
# characteristic.
heatmap(
  as.matrix(mtcars),
  Rowv = NA, # no row dendrogram / reordering
  Colv = NA, # no column dendrogram / reordering
  scale = "column", # standardise each characteristic separately
  col = heat.colors(256),
  margins = c(2, 8), # room for column and row labels
  main = "Car characteristics by Model"
)

plot of chunk unnamed-chunk-13

Creating Pair plots

A pair plot is a very handy visualization for quickly scanning the correlations between many variables in a data set.

# Scatter-plot matrix of the four numeric iris measurements (columns 1 to 4).
pairs(iris[1:4])

plot of chunk unnamed-chunk-14

Setting more parameters to beautify the graph. Please note that the plot function is used here instead of pairs; for a data frame it produces the same kind of graph.

# plot() on a data frame of numeric columns draws the same scatter-plot
# matrix; here with a title and slightly smaller solid blue points.
plot(
  iris[, 1:4],
  col = "blue",
  pch = 19,
  cex = 0.9,
  main = "Relationship between characteristics of iris flowers"
)

plot of chunk unnamed-chunk-15

Plotting multiple plot matrix layouts

To create 2X3 matrix of graphs, we can use par()

# Split the device into a 2 x 3 grid (filled row by row), then draw six
# panels of 100 fresh random normal values, each with its own colour and
# numbered title.
par(mfrow = c(2, 3))
invisible(Map(
  function(panel_colour, panel_title) {
    plot(rnorm(100), col = panel_colour, main = panel_title)
  },
  c("blue", "green", "red", "orange", "magenta", "black"),
  paste0("Plot No.", 1:6)
))

plot of chunk unnamed-chunk-16

# Back to the default layout: a single plot per device.
par(mfrow = c(1, 1))

A more practical example

# Load the daily market data: date, revenue, customer count and profits.
market <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/dailymarket.csv")
head(market)
##         date revenue customers profits
## 1 01/01/2010    5064       100  2025.5
## 2 02/01/2010    6115        67  2690.7
## 3 03/01/2010    5305        33  1591.5
## 4 04/01/2010    3185       111   318.5
## 5 05/01/2010    4182        34  3763.5
## 6 06/01/2010    5816       222  4652.4
# The dates carry a four-digit year (e.g. 01/01/2010), so the format must use
# %Y. With %y only the first two digits ("20") are read as the year, which
# would silently shift every date to 2020.
market_date <- as.Date(market$date, "%d/%m/%Y")
# Three panels stacked vertically, all sharing the same date axis.
par(mfrow = c(3, 1))

plot(market$revenue ~ market_date, type = "l", main = "Revenue",
     xlab = "Date", ylab = "US Dollars", col = "blue")

plot(market$customers ~ market_date, type = "l", main = "Customers Visit",
     xlab = "Date", ylab = "No. of people", col = "green")

plot(market$profits ~ market_date, type = "l", main = "Profits",
     xlab = "Date", ylab = "US Dollars", col = "blue")

plot of chunk unnamed-chunk-17

# Back to the default layout: a single plot per device.
par(mfrow = c(1, 1))

Adding and formatting legends

# Read monthly rainfall per city (one row per month, one column per city).
rain <- read.csv(
  "/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/cityrain.csv"
)
head(rain)
##   Month Tokyo NewYork London Berlin
## 1   Jan  49.9    83.6   48.9   42.4
## 2   Feb  71.5    78.8   38.8   33.2
## 3   Mar 106.4    98.5   39.3   34.5
## 4   Apr 129.2    93.4   42.4   39.7
## 5   May 144.0   106.0   47.0   52.6
## 6   Jun 176.0    84.5   48.3   70.5
# Tokyo sets up the plot region (fixed y range so every city fits); the other
# cities are then added on top. lines() draws line segments by default.
plot(
  rain$Tokyo,
  type = "l",
  lwd = 2,
  col = "red",
  ylim = c(0, 300),
  main = "Monthly rainfall in Major cities",
  xlab = "Month of year",
  ylab = "Rainfall (mm)"
)
lines(rain$NewYork, lwd = 2, col = "blue")
lines(rain$London, lwd = 2, col = "green")
lines(rain$Berlin, lwd = 2, col = "orange")
# First legend: solid keys (lty = 1) of width 2, matching the plotted lines.
legend(
  "topright",
  legend = c("Tokyo", "NewYork", "London", "Berlin"),
  col = c("red", "blue", "green", "orange"),
  lwd = 2,
  lty = 1
)
# Second legend: same entries with dotted (lty = 3), thicker (lwd = 4) keys
# to show the effect of the two parameters.
legend(
  "topleft",
  legend = c("Tokyo", "NewYork", "London", "Berlin"),
  col = c("red", "blue", "green", "orange"),
  lwd = 4,
  lty = 3
)

plot of chunk unnamed-chunk-18

In the legend function, lty sets the line type and lwd the line width. The second legend() call uses different values for both to show their effect.

Use of cex parameter

# Read monthly rainfall per city (one row per month, one column per city).
rain <- read.csv(
  "/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/cityrain.csv"
)
head(rain)
##   Month Tokyo NewYork London Berlin
## 1   Jan  49.9    83.6   48.9   42.4
## 2   Feb  71.5    78.8   38.8   33.2
## 3   Mar 106.4    98.5   39.3   34.5
## 4   Apr 129.2    93.4   42.4   39.7
## 5   May 144.0   106.0   47.0   52.6
## 6   Jun 176.0    84.5   48.3   70.5
# Tokyo sets up the plot region; the other cities are added on top.
# lines() draws line segments by default.
plot(
  rain$Tokyo,
  type = "l",
  lwd = 2,
  col = "red",
  ylim = c(0, 300),
  main = "Monthly rainfall in Major cities",
  xlab = "Month of year",
  ylab = "Rainfall (mm)"
)
lines(rain$NewYork, lwd = 2, col = "blue")
lines(rain$London, lwd = 2, col = "green")
lines(rain$Berlin, lwd = 2, col = "orange")
# Compact legend along the top: four columns in one row, text scaled to 60%
# (cex = 0.6) and no surrounding box (bty = "n").
legend(
  "top",
  legend = c("Tokyo", "NewYork", "London", "Berlin"),
  col = c("red", "blue", "green", "orange"),
  lty = 1,
  lwd = 2,
  ncol = 4,
  cex = 0.6,
  bty = "n"
)

plot of chunk unnamed-chunk-19

Graph with map

# Attach the maps package (it must be installed), which provides map().
library(maps)
# Called with no arguments, map() draws its default map in outline.
map()

plot of chunk unnamed-chunk-20

# World map with filled regions, cycling through ten heat colours.
# TRUE is written out in full: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
map("world", fill = TRUE, col = heat.colors(10))

plot of chunk unnamed-chunk-20

More about map later