I am not just reading the R Graphs Cookbook; I am also working through the examples as I read the book.
Scatter plot using built in data set cars
# Preview the first six rows of the built-in cars data set
head(cars, n = 6)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# Minimal scatter plot: dist on the y-axis against speed on the x-axis.
# The formula is kept in its cars$... form because the default axis labels
# are taken from the formula terms.
plot(cars$dist ~ cars$speed)
Adding more parameters to make graph better
# Scatter plot of dist against speed for the built-in cars data, with a
# title, axis labels, fixed axis ranges and solid red points.
plot(
  dist ~ speed,
  data = cars,
  main = "Relationship between car distance and speed",  # plot title
  xlab = "Speed (miles per hour)",
  # cars$dist is a stopping distance recorded in feet (see ?cars), not miles
  ylab = "Stopping distance (feet)",
  xlim = c(0, 30),
  ylim = c(0, 140),
  yaxs = "i",  # "i": the axis spans exactly ylim, without the default padding
  xaxs = "i",
  col = "red",
  pch = 19     # filled circle
)
Creating a line graph using sales data, which contains just the date of sale and the number of units sold.
# Load the daily sales data (columns: date, units)
sales <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/dailysales.csv")
# Checking data
head(sales)
## date units
## 1 01/01/2010 5064
## 2 02/01/2010 6115
## 3 03/01/2010 5305
## 4 04/01/2010 3185
## 5 05/01/2010 4182
## 6 06/01/2010 5816
# Line plot of units sold against the sale date.
# The dates carry four-digit years ("01/01/2010"), so the format needs %Y;
# %y reads only two digits and would parse these dates as year 2020.
plot(
  sales$units ~ as.Date(sales$date, format = "%d/%m/%Y"),
  type = "l",  # draw a line instead of points
  main = "Unit sales in the month of Jan 2010",
  xlab = "Date",
  ylab = "Number of Units sold",
  col = "blue"
)
If you want to plot another line on the same graph, the lines() function can be used, for example with a second column units2: lines(sales$units2 ~ as.Date(sales$date, "%d/%m/%Y"), col = "red")
Bar plots are useful for visualizing summary data across various categories. Following example is going to show total sales across different cities
# Load sales per city: one row per city, one column per product
sales <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/citysales.csv")
head(sales, n = 6)
## City ProductA ProductB ProductC
## 1 Seattle 23 11 12
## 2 London 89 6 56
## 3 Tokyo 24 7 13
## 4 Berlin 36 34 44
## 5 Mumbai 3 78 14
# Vertical bar chart of Product A sales, one bar per city
barplot(
  height = sales$ProductA,
  names.arg = sales$City,
  col = "blue"
)
Changing the orientation of the bars from vertical to horizontal
# Product A sales per city again, this time drawn as horizontal bars
barplot(
  height = sales$ProductA,
  names.arg = sales$City,
  horiz = TRUE,
  col = "black"
)
Bar plots are often used to compare the values of groups of values across categories. For example, we can plot the sales in different cities for more than one product using the beside argument
# Grouped bars: one group per product, one bar per city within each group
barplot(
  as.matrix(sales[, 2:4]),
  beside = TRUE,
  legend.text = sales$City,
  col = heat.colors(5),
  border = "white"
)
# Same grouped chart with horizontal bars
barplot(
  as.matrix(sales[, 2:4]),
  beside = TRUE,
  legend.text = sales$City,
  col = heat.colors(5),
  border = "white",
  horiz = TRUE
)
# Numeric product columns as a matrix. as.matrix() takes no dimnames
# argument for a data frame, so the labels are assigned in a separate step.
mat <- as.matrix(sales[, 2:4])
# Setting up dimension labels (rows = cities, columns = products) so the
# matrix can be plotted one way or the other
dimnames(mat) <- list(sales[, 1], names(sales[, 2:4]))
# Now it can be plotted in any way
barplot(
  mat,
  beside = TRUE,
  legend.text = sales$City,
  col = heat.colors(5),
  border = "white"
)
# t() transposes the matrix: groups become cities, bars become products
barplot(
  t(mat),
  beside = TRUE,
  legend.text = names(sales[, 2:4]),
  col = heat.colors(3),
  border = "white"
)
A histogram is useful for looking at the distribution of values of a single variable in a data set (univariate data).
A quick way to demonstrate this is, generate normal distribution and plot histogram
# Histogram of 1000 random draws from the standard normal distribution.
# The argument expression is left as rnorm(1000) because hist() uses it
# in the default plot title.
hist(x = rnorm(1000))
# Preview of the built-in islands data (a named numeric vector)
head(islands, n = 6)
## Africa Antarctica Asia Australia Axel Heiberg
## 11506 5500 16988 2968 16
## Baffin
## 184
# Histogram of the islands values
hist(x = islands)
A brief look at density plots; there is more on these in the coming chapters.
# Kernel density estimate of 1000 standard normal draws
plot(density(x = rnorm(1000)))
# Kernel density estimate of the built-in islands data
plot(density(x = islands))
Box plots are used to compare the spread of values across different measurements. They are mainly used with numerical values.
# Load the metal concentration measurements (a Source column followed by
# one numeric column per metal)
metals <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/metals.csv")
head(metals, n = 6)
## Source As Ba Cd Cr Cu Mn Mo Ni Pb
## 1 Site1 0.7017 76.18 0.081646 -3.47 19.700 3.082 0.7453 -1.862 7.283
## 2 Site2 -1.9193 41.01 -0.003449 30.69 10.643 4.513 0.9386 7.227 4.803
## 3 Site1 0.7394 71.91 0.654753 51.51 33.792 14.816 3.4670 17.957 50.666
## 4 Site2 -2.5964 41.88 0.082761 22.84 5.353 4.546 0.5725 5.640 4.723
## 5 Site2 0.9386 79.95 0.105580 42.08 19.890 6.281 2.2800 16.368 8.973
## 6 Site2 2.7134 83.47 0.717621 27.58 26.966 27.521 2.9449 9.069 38.654
## Sb Sr V Zn
## 1 4.699 0.7425 -8.716 52.88
## 2 2.502 0.6389 24.412 34.03
## 3 7.755 2.2014 47.181 88.26
## 4 1.353 0.6486 17.085 44.92
## 5 4.666 1.2388 36.515 86.47
## 6 7.911 0.8395 30.041 117.81
# One box per metal; the first column (Source) is dropped because it is
# not a measurement
boxplot(
  metals[, -1],
  xlab = "Metals",
  ylab = "Atmospheric Concentration in ng per cubic metre",
  main = "Atmospheric Metal concentrations in London"
)
# Load the copper measurements (columns: Source, Cu)
copper <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/copper_site.csv")
head(copper, n = 6)
## Source Cu
## 1 Site1 19.700
## 2 Site2 10.643
## 3 Site1 33.792
## 4 Site2 5.353
## 5 Site2 19.890
## 6 Site2 26.966
# Single box summarising every copper measurement
boxplot(
  x = copper$Cu,
  xlab = "Metals",
  ylab = "Atmospheric Concentration in ng per cubic metre",
  main = "Atmospheric Metal concentrations in London"
)
It is also possible to plot box plot by site too. Check this out
# Copper concentration split by measurement site: one box per Source level
boxplot(
  Cu ~ Source,
  data = copper,
  xlab = "Measurement Site",
  ylab = "Atmospheric Concentration in ng per cubic metre",
  main = "Atmospheric Metal concentrations in London"
)
# Heat map of the built-in mtcars data: one row per car model, one column
# per characteristic
heatmap(
  x = as.matrix(mtcars),
  Rowv = NA,         # no row dendrogram or reordering
  Colv = NA,         # no column dendrogram or reordering
  col = heat.colors(256),
  scale = "column",  # standardise each column before colouring
  margins = c(2, 8),
  main = "Car characteristics by Model"
)
A pairs plot is a very handy visualization for quickly scanning the correlations between many variables in a data set
# Scatter plot matrix of the four numeric iris measurements
pairs(iris[1:4])
Setting more parameters to beautify the graph. Please note that the plot function has been used here instead of pairs; called on a data frame, it produces the same kind of graph.
# plot() on a data frame also draws a scatter plot matrix; here with a
# title and small solid blue points
plot(
  iris[1:4],
  main = "Relationship between characteristics of iris flowers",
  pch = 19,
  col = "blue",
  cex = 0.9
)
To create a 2 x 3 matrix of graphs on one page, we can use par()
# Split the plotting device into a 2 x 3 grid of panels
par(mfrow = c(2, 3))
# One panel per colour, each showing 100 fresh standard normal draws
panel_colours <- c("blue", "green", "red", "orange", "magenta", "black")
for (panel in seq_along(panel_colours)) {
  plot(
    rnorm(100),
    col = panel_colours[panel],
    main = paste0("Plot No.", panel)
  )
}
par(mfrow = c(1, 1))  # back to a single panel per page
Some more practical example
# Load the daily market data (columns: date, revenue, customers, profits)
market <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/dailymarket.csv")
head(market)
## date revenue customers profits
## 1 01/01/2010 5064 100 2025.5
## 2 02/01/2010 6115 67 2690.7
## 3 03/01/2010 5305 33 1591.5
## 4 04/01/2010 3185 111 318.5
## 5 05/01/2010 4182 34 3763.5
## 6 06/01/2010 5816 222 4652.4
# The dates carry four-digit years ("01/01/2010"), so the format needs %Y;
# %y reads only two digits and would parse these dates as year 2020.
market_date <- as.Date(market$date, format = "%d/%m/%Y")
# Three stacked panels sharing the same date axis
par(mfrow = c(3, 1))
plot(
  market$revenue ~ market_date,
  type = "l", main = "Revenue",
  xlab = "Date", ylab = "US Dollars", col = "blue"
)
plot(
  market$customers ~ market_date,
  type = "l", main = "Customers Visit",
  xlab = "Date", ylab = "No. of people", col = "green"
)
plot(
  market$profits ~ market_date,
  type = "l", main = "Profits",
  xlab = "Date", ylab = "US Dollars", col = "blue"
)
par(mfrow = c(1, 1))  # back to a single panel per page
# Load monthly rainfall per city (Month plus one numeric column per city)
rain <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/cityrain.csv")
head(rain, n = 6)
## Month Tokyo NewYork London Berlin
## 1 Jan 49.9 83.6 48.9 42.4
## 2 Feb 71.5 78.8 38.8 33.2
## 3 Mar 106.4 98.5 39.3 34.5
## 4 Apr 129.2 93.4 42.4 39.7
## 5 May 144.0 106.0 47.0 52.6
## 6 Jun 176.0 84.5 48.3 70.5
# City name -> line colour, in drawing order
city_colours <- c(Tokyo = "red", NewYork = "blue", London = "green", Berlin = "orange")
# The first city creates the plot with its title, labels and y range
plot(
  rain$Tokyo,
  type = "l",
  col = city_colours[["Tokyo"]],
  ylim = c(0, 300),
  main = "Monthly rainfall in Major cities",
  xlab = "Month of year",
  ylab = "Rainfall (mm)",
  lwd = 2
)
# The remaining cities are added as extra lines on the same axes
for (city in names(city_colours)[-1]) {
  lines(rain[[city]], type = "l", col = city_colours[[city]], lwd = 2)
}
# Legend matching the drawn lines (solid, width 2)
legend(
  "topright",
  legend = names(city_colours),
  col = unname(city_colours),
  lty = 1,
  lwd = 2
)
# Second legend with a different line type and width, for comparison
legend(
  "topleft",
  legend = names(city_colours),
  col = unname(city_colours),
  lty = 3,
  lwd = 4
)
In the legend function, lty sets the line type and lwd sets the line width. The second legend call changes both (lty=3, lwd=4) to show their effect.
Use of cex parameter
# Load monthly rainfall per city (Month plus one numeric column per city)
rain <- read.csv("/Users/njvijay/big_data/R/Visualizations/Code/Chapter 1/Data Files/cityrain.csv")
head(rain, n = 6)
## Month Tokyo NewYork London Berlin
## 1 Jan 49.9 83.6 48.9 42.4
## 2 Feb 71.5 78.8 38.8 33.2
## 3 Mar 106.4 98.5 39.3 34.5
## 4 Apr 129.2 93.4 42.4 39.7
## 5 May 144.0 106.0 47.0 52.6
## 6 Jun 176.0 84.5 48.3 70.5
# City name -> line colour, in drawing order
city_colours <- c(Tokyo = "red", NewYork = "blue", London = "green", Berlin = "orange")
# The first city creates the plot with its title, labels and y range
plot(
  rain$Tokyo,
  type = "l",
  col = city_colours[["Tokyo"]],
  ylim = c(0, 300),
  main = "Monthly rainfall in Major cities",
  xlab = "Month of year",
  ylab = "Rainfall (mm)",
  lwd = 2
)
# The remaining cities are added as extra lines on the same axes
for (city in names(city_colours)[-1]) {
  lines(rain[[city]], type = "l", col = city_colours[[city]], lwd = 2)
}
# Compact legend: four columns along the top, smaller text (cex), no box
legend(
  "top",
  legend = names(city_colours),
  ncol = 4,
  cex = 0.6,
  bty = "n",
  col = unname(city_colours),
  lty = 1,
  lwd = 2
)
library(maps)
# Map drawn with the default settings of map()
map()
# Filled world map, with regions coloured from a 10-colour heat palette.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
map("world", fill = TRUE, col = heat.colors(10))
More about map later