Hi all, this section is about data visualisation using R code. The two plotting systems used as examples here are:
- Base Plotting
- Grammar of Graphics, ggplot2
1. Base Plotting
- Bar Chart via barplot()
- Histogram via hist()
- Box & whiskers via boxplot()
- Scatterplot via plot()
# The examples below use the 'mtcars' data set.
# Start by printing its structure: one line per column with type and values.
utils::str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Preview the first six rows of the data frame
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# table() tallies how many rows carry each distinct value of cyl
counts_cyl <- table(mtcars[["cyl"]])
counts_cyl
##
## 4 6 8
## 11 7 14
# Bar chart: one bar per cyl value, bar height = number of rows
barplot(height = counts_cyl)

# Histogram of mpg using the default breaks, title and axis label
hist(x = mtcars$mpg)

# Same variable with a requested number of breaks, a custom x-axis label
# and a custom title
hist(
  x = mtcars$mpg,
  main = "Histogram with 12 Bins",
  xlab = "Miles Per Gallon",
  breaks = 12
)

# Box-and-whisker diagram of mpg over all rows
boxplot(mtcars$mpg, main = "Boxplot of Miles/Gallon")

# Formula interface (response ~ group): one box of mpg per level of vs
boxplot(
  mtcars$mpg ~ factor(mtcars$vs),
  main = "Boxplot of Miles/Gallon for Different Engine Types"
)

# Scatterplot of mpg against wt with a fitted straight line.
# Columns are referenced explicitly through `mtcars$` instead of calling
# attach(mtcars): attach() places the columns on the search path, where they
# can mask, or be masked by, other objects with the same name.
plot(mtcars$wt, mtcars$mpg,
     main = "Simple Scatter Plot of MPG vs. Weight",
     xlab = "Car Weight (lbs/1000)", ylab = "Miles Per Gallon")
# add trendline: linear model of mpg on wt, drawn over the existing plot
abline(lm(mpg ~ wt, data = mtcars))

2. Grammar of Graphics, ggplot2 - covering some basics
Typical layer by layer drawing.
Download Property.csv here
geom objects
- geom_point()
- geom_line()
- geom_smooth()
- geom_bar()
- geom_histogram()
- geom_boxplot()
geom objects
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'mtcars':
##
## mpg
# example 1
# Base plot object: data plus x/y mappings only, no geom layers yet
g0 <- ggplot(data = mtcars, mapping = aes(x = hp, y = mpg))
g0

# scatter layer on top of the base
g0 + geom_point()

# colour the points by am, converted to a factor
g0 + geom_point(mapping = aes(colour = factor(am)))

# points coloured by wt, plus a linear-model smoother
g0 +
  geom_point(mapping = aes(colour = wt)) +
  geom_smooth(method = "lm")

# Load the property data from the working directory
property <- read.csv(file = "Property.csv")
# example 2a
# Base plot object with only an x mapping (no layers yet)
p0 <- ggplot(data = property, mapping = aes(x = Unit.Price...psf.))
# histogram with the default binning
p0 + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# fill the bars by Type; alpha sets opacity (1 is opaque, 0 is transparent)
p0 + geom_histogram(mapping = aes(fill = factor(Type)), alpha = 0.4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# binned bars with an explicit bin width of 30
# geom_bar() no longer has a `binwidth` parameter; binning a continuous
# variable is done with geom_histogram(), which accepts `binwidth` directly.
p0 + geom_histogram(binwidth = 30)

# more aesthetics: fill the bars by Type, same bin width
p0 + geom_histogram(aes(fill = factor(Type)), binwidth = 30)

# example 2b
# New base mapping: Type (as a factor) on x, Area on y
q0 <- ggplot(data = property, mapping = aes(x = factor(Type), y = Area))
# stat = "identity" uses the y values as given instead of counting rows;
# the y-axis is then relabelled
q0 +
  geom_bar(stat = "identity") +
  labs(y = "Total Area")

# example 3a
# group = 1 places every row in a single group
m0 <- ggplot(data = mtcars, mapping = aes(x = hp, y = mpg, group = 1))
# line plot of mpg against hp
m0 + geom_line()

# example 3b
# group and colour both mapped to cyl
n0 <- ggplot(
  data = mtcars,
  mapping = aes(x = hp, y = mpg, group = cyl, colour = cyl)
)
# line plot with the cyl grouping and colouring applied
n0 + geom_line()

# example 4
# Base plot: Unit.Price...psf. on y, grouped on x by Type as a factor
s0 <- ggplot(property, aes(x = factor(Type), y = Unit.Price...psf.))
# boxplot
s0 + geom_boxplot()

# violin plot
s0 + geom_violin()

# TRY 1
# Overlay the mean of each group as a point (shape 5, size 4).
# `fun` replaces the deprecated `fun.y` argument of stat_summary()
# (requires ggplot2 >= 3.3.0).
s0 + geom_boxplot() +
  stat_summary(fun = mean, geom = "point", shape = 5, size = 4)

# TRY 2
# jittered points and a violin layer on the same base
s0 + geom_jitter() + geom_violin()

## Exercise 2a
# mpg by vs (as a factor): boxplot with the raw points drawn on top
t0 <- ggplot(data = mtcars, mapping = aes(x = factor(vs), y = mpg))
t0 +
  geom_boxplot() +
  geom_point()

## Exercise 2b
# hp against wt, coloured by am (as a factor), with linear-model smoothers
u0 <- ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = hp, colour = factor(am))
)
u0 +
  geom_point() +
  geom_smooth(method = "lm")

scaling
# Base plot: mpg against gear, with gear converted to a factor
v0 <- ggplot(data = mtcars, mapping = aes(x = factor(gear), y = mpg))
# points coloured by wt
v1 <- v0 + geom_point(mapping = aes(colour = wt))
v1

# relabel the discrete x-axis: new axis title and a text label per level
v2 <- v1 +
  scale_x_discrete(
    name = "Number of Gears",
    breaks = c("3", "4", "5"),
    labels = c("Three", "Four", "Five")
  )
v2

# retitle the colour legend and label the min / median / max of wt
v3 <- v2 +
  scale_colour_continuous(
    name = "Weight",
    breaks = c(min(mtcars$wt), median(mtcars$wt), max(mtcars$wt)),
    labels = c("Light", "Medium", "Heavy")
  )
v3

# map point size to wt
v4 <- v0 + geom_point(mapping = aes(size = wt))
v4

# title the size legend "Weight" and set the point-size range to 2..10
v5 <- v4 + scale_size_continuous(name = "Weight", range = c(2, 10))
v5

# point shape by cyl (as a factor), with one facet column per cyl value
v6 <- v0 +
  geom_point(mapping = aes(shape = factor(cyl))) +
  facet_grid(. ~ cyl)
v6

# apply the black-and-white theme; more themes are available in ggthemes
v6 + theme_bw()
