# 1: Lattice

library(lattice)
library(datasets)

# Basic scatterplot of Ozone against Wind.
# lattice functions return a "trellis" object that is only drawn when it is
# printed. Auto-printing happens at the top level of an interactive session,
# but not inside source(), loops or functions, so every plot in this section
# is printed explicitly (as the first one already was).
p <- xyplot(Ozone ~ Wind, data = airquality, pch = 20)
print(p)

# Number of observations per month (recorded output below).
table(airquality$Month)
## 
##  5  6  7  8  9 
## 31 30 31 31 30
# Turn Month into a factor before conditioning on it, then draw one panel per
# month, laid out as 5 columns x 1 row.
airquality <- transform(airquality, Month = factor(Month))
print(xyplot(Ozone ~ Wind | Month, data = airquality, layout = c(5, 1)))

# Panel function
# Simulated data: two groups of 50 points. With f = 0 (G1) the mean of y is x;
# with f = 1 (G2) the x terms cancel and the mean of y is 1. Noise has sd 0.5.
set.seed(12) # remember 12 :)
x <- rnorm(100)
f <- rep(0:1, each = 50)
y <- x + f - f * x + rnorm(100, sd = 0.5)
f <- factor(f, labels = c("G1", "G2"))
print(xyplot(y ~ x | f, layout = c(2, 1), pch = 17))

## custom panel function
# The panel function is called once per panel with that panel's x and y:
# draw the points, then a dashed horizontal line at the panel's median of y.
print(xyplot(y ~ x | f, panel = function(x, y, ...) {
      panel.xyplot(x, y, ...)
      panel.abline(h = median(y), lty = 2)
}))

# Same idea, but overlay a red least-squares regression line in each panel.
print(xyplot(y ~ x | f, panel = function(x, y, ...) {
      panel.xyplot(x, y, ...)
      panel.lmline(x, y, col = "red")
}))

# 2:

# 1 --------------------------
# Base Plot
# In histograms, add rug(variable) underneath to mark the raw observations.
# This is just an example,
# library(help = "datasets")
# Reload the packaged copy of airquality into the workspace.
data("airquality")
head(airquality)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# View(airquality)

# Histogram of wind speed with a rug of the individual observations.
hist(airquality$Wind, col = "green")
rug(airquality$Wind)

# Finer bins, plus reference lines: horizontal at count 7, vertical at the mean.
hist(airquality$Wind, col = "green", breaks = 50)
rug(airquality$Wind)
abline(h = 7, col = "red")
abline(v = mean(airquality$Wind), col = "blue", lwd = 2)

# Boxplot
boxplot(airquality$Wind, col = "blue")
abline(h = 12, col = "red")

# Barplot
barplot(table(airquality$Month), col = "green")

# Multiple plots
# Columns are reached through `data =`, `$` or with() instead of attach(), so
# nothing is left on the search path and no workspace object can shadow them.
boxplot(Solar.R ~ Month, data = airquality, col = "yellow")
# Solar.R contains NAs (see summary above). Without na.rm = TRUE the mean is NA
# and abline() draws nothing, which is why the line never showed up.
abline(h = mean(airquality$Solar.R, na.rm = TRUE), col = "red")

# One histogram of Solar.R per month, stacked in a 5 x 1 layout. par() returns
# the previous settings, which are restored afterwards so that later plots use
# the full device again.
old_par <- par(mfrow = c(5, 1), mar = c(4, 4, 2, 1))
hist(subset(airquality, Month == 5)$Solar.R, col = "red")
hist(subset(airquality, Month == 6)$Solar.R, col = "green")
hist(subset(airquality, Month == 7)$Solar.R, col = "blue")
hist(subset(airquality, Month == 8)$Solar.R, col = "yellow")
hist(subset(airquality, Month == 9)$Solar.R, col = "purple")
par(old_par)

# Scatterplots coloured by month (Month is numeric 5..9 here, so the values
# index the current colour palette).
with(airquality, plot(x = Solar.R, y = Wind, col = Month))
abline(h = 12, lty = 2, lwd = 2)

with(airquality, plot(Ozone, Wind, col = Month))
abline(h = 12, lty = 2, lwd = 2)


# Solar.R vs Wind for May to August in a 2 x 2 grid; layout restored afterwards.
old_par <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 1))

with(subset(airquality, Month == 5), plot(Solar.R, Wind, col = "red"))
with(subset(airquality, Month == 6), plot(Solar.R, Wind, col = "green"))
with(subset(airquality, Month == 7), plot(Solar.R, Wind, col = "blue"))
with(subset(airquality, Month == 8), plot(Solar.R, Wind, col = "black"))

par(old_par)


# 2 --------------
# Lattice package
library(lattice)
library(datasets)
# library(help = "datasets")
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

# with() evaluates the expression inside mtcars without attach()ing it, so
# nothing is left on the search path.
with(mtcars, table(cyl))
## cyl
##  4  6  8 
## 11  7 14
# hp against mpg, one panel per cylinder count, 3 columns x 1 row.
# - The argument is `layout`; the original spelling `layour` is not a lattice
#   argument, so no layout was being requested.
# - cyl is numeric; it is wrapped in factor() so it is conditioned on as a
#   factor (the strips should then read 4 / 6 / 8).
# - The plot is printed explicitly so it is also drawn under source().
print(xyplot(hp ~ mpg | factor(cyl), data = mtcars, layout = c(3, 1)))


# 3 -------------
# ggplot
library(ggplot2)
data("mtcars")
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# mtcars is deliberately not attach()ed: every plot below receives
# `data = mtcars`, so aes() finds the columns itself. The earlier attach() was
# never detached and masked ggplot2's `mpg` data set.
# ?ggplot
# hp against mpg, coloured by the (numeric) number of cylinders.
ggplot(data = mtcars, aes(x = mpg, y = hp, col = cyl)) +
      geom_point()

# Boxplots of mpg and of hp per cylinder count. Each plot is built once and
# reused: first on its own page, then stacked together below.
box_mpg <- ggplot(data = mtcars) +
      geom_boxplot(aes(x = factor(cyl), y = mpg, fill = factor(cyl)),
                   position = position_dodge(1),
                   show.legend = FALSE)

box_hp <- ggplot(data = mtcars) +
      geom_boxplot(aes(x = factor(cyl), y = hp, fill = factor(cyl)),
                   position = position_dodge(1),
                   show.legend = FALSE)

print(box_mpg)
print(box_hp)

# Both boxplots on one page, one above the other.
# par(mfrow = ...) only controls base graphics; ggplot2 draws with grid, so
# the par() call had no effect on these plots (and would have leaked into any
# later base plots). Instead, open a 2 x 1 grid layout and print each plot
# into its own cell.
grid::grid.newpage()
grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow = 2, ncol = 1))
)
print(box_mpg, vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1))
print(box_hp, vp = grid::viewport(layout.pos.row = 2, layout.pos.col = 1))
grid::popViewport()

# 3:

# K-means on 12 simulated points: three groups of four, centred at
# (1, 1), (2, 2) and (3, 1), each coordinate with sd 0.2.
set.seed(1234) # Remember 1234
x <- rnorm(12, mean = rep(1:3, each = 4), sd = 0.2)
y <- rnorm(12, mean = rep(c(1, 2, 1), each = 4), sd = 0.2)

# Raw points, labelled 1..12 just above and to the right of each marker.
plot(x, y, col = "blue", pch = 19, cex = 2)
text(x + 0.05, y + 0.05, labels = as.character(1:12))

d <- data.frame(x = x, y = y)

# kmeans() starts from randomly chosen centres and can stop in a poor local
# optimum. A single start previously gave
#   3 1 1 3 2 2 2 2 2 2 2 2
# i.e. points 5-12 shared one cluster although they were simulated as two
# groups. nstart = 25 runs 25 random starts and keeps the best solution.
kmObj <- kmeans(d, centers = 3, nstart = 25)
names(kmObj)
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Expected now: one label per block of four points (1-4, 5-8, 9-12). Which
# number each block gets is arbitrary.
kmObj$cluster
# Points coloured by assigned cluster, with the fitted centres as crosses.
plot(x, y, col = kmObj$cluster, pch = 19, cex = 2)
points(kmObj$centers, col = 1:3, pch = 3, cex = 3, lwd = 3)

# 4:

# Hierarchical clustering on the same 12 simulated points: three groups of
# four, centred at (1, 1), (2, 2) and (3, 1), each coordinate with sd 0.2.
set.seed(1234) # Remember 1234
x <- rnorm(12, mean = rep(1:3, each = 4), sd = 0.2)
y <- rnorm(12, mean = rep(c(1, 2, 1), each = 4), sd = 0.2)

# Raw points, labelled 1..12 just above and to the right of each marker.
plot(x, y, col = "blue", pch = 19, cex = 2)
text(x + 0.05, y + 0.05, labels = as.character(1:12))

d <- data.frame(x = x, y = y)
d
##            x         y
## 1  0.7585869 0.8447492
## 2  1.0554858 1.0128918
## 3  1.2168882 1.1918988
## 4  0.5308605 0.9779429
## 5  2.0858249 1.8977981
## 6  2.1012112 1.8177609
## 7  1.8850520 1.8325657
## 8  1.8906736 2.4831670
## 9  2.8871096 1.0268176
## 10 2.8219924 0.9018628
## 11 2.9045615 0.9118904
## 12 2.8003227 1.0919179
# Pairwise distances between the rows of d (dist() defaults to Euclidean).
# Stored as `point_dist` rather than `dist` so that the stats::dist()
# function is not masked by a data object of the same name.
point_dist <- dist(d)

# One tree per linkage method. hclust() defaults to "complete", so hClus and
# hClus3 are the same clustering.
hClus <- hclust(point_dist)
hClus1 <- hclust(point_dist, method = "single")
hClus2 <- hclust(point_dist, method = "average")
hClus3 <- hclust(point_dist, method = "complete")

# Dendrogram
plot(hClus)

plot(hClus1)

plot(hClus2)

plot(hClus3)

# Heatmap of the data with its rows shuffled first; the shuffle length now
# follows nrow(d) instead of a hard-coded 12.
heatmap(as.matrix(d)[sample(nrow(d)), ])

# 5:

library(ggplot2)
data(mpg)
# 1. qplot()------------
# NOTE: qplot() is deprecated as of ggplot2 3.4.0; it is kept here because
# this section is specifically about qplot().
str(mpg)
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
dim(mpg)
## [1] 234  11
# mpg is deliberately not attach()ed: every call below passes `data = mpg`, so
# column names are looked up in the data. The earlier attach(mpg) was never
# detached and masked `cyl` from a previously attached mtcars.
names(mpg)
##  [1] "manufacturer" "model"        "displ"        "year"         "cyl"         
##  [6] "trans"        "drv"          "cty"          "hwy"          "fl"          
## [11] "class"
# Scatterplot of highway mileage against engine displacement.
qplot(displ, hwy, data = mpg)

# Same, coloured by drive type.
qplot(displ, hwy, data = mpg, color = drv)

# Points plus a smoother.
qplot(displ, hwy, data = mpg, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Only x is given, so a histogram is drawn; bars are filled by drive type.
qplot(hwy, data = mpg, fill = drv, bins = 30)

# Facets: one column per drive type ...
qplot(displ, hwy, data = mpg, facets = . ~ drv)

# ... and one row per drive type.
qplot(hwy, data = mpg, facets = drv ~ ., binwidth = 2)

qplot(log(displ), data = mpg, bins = 30, fill = factor(cyl))

# Density of log(displ), overall and then split by cylinder count.
qplot(log(displ), data = mpg, geom = "density")

qplot(log(displ), data = mpg, geom = "density", color = factor(cyl),
      fill = factor(cyl))

qplot(log(displ), log(hwy), data = mpg, 
      shape = factor(cyl), color = factor(drv))

qplot(log(displ), log(hwy), data = mpg, color = factor(fl))

# Because colour is mapped to fl, one linear fit is drawn per fuel type.
qplot(log(displ), log(hwy), data = mpg, 
      color = factor(fl)) + geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# 2. ggplot()---------
library(ggplot2)
# library(help = "ggplot2")
str(mpg)
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
# Data and aesthetics only: no layer has been added yet.
ggplot(data = mpg, aes(displ, hwy))

ggplot(data = mpg, aes(displ, hwy)) + geom_point()

# Colour mapped inside the layer. The original passed `col = factor(cyl)`
# outside aes(), which looks `cyl` up in the calling environment (it only
# resolved via an attach()ed copy) and is treated as a fixed colour setting
# rather than a mapping. Wrapping it in aes() maps it from `mpg`.
ggplot(data = mpg, aes(displ, hwy)) + geom_point(aes(col = factor(cyl)))

# Same mapping, declared once at plot level so every layer inherits it.
ggplot(data = mpg, aes(displ, hwy, col = factor(cyl))) + geom_point()

ggplot(data = mpg, aes(displ, hwy)) + geom_point() + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(data = mpg, aes(displ, hwy)) + geom_point() + geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# One facet column per cylinder count, each with its own smoother.
ggplot(data = mpg, aes(displ, hwy)) + geom_point() + 
      geom_smooth() + facet_grid(. ~ factor(cyl))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Axis labels, title and legend title set explicitly. The title now names the
# plotted column `hwy` (it previously read "hwv").
ggplot(data = mpg, aes(displ, hwy, col = factor(cyl))) + 
      geom_point() + xlab("Engine displacement, in litres") +
      ylab("Highway miles per gallon") +
      ggtitle("Plot for displ vs hwy") +
      labs(col = "No of Cylinder")

# 3. More ggplot()----------
library(ggplot2)

# Toy series: 100 standard-normal draws indexed 1..100 (no seed is set, so
# the values differ from run to run).
testdata <- data.frame(x = seq_len(100), y = rnorm(100))
str(testdata)
## 'data.frame':    100 obs. of  2 variables:
##  $ x: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ y: num  1.102 -0.476 -0.709 -0.501 -1.629 ...
plot(testdata$x, testdata$y, type = "l")

## plant a single outlier in the data
testdata$y[50] <- 100  # Outlier

# Base graphics: the outlier stretches the y axis ...
plot(testdata$x, testdata$y, type = "l")

# ... unless the y range is fixed by hand.
plot(testdata$x, testdata$y, type = "l", ylim = c(-4, 4))

# The same line plot in ggplot2, built once and reused for the three variants:
# default limits, ylim(), and coord_cartesian().
line_plot <- ggplot(testdata, aes(x, y)) + geom_line()
line_plot

line_plot + ylim(-4, 4)

line_plot + coord_cartesian(ylim = c(-4, 4))

# 6:

# How colours work in R

x <- rnorm(1000)
y <- rnorm(1000)
# ?rgb

# Colours built from red / green / blue components in [0, 1].
plot(x, y, pch = 19, col = rgb(0.75, 0.25, 0))

plot(x, y, pch = 19, col = rgb(0, 0.5, 0.5))

## a fourth component (alpha) makes the points semi-transparent, so
## overlapping points remain visible
plot(x, y, pch = 19, col = rgb(0, 0.5, 0.5, 0.3))

# ?image
# A 10 x 10 matrix of random values drawn with a two-colour palette.
noise <- matrix(rnorm(100), nrow = 10, ncol = 10)
image(x = noise, col = c("blue", "green"))

library(grDevices) # for colours
# Radially symmetric surface evaluated on a 27 x 27 grid, shown in grey levels.
grid_axis <- seq(-4 * pi, 4 * pi, length.out = 27)
x <- grid_axis
y <- grid_axis
r <- sqrt(outer(x^2, y^2, "+"))
z <- cos(r^2) * exp(-r / 6)
image(z = z, col = gray.colors(33))

# volcano transposed, with the row order reversed.
image(t(volcano)[rev(seq_len(ncol(volcano))), ])

# A prettier display of the volcano
x <- 10 * seq_len(nrow(volcano))
y <- 10 * seq_len(ncol(volcano))
image(x, y, volcano, col = hcl.colors(100, "terrain"), axes = FALSE)
contour(x, y, volcano, levels = seq(90, 200, by = 5),
        add = TRUE, col = "brown")
axis(1, at = seq(100, 800, by = 100))
axis(2, at = seq(100, 600, by = 100))
box()
title(main = "Maunga Whau Volcano", font.main = 4)

# topics to look up next:
# RColorBrewer
# colorRamp

# Density-shaded scatterplot for 10,000 points.
x <- rnorm(10000)
y <- rnorm(10000)
smoothScatter(x, y)