1 Packages

library(ggplot2)
library(igraph)
library(RColorBrewer)
library(reshape2)
library(data.table)
library(ROCR)
library(rplot)

2 Basic Plots

2.1 Time Series

r.plot(matrixTimeSeries100)

r.plot(x=seq(0,1,1/199), y=matrixTimeSeries100)

r.plot(matrixTimeSeries5)
r.plot.add(matrixTimeSeries5, type='p')

2.2 Lines I

r.plot(x_seq, y_sin, type='l')

2.3 Lines II

r.plot.new(xlim=c(0,10), ylim=c(-1,1),xlab="x",ylab="y")
r.plot.add(x_seq, y_sin, type='l', icol=1)
r.plot.add(x_seq, y_cos, type='l', icol=2)

r.plot.new(xlim=c(0,10), ylim=c(-1,1))
r.plot.add(x_seq, y_sin, type='l', col=1)
r.plot.add(x_seq, y_cos, type='l', col=2)

r.plot.new(xlim=c(0,10), ylim=c(-1,1))
r.plot.add(x_seq, y_sin, type='l', col=rgb(0.8,0.8,0.0))
r.plot.add(x_seq, y_cos, type='l', col=rgb(0.0,0.8,0.8))

2.4 Points I

r.plot(y_norm[1:100], type='p')
r.plot.add(y_norm[1:100],type='l', col=rgb(0,0,0,0.1))

r.plot(x_norm[1:10], y_norm[1:10])
r.plot.add(x_norm[1:10], y_norm[1:10],type='l', col=rgb(0,0,0,0.1))

r.plot(x_unif, y_unif)

r.plot.new(x_unif, y_unif)
r.plot.add(x_unif, y_unif)

r.plot.new(xlim=c(-0.5,0.5), ylim=c(-0.5,0.5))
r.plot.add(x_unif, y_unif)
r.plot.add(x_unif, z_unif, icol=2)

2.5 Points II

r.plot.new(c(x_unif,x_unif), c(y_unif,z_unif))
r.plot.add(x_unif, y_unif, icol=1, alpha=0.3)
r.plot.add(x_unif, z_unif, icol=2, alpha=0.3)

r.plot.new(c(x_unif,x_unif), c(y_unif,z_unif))
r.plot.add(x_unif, y_unif, col=1, alpha=0.3)
r.plot.add(x_unif, z_unif, col=2, alpha=0.3)

r.plot.new(c(x_unif,x_unif), c(y_unif,z_unif))
r.plot.add(x_unif, y_unif, col=rgb(1,1,0), alpha=0.3)
r.plot.add(x_unif, z_unif, col=rgb(1,0,1), alpha=0.3)

r.plot(x_unif, y_unif, col=heat.colors(5))

r.plot(1:4, 1:4, col=terrain.colors(5), cex=20, alpha=0.6)

3 Other Plots

3.1 Third Axis

r.plot.new(x_seq, y_sin, thirdAxis=TRUE, main="Plot with 3rd Axis using new")
r.plot.add(x_seq, y_sin, type='l')
r.plot.coord(x_seq, y_exp)
r.plot.coord.axis(y_exp)
r.plot.add(x_seq, y_exp, col=2, type='l')

r.plot(x_seq, y_sin, type='l', thirdAxis=TRUE, main="Plot with 3rd Axis")
r.plot.add(x_seq, y_sin, type='p')
r.plot.coord(x_seq, y_exp)
r.plot.coord.axis(y_exp)
r.plot.add(x_seq, y_exp, col=2, type='l')
r.plot.add(x_seq, y_exp, col=2, type='p')

3.2 Bar plot

r.plot.bar(vecNum, main="numeric vector")

r.plot.bar(vecChar, main="character vector")

r.plot.bar(iris[c(1,51,101),c(5,1)], main="data.frame", sub="\ntwo columns (label, value)")

r.plot.bar(iris[c(1,51,101),1], main="vector", sub="\nautomatic useVector")

r.plot.bar(iris[c(1,51,51,101),1], main="vector", sub="\nautomatic useVector")

r.plot.bar(c(0.8,0.8,0.5,0.2), useVector=FALSE)

r.plot.bar(c(0.8,0.8,0.5,0.2), useVector=TRUE)

r.plot.bar(table_2, main="table")

r.plot.bar(table_3, main="table")

r.plot.bar(table_3, beside=TRUE)

r.plot.bar(table_3, horizontal=TRUE)

r.plot.bar(table_3, beside=TRUE, horizontal=TRUE)

r.plot.bar(table_3, beside=TRUE, horizontal=TRUE, background=TRUE, box=FALSE)

r.plot.bar(table_3, beside=TRUE, horizontal=TRUE, background=FALSE, box=FALSE)

3.3 Histogram plot

r.plot.histogram(x_gaussian)

r.plot.histogram(x_gaussian, main="Density", freq=FALSE)

r.plot.histogram(x_gaussian, breaks = 20)

3.4 Distribution plot

r.plot.distribution(x_gaussian)

3.5 Heatmap

x = runif(5000)
y = runif(5000)
z = (x-0.7)^2+(y-0.6)^2
r.plot.heatmap(x,y,z)

r.plot.heatmap(x,y,z,mean)

data(volcano)
r.plot.heatmap(matrixData=volcano)

r.plot.heatmap(matrixData=volcano, palette=terrain.colors(12))

r.plot.heatmap(matrixData=volcano, contour=FALSE, palette=terrain.colors(12))

r.plot.heatmap(rnorm(10000), rnorm(10000), 
               xbreaks=30, ybreaks=30, 
               contour=FALSE, 
               palette=r.color.gradient.palette(c("white", r.color(1))))

r.plot.heatmap(rnorm(10000), rnorm(10000), 
               xbreaks=30, ybreaks=30, 
               contour=FALSE, 
               smooth=0.8,
               palette=r.color.gradient.palette(c("white", r.color(1))))

3.6 Treemap

x = runif(1000)
f = cut(x, breaks = 10)
t = table(f)
segmentosNames <- names(t)
segmentosGroup <- names(t)
segmentosArea <- as.numeric(t)
segmentosColor <- runif(length(segmentosNames))

r.plot.treemap(
  segment=segmentosNames, 
  segmentgroup=segmentosGroup,
  area=segmentosArea, 
  color=segmentosColor,
  colorScaleLeft=rgb(0.8,0.2,0.8), colorScaleCenter="White", colorScaleRight=rgb(0.2,0.2,0.8),
  main="Tree Map")

3.7 Radial plot

x1 <- runif(5)
x2 <- 0.2+0.6*x1+0.2*runif(5)
df <- data.frame(group = c("Blue Collar Communities", "Prospering Suburbs"), matrix(c(x1,x2), nrow = 2, byrow = TRUE))
colnames(df)[2:ncol(df)] <- c("A", "B", "C", "D", "E")
r.plot.radial(df, legend=FALSE)

r.plot.radial(df)

r.ggplot.radial(df)

3.8 Correlation plots

Mcorr = cor(iris[,-5])
r.ggplot.corr(Mcorr)

Mcorr = cor(iris[,rep(1:4,10)])
r.ggplot.corr(Mcorr)

Mcorr = r.plot.matrix.communities(Mcorr)

r.ggplot.corr(Mcorr)

3.9 Matrix & Graph plots

# Load the UKfaculty graph from the igraphdata package.
data(UKfaculty, package = "igraphdata")
# Dense adjacency matrix of that graph. as_adjacency_matrix() is the current
# name of get.adjacency(), which is deprecated in recent igraph releases.
x <- as.matrix(igraph::as_adjacency_matrix(UKfaculty))
# 20 x 20 matrix filled with uniform random values.
mat <- matrix(runif(400), 20)
r.plot.heatmap(matrixData=mat, contour=FALSE)

r.plot.matrix(mat)

r.plot.matrix(-mat)

r.plot.matrix(mat-0.5)

r.plot.matrix(x)

r.plot.matrix.communities(x)

r.plot.matrix(r.plot.matrix.communities(x))

r.plot.heatmap(matrixData=r.plot.matrix.communities(x), contour=FALSE)

r.plot.graph.text(x, vertexLabelCex=0.5, edgeWidthMax=1.5)

4 Palette

4.1 Gradient Colors

x = runif(10000)-0.5
y = runif(10000)-0.5

r.plot(x, y, col=r.color.gradient(x^2+y^2), alpha=0.4)

r.plot(x, y, col=r.color.gradient(x^2+y^2, levels=3), cex=2)

4.2 Show and Set Palette

r.palette.get()
##  [1] "#6673CCA6" "#CC3333A6" "#1AE61AA6" "#E68000A6" "#CC00CCA6"
##  [6] "#E0DB00A6" "#E60080A6" "#8000FFA6" "#80D900A6" "#0099E6A6"
## [11] "#666666A6" "#1A1A80A6" "#801A1AA6" "#006633A6" "#994D1AA6"
## [16] "#801A80A6" "#80801AA6" "#1A8080A6" "#00E680A6" "#333333A6"
r.palette.show()

r.palette.show(heat.colors(12))

r.palette.show(rainbow(12))

r.palette.show(r.color.gradient.palette(c("red", "blue", "green"), levels=20))

r.plot(1,1,type='p', cex=20)

r.setPalette(rev(rainbow(8)))
r.setColorAlpha(0.4)
r.plot(1,1,type='p', cex=20)

r.palette.restore()
r.plot(1,1,type='p', cex=20)

5 Data Analysis

5.1 Data exploration

df <- airquality
df$Month = factor(df$Month)
df$Day = factor(sample(1:28, nrow(df), replace=TRUE))
r.plot.data(df)

r.export.dataoverview

5.2 Dimensionality Reduction

r.plot2D.data(iris[,-5])

r.plot2D.pca(iris[,-5])

theta = runif(100)
data = rbind(
  cbind(0, cos(6*pi*theta), sin(6*pi*theta)),
  cbind(cos(6*pi*theta), 0, sin(6*pi*theta)),
  cbind(cos(6*pi*theta), sin(6*pi*theta), 0))
r.plot2D.data(data)

r.plot2D.pca(data)

r.plot2D.nn(data)

km = kmeans(iris[,-5],3)
r.plot2D.data(iris[,-5], clustModel=km$cluster)

r.plot2D.pca(iris[,-5], clustModel=km$cluster)

r.plot2D.data(iris[,-5], clustModel=km$cluster, clustReal=iris[,5])

r.plot2D.pca(iris[,-5], clustModel=km$cluster, clustReal=iris[,5])

r.plot2D.nn(iris[,-5])
r.plot2D.nn(iris[,-5], clustModel=km$cluster)
r.plot2D.nn(iris[,-5], clustModel=km$cluster, clustReal=iris[,5])

5.3 Binning

r.plot.burbujas
rmodel::r.segment

5.4 K-means

km = kmeans(iris[,-5],3)
r.plot.kmeans.shapes(iris[,-5], km)

## K-means clustering with 3 clusters of sizes 62, 38, 50
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  5.901612903 2.748387097  4.393548387 1.433870968
## 2  6.850000000 3.073684211  5.742105263 2.071052632
## 3  5.006000000 3.428000000  1.462000000 0.246000000
## 
## Clustering vector:
##   [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2
## [106] 2 1 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 1 2 2 1 1 2 2 2 2 2 1 2 2 2 2 1 2
## [141] 2 2 1 2 2 2 1 2 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 39.82096774 23.87947368 15.15100000
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
r.plot.kmeans.shapes(iris[,-5], km, paintCentroids=TRUE)

## K-means clustering with 3 clusters of sizes 62, 38, 50
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  5.901612903 2.748387097  4.393548387 1.433870968
## 2  6.850000000 3.073684211  5.742105263 2.071052632
## 3  5.006000000 3.428000000  1.462000000 0.246000000
## 
## Clustering vector:
##   [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2
## [106] 2 1 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 1 2 2 1 1 2 2 2 2 2 1 2 2 2 2 1 2
## [141] 2 2 1 2 2 2 1 2 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 39.82096774 23.87947368 15.15100000
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
r.plot.kmeans.smoothshapes(iris[,-5], km)

5.5 Model performance

x=runif(1000)
y=c(round(0.8*x[1:200]+0.2*runif(200)),round(0.6*x[201:700]+0.4*runif(500)),round(runif(300)))
rmodel::r.performance.metrics(x, y)
##                ACTUAL_0    ACTUAL_1   PREDICTED   PRECISION
## PREDICTED_0         352         140         492     0,71545
## PREDICTED_1         128         380         508     0,74803
## ACTUAL              480         520       1.000     0,73174
## RECALL          0,73333     0,73077     0,73205     0,73200
## 
##        F1     phi
## 1 0,72428 0,46379
## 2 0,73930 0,46379
## 3 0,73179 0,46379
## 
##                        score
## Accuracy               0,732
## Precision            0,74803
## Recall (sensitivity) 0,73077
## specificity          0,73333
## Score F1              0,7393
## Score G              0,73935
## Score Beta            0,7393
## Score Phi (MCC)      0,46379
r.plot.confusionmatrix(x, y)
##                ACTUAL_0    ACTUAL_1   PREDICTED   PRECISION
## PREDICTED_0         352         140         492     0,71545
## PREDICTED_1         128         380         508     0,74803
## ACTUAL              480         520       1.000     0,73174
## RECALL          0,73333     0,73077     0,73205     0,73200

r.plot.F1(x, y)
##        F1     phi
## 1 0,72428 0,46379
## 2 0,73930 0,46379
## 3 0,73179 0,46379

r.plot.roc(x,y)

r.plot.gain(x,y)

r.plot.lift(x,y)

6 Interactive

r.plot(iris$Sepal.Length, iris$Sepal.Width, icol=iris[,5])
r.iplot(iris$Sepal.Length, iris$Sepal.Width, icol=iris[,5])
r.iplot.kmeans.shapes(iris[,-5])
r.iplot.smoothkmeans(iris[,-5])
r.iplot2D.data(iris[,-5], clustReal = iris[,5])
r.iplot2D.pca(iris[,-5], clustReal = iris[,5])

7 Data

randomTimeSeries <- function(n=20, amplitud=1, phase=2*pi*runif(1), velocity=rexp(1), noise=0.05) {
  # Build a noisy sine wave sampled at `n` evenly spaced points of [0, 2*pi].
  #
  # n        number of samples to return
  # amplitud scale applied to both the sine wave and the noise
  # phase    offset added inside the sine (random by default)
  # velocity multiplier of the sampling grid inside the sine (random by default)
  # noise    standard deviation of the Gaussian noise, relative to `amplitud`
  #
  # Returns a numeric vector of length `n`.

  # Arguments are lazy: force them in the order the formula reads them so that
  # default random draws happen as phase first, then velocity, then the noise.
  force(amplitud)
  force(phase)
  force(velocity)

  angle <- phase + velocity * seq(0, 2 * pi, length.out = n)
  wave <- amplitud * sin(angle)
  # The noise term is always drawn, even when `noise` is 0.
  jitter <- noise * amplitud * rnorm(n)
  wave + jitter
}
# One noiseless, unit-velocity series per column: 100 series of 200 samples
# and 5 series of 50 samples. vapply() pins the column length explicitly.
matrixTimeSeries100 <- vapply(
  seq_len(100),
  function(i) randomTimeSeries(200, velocity = 1, noise = 0),
  numeric(200)
)
matrixTimeSeries5 <- vapply(
  seq_len(5),
  function(i) randomTimeSeries(50, velocity = 1, noise = 0),
  numeric(50)
)
# Regular grid on [0, 10] in steps of 0.1 and three curves evaluated on it.
x_seq <- seq(0, 10, 0.1)
y_sin <- sin(x_seq)
y_cos <- cos(x_seq)
y_exp <- exp(x_seq)

# Uniform samples shifted to be centred on zero.
x_unif <- runif(500) - 0.5
y_unif <- runif(500) - 0.5
z_unif <- runif(500) - 0.5

# Standard normal samples.
x_norm <- rnorm(500)
y_norm <- rnorm(500)
z_norm <- rnorm(500)

# Whole-number values between 1 and 6, and random lowercase letters.
vecNum <- round(1 + 5 * runif(80)^3)
vecChar <- letters[sample(1:26, 100, replace = TRUE)]

# Two-way contingency tables of rounded random values. table() drops any
# category that was never drawn, which would make the fixed-length dimnames
# assignments below fail; pinning the factor levels keeps the tables at
# 2 x 2 and 3 x 3 whatever the sample looks like.
table_2 <- table(
  factor(round(runif(100)^2), levels = 0:1),
  factor(round(1 - runif(100)^3), levels = 0:1)
)
table_3 <- table(
  factor(round(2 * runif(120)^2), levels = 0:2),
  factor(round(2 - 2 * runif(120)^3), levels = 0:2)
)
rownames(table_2) <- c("Tipo A", "Tipo B")
colnames(table_2) <- c("Tipo A", "Tipo B")
rownames(table_3) <- c("Tipo A", "Tipo B", "Tipo C")
colnames(table_3) <- c("Tipo A", "Tipo B", "Tipo C")

# Sample for the histogram and distribution plots.
x_gaussian <- rnorm(2000)
# Scattered points in the unit square; z is the squared distance of each
# point from (0.7, 0.6).
x <- runif(5000)
y <- runif(5000)
z <- (x - 0.7)^2 + (y - 0.6)^2
data(volcano)

# Ten equal-width bins of a fresh uniform sample (x is overwritten here):
# bin labels, bin counts and one random colour value per bin.
x <- runif(1000)
f <- cut(x, breaks = 10)
t <- table(f)
segmentosNames <- names(t)
segmentosGroup <- names(t)
segmentosArea <- as.numeric(t)
segmentosColor <- runif(length(segmentosNames))

# Two groups measured on five variables A..E; the second row is a noisy
# linear function of the first.
x1 <- runif(5)
x2 <- 0.2 + 0.6 * x1 + 0.2 * runif(5)
df <- data.frame(
  group = c("Blue Collar Communities", "Prospering Suburbs"),
  matrix(c(x1, x2), nrow = 2, byrow = TRUE)
)
colnames(df)[-1] <- LETTERS[1:5]

# Correlation of the four numeric iris columns ...
Mcorr <- cor(iris[, -5])

# ... then replaced by the correlation of those columns repeated ten times.
Mcorr <- cor(iris[, rep(1:4, 10)])