Basic Visualization
- plot (relationship)
- Histogram (distribution)
- Bar plot and pie chart
- Boxplot
- qqnorm, qqline, qqplot (distribution comparison)
Advanced Visualization
- ggplot
- Heat Map
- Map Visualization
- 3D Graphs
Yujiao Li
# Two short numeric vectors used by the base-graphics examples.
a <- c(1, 2, 4)
b <- c(2, 5, 6)

# Simplest scatter plot: everything except the data left at its default.
plot(x = a, y = b)

# The same data with the most frequently used plot() arguments spelled out.
plot(
  x = a, y = b,
  type = "b",                        # points joined by lines
  pch = 14, col = "red",             # plotting symbol and colour
  lty = 2, lwd = 2,                  # line type and line width
  xlim = c(0, 5), ylim = c(-1, 10),  # axis ranges
  xlab = "a value", ylab = "b value",
  main = "Correlation", sub = "Figure 1"
)

# Annotate the first observation with an enlarged text label.
text(a[1], b[1], labels = "First", cex = 2)

# Overlay several series on one set of axes.
plot(x = a, y = b, col = "red", type = "b", xlim = c(0, 5), ylim = c(-1, 10))

# Method 1: par(new = TRUE) makes the next high-level plot draw on top of the
# current figure instead of starting a new one. The second plot repeats the
# same xlim/ylim so both series share one scale, and blanks its axis labels so
# they do not overprint the first plot's labels.
par(new = TRUE)
plot(x = a, y = a, col = "blue", type = "b",
     xlim = c(0, 5), ylim = c(-1, 10), xlab = "", ylab = "")

# Method 2: lines() adds to the existing plot and reuses its axes, so it takes
# no xlim/ylim (passing them only raises "not a graphical parameter" warnings).
lines(x = b, y = b, col = "black", type = "b")

# Legend: one entry per series, in the order they were drawn.
legend("topright", col = c("red", "blue", "black"), lty = c(1, 1, 1),
       legend = c("1", "2", "3"))

Plot the following two curves:

Produce the following figure

# scatterplot() from the car package: prestige against income
library(car)
scatterplot(prestige ~ income, data = Prestige)

# Scatter-plot matrix of the first four iris columns
pairs(iris[1:4])

# The same four variables through the formula interface; the filled squares
# (pch = 22) take their background colour from the species of each row
pairs(
  formula = ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  pch = 22,
  bg = c("red", "green3", "blue")[as.integer(iris$Species)]
)

# The `dist` column of the built-in `cars` data set.
distance <- cars$dist

# Density-scaled histogram (freq = FALSE) so that the kernel density estimate
# can be overlaid on the same y-axis. FALSE is written out because the
# shorthand F is an ordinary variable that can be reassigned.
hist(distance, breaks = 20, col = "gray", freq = FALSE)
lines(density(distance))

# Reference lines: the sample mean (red) and the 2.5% / 97.5% sample
# quantiles (dashed blue).
abline(v = mean(distance), col = "red")
abline(v = quantile(distance, probs = c(0.025, 0.975)), lty = 2, col = "blue")

Generate a sample from the normal distribution with the rnorm() call below, then visualize its distribution:
# Fix the RNG seed so the sample is reproducible, then take 100 draws from the
# standard normal distribution (rnorm() defaults: mean = 0, sd = 1).
set.seed(seed = 10)
numbers <- rnorm(100)

# Lay out the next figures side by side (1 row, 2 columns).
par(mfrow = c(1, 2))

# Bar plot of the built-in VADeaths matrix.
barplot(height = VADeaths)

# Pie chart with four slices. `labels` is written out in full instead of
# relying on partial matching of the abbreviated `label`.
pie(x = c(1, 3, 4, 2), labels = letters[1:4],
    col = c("red", "green", "blue", "pink"))

# Two panels side by side: tooth length overall, then split by dose
par(mfrow = c(1, 2))
boxplot(ToothGrowth[["len"]])
boxplot(len ~ dose, data = ToothGrowth)

# Custom 2 x 2 layout with panels numbered column by column; the second column
# is twice as wide as the first and the first row twice as tall as the second.
layout(
  mat = matrix(seq_len(4), nrow = 2, ncol = 2),
  widths = c(1, 2),
  heights = c(2, 1)
)

# Draw a single-point plot in each panel, titled with the panel number.
for (i in seq_len(4)) {
  plot(x = 1, y = i, main = i)
}

Quantile-Quantile plot
# Two samples of size 100: one normal (mean 1, sd 2), one uniform on [0, 1]
a <- rnorm(100, mean = 1, sd = 2)
b <- runif(100)

# Three panels in one row
par(mfrow = c(1, 3))

# Normal Q-Q plot of each sample, each with its reference line
qqnorm(a)
qqline(a)
qqnorm(b)
qqline(b)

# Q-Q plot comparing the two samples with each other
qqplot(a, b)

After the pdf() command, graphs are redirected to file test.pdf.
# Open a PDF device, draw into it, then close the device.
pdf(file = "test.pdf")
plot(1)
dev.off()

# The other common formats work the same way: jpeg, png, ps, tiff, ...
jpeg(filename = "test.jpeg")
plot(2)
dev.off()
# Helpers for working with the file system.
getwd()  # show the current working directory
dir()    # list the files in the current working directory

# The functions below need arguments: setwd() and dir.create() stop with a
# "missing argument" error when called empty, and file.remove() must be told
# which files to delete. They are shown as templates; fill in your own paths.
# setwd("path/to/folder")    # change the working directory
# dir.create("new_folder")   # create a new folder
# file.remove("test.pdf")    # remove files
Exercise: produce a boxplot for every variable in the dataset "mtcars" and save each one as a "*.jpeg" file, where the asterisk * stands for the corresponding variable name.

# Show the first three rows of the built-in mtcars data set
head(mtcars, n = 3)
##                mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4     21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710    22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
library(ggplot2)
library(gridExtra)  # layout package for ggplots

# Shared base layer: sepal length on x, sepal width on y
baseplot <- ggplot(iris, aes(Sepal.Length, Sepal.Width))

# The same mapping drawn once with points and once with a line
p1 <- baseplot + geom_point()
p2 <- baseplot + geom_line()

# Place both plots side by side and keep the combined object
p_all <- grid.arrange(p1, p2, nrow = 1, ncol = 2)

# Save the combined figure to disk
ggsave(filename = "p_all.jpeg", plot = p_all)
# Fixed size: a constant given outside aes() applies to every point
baseplot + geom_point(size = 3)

# Mapped size: inside aes() the size follows the Species column
baseplot + geom_point(mapping = aes(size = Species))
## Warning: Using size for a discrete variable is not advised.

# Fixed versus mapped colour
baseplot + geom_point(color = "red")

baseplot + geom_point(mapping = aes(color = Species))

# Fixed versus mapped point shape
baseplot + geom_point(shape = 12)

baseplot + geom_point(mapping = aes(shape = Species))

# Size, colour and shape all mapped to Species at once
baseplot +
  geom_point(
    mapping = aes(size = Species, color = Species, shape = Species)
  )
## Warning: Using size for a discrete variable is not advised.

# Colour and shape mapped in ggplot() itself, combined with a fixed point size
ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Length, y = Sepal.Width,
    color = Species, shape = Species
  )
) +
  geom_point(size = 3)

Use the dataset "data_diamond" below to produce a scatter plot of the diamonds' carat against price, where the points are colored by the diamonds' color variable.
# Random sample of 1000 rows from the diamonds data set. The row index is built
# with seq_len(), the safe idiom in place of 1:nrow().
data_diamond <- diamonds[sample(seq_len(nrow(diamonds)), size = 1000), ]

# Box plot of sepal length for each species
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  geom_boxplot()

# Three single-variable summaries, arranged in one row
P1 <- ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram(color = "blue", fill = "pink")
P2 <- ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_density(fill = "lightyellow")
P3 <- ggplot(data = iris, mapping = aes(x = Species)) +
  geom_bar(fill = "lightgreen")
grid.arrange(P1, P2, P3, nrow = 1, ncol = 3)

Plots can also have facets to make lattice plots
# One histogram panel per species, placed next to each other
ggplot(data = iris, mapping = aes(x = Sepal.Length)) +
  geom_histogram() +
  facet_grid(. ~ Species)

Use stat_smooth() to add a linear fit
# Red scatter plot with a fitted line (method = "lm") drawn on top
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(color = "red") +
  stat_smooth(method = "lm")


# Scatter plot with a two-line title, custom axis labels, the black-and-white
# theme, no grid lines, and a horizontally centred title
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() +
  labs(title = "Iris\nWidth~Length", x = "length", y = "Width") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

# Charts built with the googleVis package (loaded once for all three examples)
library(googleVis)

# Motion chart of the Fruits data, identified by Fruit and animated over Year
M1 <- gvisMotionChart(Fruits, idvar = "Fruit", timevar = "Year")
plot(M1)

# Calendar chart of the Cairo data: Temp values laid out by Date
CairoCal <- gvisCalendar(Cairo, datevar = "Date", numvar = "Temp")
plot(CairoCal)

# Bubble chart of the Fruits data: Sales against Expenses, coloured by Year
# and sized by Profit
Bubble <- gvisBubbleChart(
  Fruits,
  idvar = "Fruit", xvar = "Sales", yvar = "Expenses",
  colorvar = "Year", sizevar = "Profit"
)
plot(Bubble)
library(networkD3)

# Edge list: each Source[i] -> Target[i] pair is one link of the network
Source <- rep(c("A", "B", "C", "D"), times = c(4, 2, 2, 1))
Target <- c("B", "C", "D", "J", "E", "F", "G", "H", "I")
NetworkData <- data.frame(Source = Source, Target = Target)

# Draw the network from the two-column edge table
simpleNetwork(Data = NetworkData)

https://www.buzzfeed.com/johntemplon/help-us-map-trumpworld?utm_term=.hjw0mR8lV#.lnZRxarAG http://rpubs.com/liyujiao1026/TrumpFamily
# Hierarchical clustering of the USArrests data, drawn as a dendrogram network.
# The distance and linkage methods are the function defaults, written out.
hc <- hclust(dist(USArrests, method = "euclidean"), method = "complete")
dendroNetwork(hc, height = 600)
