Correlation Plot

# Attach corrplot first, then build the correlation matrix of all mtcars
# columns and draw it with no extra arguments.
library(corrplot)

mcor <- cor(mtcars)
corrplot(mcor)

Correlation Plot - Customized

# Same matrix, drawn with the "shade" method and black text labels
corrplot(
    mcor,
    method    = "shade",
    shade.col = NA,
    tl.col    = "black",
    tl.srt    = 45
)

Generating a lighter palette

# Five-stop palette. colorRampPalette() returns a function, so col(200) in
# the call below asks it for 200 interpolated colours.
col <- colorRampPalette(
    c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
)

# cl.pos = "n" turns the colour legend off. The former addcolorlabel = "no"
# argument is not recognised by corrplot(): it was passed on to the base
# text()/title() calls, which raised
#   "addcolorlabel" is not a graphical parameter
# warnings on every run.
corrplot(mcor, method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
         col = col(200), addCoef.col = "black", cl.pos = "n", order = "AOE")

# The data frame is only used for setting the range

Plotting a function

library(ggplot2)

# Base plot object; the two-row data frame only supplies the x limits (-3, 3)
p <- ggplot(
    data.frame(x = c(-3, 3)),
    aes(x = x)
)

# Draw dnorm over that range
p + stat_function(fun = dnorm)

Plotting a t-distribution

# dt needs its df argument; extra arguments for `fun` are passed via `args`
p + stat_function(
    fun  = dt,
    args = list(df = 2)
)

Defining a function

# Sigmoid curve shifted along x: equals 0.5 at xvar == 10 and works
# element-wise on numeric vectors.
myfun <- function(xvar) {
    shifted <- xvar - 10
    1 / (1 + exp(-shifted))
}

# Plot myfun over x from 0 to 20 (the data frame only sets that range)
ggplot(data.frame(x = c(0, 20)), aes(x = x)) +
    stat_function(fun = myfun)

Shading a subregion under a curve

# Normal density restricted to the interval [0, 2]: values of dnorm(x) are
# kept where 0 <= x <= 2 and replaced by NA everywhere else.
dnorm_limit <- function(x) {
    inside <- x >= 0 & x <= 2
    replace(dnorm(x), !inside, NA)
}

# Base plot built on dummy data covering x from -3 to 3
p <- ggplot(
    data.frame(x = c(-3, 3)),
    aes(x = x)
)

# First layer: translucent blue area under the limited curve;
# second layer: the full dnorm line
p +
    stat_function(fun = dnorm_limit, geom = "area", fill = "blue", alpha = 0.2) +
    stat_function(fun = dnorm)

Defining a useful function

# Build a range-limited version of a function.
#
# Args:
#   fun: a function of one (vector) argument, e.g. dnorm.
#   min: lower bound of the interval to keep (inclusive).
#   max: upper bound of the interval to keep (inclusive).
#
# Returns:
#   A function of x giving fun(x) where min <= x <= max and NA elsewhere.
limitRange <- function(fun, min, max) {
    # Evaluate the arguments now. Without force() they stay lazy promises
    # until the returned function is first called, so changing the caller's
    # variables in between (for example inside a loop) would silently change
    # the bounds.
    force(fun)
    force(min)
    force(max)

    function(x) {
        y <- fun(x)
        y[x < min | x > max] <- NA
        y
    }
}
  
# limitRange() hands back a function: here, dnorm restricted to 0..2
dlimit <- limitRange(fun = dnorm, min = 0, max = 2)

# Try it out: inputs outside 0..2 come back as NA
dlimit(-2:4)
## [1]         NA         NA 0.39894228 0.24197072 0.05399097         NA
## [7]         NA
# Full dnorm line first, then the shaded 0..2 area built with limitRange()
p +
    stat_function(fun = dnorm) +
    stat_function(
        fun   = limitRange(dnorm, 0, 2),
        geom  = "area",
        fill  = "blue",
        alpha = 0.2
    )

Creating a network graph

# igraph may have to be installed first: install.packages("igraph")
library(igraph)

# Directed graph (the default) built from a flat vector of vertex ids
gd <- graph(c(
    1, 2,
    2, 3,
    2, 4,
    1, 4,
    5, 5,
    3, 6
))
plot(gd)

# Same vertex ids, but as an undirected graph
gu <- graph(
    c(1, 2, 2, 3, 2, 4, 1, 4, 5, 5, 3, 6),
    directed = FALSE
)
# Plot it without vertex labels
plot(gu, vertex.label = NA)

Making the plot predictable

# Fix the random seed before plotting so the plot is predictable
set.seed(229)
plot(gu)

Directed Graph with Fruchterman-Reingold Algorithm

library(gcookbook) # Provides the madmen2 data set
head(madmen2)
##                  Name1        Name2
## 1          Abe Drexler  Peggy Olson
## 2              Allison   Don Draper
## 3          Arthur Case Betty Draper
## 4 Bellhop in Baltimore   Sal Romano
## 5     Bethany Van Nuys   Don Draper
## 6         Betty Draper   Don Draper
# Turn the two-column data frame into a directed graph object
g <- graph.data.frame(madmen2, directed = TRUE)

# Set all four plot margins to zero
par(mar = c(0, 0, 0, 0))

# Fruchterman-Reingold layout, small vertices and arrows, no labels
plot(
    g,
    layout          = layout.fruchterman.reingold,
    vertex.size     = 8,
    edge.arrow.size = 0.5,
    vertex.label    = NA
)

Undirected Graph - Circular Layout

# Undirected graph from the madmen data set
g <- graph.data.frame(madmen, directed = FALSE)

# Set all four plot margins to zero
par(mar = c(0, 0, 0, 0))

# Circular layout, unlabelled vertices
plot(
    g,
    layout       = layout.circle,
    vertex.size  = 8,
    vertex.label = NA
)

Using Text Labels in a Network Graph

library(igraph)
library(gcookbook) # For the data set

# Copy madmen, keeping the odd-numbered rows (i.e. dropping every other row).
# seq_len() is used instead of 1:nrow() so that an empty data frame gives an
# empty index rather than c(1, 0).
m <- madmen[seq_len(nrow(madmen)) %% 2 == 1, ]
g <- graph.data.frame(m, directed = FALSE)

# Print out the names of each vertex
V(g)$name
##  [1] "Betty Draper"      "Don Draper"        "Harry Crane"      
##  [4] "Joan Holloway"     "Lane Pryce"        "Peggy Olson"      
##  [7] "Pete Campbell"     "Roger Sterling"    "Sal Romano"       
## [10] "Henry Francis"     "Allison"           "Candace"          
## [13] "Faye Miller"       "Megan Calvet"      "Rachel Menken"    
## [16] "Suzanne Farrell"   "Hildy"             "Franklin"         
## [19] "Rebecca Pryce"     "Abe Drexler"       "Duck Phillips"    
## [22] "Playtex bra model" "Ida Blankenship"   "Mirabelle Ames"   
## [25] "Vicky"             "Kitty Romano"
# Labelled plot: every appearance setting is passed directly to plot()
plot(
    g,
    layout             = layout.fruchterman.reingold,
    vertex.size        = 4,          # Smaller nodes
    vertex.label       = V(g)$name,  # Use the vertex names as labels
    vertex.label.cex   = 0.8,        # Slightly smaller font
    vertex.label.dist  = 0.4,        # Offset the labels
    vertex.label.color = "black"
)

Graph with Colored Edges

# Same appearance as the preceding plot() call, but stored on the graph
# itself as vertex attributes instead of being passed as plot() arguments
V(g)$label       <- V(g)$name
V(g)$label.color <- "black"
V(g)$label.cex   <- 0.8
V(g)$label.dist  <- 0.4
V(g)$size        <- 4

# Graph-level property: the layout function to use
g$layout <- layout.fruchterman.reingold

# No extra arguments are passed this time
plot(g)

# View the edges: evaluating E(g) at top level prints the graph's edge
# sequence, one numbered line per edge (the numbers are used for indexing
# further down)
E(g)
## Edge sequence:
##                                            
## [1]  Henry Francis     -- Betty Draper     
## [2]  Allison           -- Don Draper       
## [3]  Don Draper        -- Betty Draper     
## [4]  Candace           -- Don Draper       
## [5]  Faye Miller       -- Don Draper       
## [6]  Megan Calvet      -- Don Draper       
## [7]  Rachel Menken     -- Don Draper       
## [8]  Suzanne Farrell   -- Don Draper       
## [9]  Hildy             -- Harry Crane      
## [10] Franklin          -- Joan Holloway    
## [11] Roger Sterling    -- Joan Holloway    
## [12] Rebecca Pryce     -- Lane Pryce       
## [13] Abe Drexler       -- Peggy Olson      
## [14] Duck Phillips     -- Peggy Olson      
## [15] Pete Campbell     -- Peggy Olson      
## [16] Playtex bra model -- Pete Campbell    
## [17] Ida Blankenship   -- Roger Sterling   
## [18] Mirabelle Ames    -- Roger Sterling   
## [19] Vicky             -- Roger Sterling   
## [20] Kitty Romano      -- Sal Romano
# Label edges 2, 11 and 19 with "M"
E(g)[c(2, 11, 19)]$label <- "M"

# Colour every edge grey first ...
E(g)$color <- "grey70"
# ... then override the same three edges with red
E(g)[c(2, 11, 19)]$color <- "red"

plot(g)

Creating a Heat Map

# First three values of the presidents series
head(presidents, n = 3)
## [1] NA 87 82
# Structure of the object
str(presidents)
##  Time-Series [1:120] from 1945 to 1975: NA 87 82 75 63 50 43 32 35 60 ...
# Flatten the time series into a data frame: one row per observation, with
# the year (time floored to a whole number) and the quarter (position in the
# cycle) as plain numeric columns
pres_rating <- data.frame(
    rating  = as.numeric(presidents),
    year    = floor(as.numeric(time(presidents))),
    quarter = as.numeric(cycle(presidents))
)

head(pres_rating, n = 3)
##   rating year quarter
## 1     NA 1945       1
## 2     87 1945       2
## 3     82 1945       3
# Base plot: year on x, quarter on y, rating mapped to fill
p <- ggplot(
    pres_rating,
    aes(x = year, y = quarter, fill = rating)
)

# Heat map drawn with geom_tile()
p + geom_tile()

# Heat map drawn with geom_raster(): looks the same, but a little more
# efficient
p + geom_raster()

# Customised heat map: x breaks every 4 years, reversed y axis, and a
# diverging fill scale with a grey midpoint at 50
p +
    geom_tile() +
    scale_x_continuous(breaks = seq(1940, 1976, by = 4)) +
    scale_y_reverse() +
    scale_fill_gradient2(midpoint = 50, mid = "grey70", limits = c(0, 100))