PARAMETRIZED KNITTING

setwd('D:/DROPBOX UNIVERSAL/Dropbox/R COMPLETE/DEVELOPING DATA PRODUCTS/09_DevelopingDataProducts/rCharts/')
detach_package <- function(pkg, character.only = FALSE)
{
  if(!character.only)
  {
    pkg <- deparse(substitute(pkg))
  }
  search_item <- paste("package", pkg, sep = ":")
  while(search_item %in% search())
  {
    detach(search_item, unload = TRUE, character.only = TRUE)
  }
}

RequireOrInstall <- function(packages) {
    packages <- strsplit(packages,',')
    for(package in packages){
    suppressPackageStartupMessages({
      if (!require(package,character.only=TRUE)) {
        install.packages(package, dep=TRUE)
        require(package,character.only=TRUE)  
      }})
  }
}

RequireOrInstall(c("ggplot2","rCharts","plotly","plyr","dplyr"))
## Warning: package 'ggplot2' was built under R version 3.2.5
## Warning: package 'plotly' was built under R version 3.2.5
## Warning: package 'plyr' was built under R version 3.2.5
# make this an external chunk that can be included in any file
library(knitr)
opts_chunk$set(message = F, error = F, warning = F, comment = NA, fig.align = 'center', dpi = 100,  cache.path = '.cache/', fig.path = 'fig/')

options(xtable.type = 'html')
knit_hooks$set(inline = function(x) {
  if(is.numeric(x)) {
    round(x, getOption('digits'))
  } else {
    paste(as.character(x), collapse = ', ')
  }
})
knit_hooks$set(plot = knitr:::hook_plot_html)
data(mtcars)
RequireOrInstall("rCharts")
p1 <- nPlot(mpg ~ wt, group = 'cyl', data = mtcars, type = 'scatterChart')
p1$save('fig/p1.html', cdn = TRUE)
cat('<iframe src="fig/p1.html" width=100%, height=600></iframe>')
p1
RequireOrInstall("rCharts")
hair_eye = as.data.frame(HairEyeColor)
p2 <- nPlot(Freq ~ Hair, group = 'Eye', data = subset(hair_eye, Sex == "Female"), type = 'multiBarChart')
p2$save('fig/p2.html', cdn = TRUE)
cat('<iframe src="fig/p2.html" width=100%, height=600></iframe>')
p2

rCharts

  • rCharts is a way to create interactive javascript visualizations using R
  • So
  • You don’t have to learn complex tools, like D3
  • You simply work in R learning a minimal amount of new syntax
  • rCharts was written by Ramnath Vaidyanathan (friend of the Data Science Series), who also wrote slidify, the framework we use for all of the lectures in the class
  • This lecture is basically going through (http://ramnathv.github.io/rCharts/)

Jitter plot of Diamonds

data(iris)
library("ggplot2")
head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
cutpoints <- quantile(iris$Sepal.Length, seq(0,1,length=8),na.rm=TRUE)
iris$Sep <- cut(iris$Sepal.Length,cutpoints)
ggplot(data=iris,aes(Sepal.Length,Species,color=Sep))+geom_jitter(height=0.2)
RequireOrInstall("datasets")
RequireOrInstall("caret")
data(BloodBrain)

numericset_names <- names(bbbDescr[,sapply(bbbDescr,is.numeric)|sapply(bbbDescr,is.integer)])

numericset <- bbbDescr[,sapply(bbbDescr,is.numeric)|sapply(bbbDescr,is.integer)]
highCor_feat <- names(numericset[,findCorrelation(abs(cor(numericset)),0.8)])
highcor_bbb <-  bbbDescr[,highCor_feat][,1:6]
names(subset(bbbDescr, select = names(bbbDescr) != highCor_feat))
  [1] "tpsa"                 "nbasic"               "negative"            
  [4] "vsa_hyd"              "a_aro"                "weight"              
  [7] "peoe_vsa.0"           "peoe_vsa.1"           "peoe_vsa.2"          
 [10] "peoe_vsa.3"           "peoe_vsa.4"           "peoe_vsa.5"          
 [13] "peoe_vsa.6"           "peoe_vsa.0.1"         "peoe_vsa.1.1"        
 [16] "peoe_vsa.2.1"         "peoe_vsa.3.1"         "peoe_vsa.4.1"        
 [19] "peoe_vsa.5.1"         "peoe_vsa.6.1"         "a_acc"               
 [22] "a_acid"               "a_base"               "vsa_acc"             
 [25] "vsa_acid"             "vsa_base"             "vsa_don"             
 [28] "vsa_other"            "vsa_pol"              "slogp_vsa0"          
 [31] "slogp_vsa1"           "slogp_vsa2"           "slogp_vsa3"          
 [34] "slogp_vsa4"           "slogp_vsa5"           "slogp_vsa6"          
 [37] "slogp_vsa7"           "slogp_vsa8"           "slogp_vsa9"          
 [40] "smr_vsa0"             "smr_vsa1"             "smr_vsa2"            
 [43] "smr_vsa3"             "smr_vsa4"             "smr_vsa5"            
 [46] "smr_vsa6"             "smr_vsa7"             "tpsa.1"              
 [49] "logp.o.w."            "frac.anion7."         "frac.cation7."       
 [52] "andrewbind"           "rotatablebonds"       "mlogp"               
 [55] "clogp"                "mw"                   "nocount"             
 [58] "hbdnr"                "rule.of.5violations"  "alert"               
 [61] "prx"                  "ub"                   "pol"                 
 [64] "inthb"                "adistm"               "adistd"              
 [67] "polar_area"           "nonpolar_area"        "psa_npsa"            
 [70] "tcsa"                 "tcpa"                 "tcnp"                
 [73] "ovality"              "surface_area"         "volume"              
 [76] "most_negative_charge" "most_positive_charge" "sum_absolute_charge" 
 [79] "dipole_moment"        "homo"                 "lumo"                
 [82] "hardness"             "ppsa1"                "ppsa2"               
 [85] "ppsa3"                "pnsa1"                "pnsa2"               
 [88] "pnsa3"                "fpsa1"                "fpsa2"               
 [91] "fpsa3"                "fnsa1"                "fnsa2"               
 [94] "fnsa3"                "wpsa1"                "wpsa2"               
 [97] "wpsa3"                "wnsa1"                "wnsa2"               
[100] "wnsa3"                "dpsa1"                "dpsa2"               
[103] "dpsa3"                "rpcg"                 "rncg"                
[106] "wpcs"                 "wncs"                 "sadh1"               
[109] "sadh2"                "sadh3"                "chdh1"               
[112] "chdh2"                "chdh3"                "scdh1"               
[115] "scdh2"                "scdh3"                "saaa1"               
[118] "saaa2"                "saaa3"                "chaa1"               
[121] "chaa2"                "chaa3"                "scaa1"               
[124] "scaa2"                "scaa3"                "ctdh"                
[127] "ctaa"                 "mchg"                 "achg"                
[130] "rdta"                 "n_sp2"                "n_sp3"               
[133] "o_sp2"                "o_sp3"               
# negative subsetting using the same syntax
featurePlot(x=highcor_bbb, y=bbbDescr$surface_area,plot = "pairs", xlab="Most Important Variables",ylab="Most_neg_charge")

Compare with Plotly options

set.seed(100)
RequireOrInstall("plotly")

d <- diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(d, x = carat, y = price, text = paste("Clarity: ", clarity),
        mode = "markers", color = carat, size = carat)

Where is the diamonds smoothed faceting?

data(diamonds)
qplot(carat,price,data=diamonds,shape=cut,facets=.~clarity)+geom_smooth(method="lm") 

Where is the simulated Rvar map?

# s is the variance-covariance matrix for a multivariate normal distribution
RequireOrInstall("plotly")
s <- matrix(c(1, .5, .5,
              .5, 1, .5,
              .5, .5, 1), ncol = 3)
# use the mvtnorm package to sample 200 observations
obs <- mvtnorm::rmvnorm(200, sigma = s)
# collect everything in a data-frame
df <- setNames(data.frame(obs), c("x", "y", "z"))

plot_ly(df, x = x, y = y, z = z, type = "scatter3d", mode = "markers")

Where is the Weibull comparison map?

RequireOrInstall("plotly")
# comparing weibull distributed samples
wei <- plot_ly(y = rweibull(50,2,2), type = "box") %>%
  add_trace(y = rweibull(50, 1,1), type = "box")
wei
##############
data(diamonds)
quantile(diamonds$price) # compare quantile function with boxplots
      0%      25%      50%      75%     100% 
  326.00   950.00  2401.00  5324.25 18823.00 
diaplot <- plot_ly(data=diamonds,x=cut,y=price,type="box")
diaplot

HORIZONTAL BOXPLOTS

unemploy is integer variable (discrete),

data(iris)
boxplot(iris$Sepal.Length ~iris$Sepal.Width)

nPlot – normal calling by just the plot name should work in Markdown

Just in case use also n1$print('chart1') – no more options, these will surely be enough!

RequireOrInstall("rCharts")

haireye = as.data.frame(HairEyeColor)
n1 <- nPlot(Freq ~ Hair, group = 'Eye', type = 'multiBarChart',
  data = subset(haireye, Sex == 'Male')
)

#n1$print('chart1')
n1$save('fig/n1.html', cdn = TRUE)
cat('<iframe src="fig/n1.html" width=100%, height=600></iframe>')
n1
#cat('<iframe src="fig/n1.html" width=100%, height=600></iframe>')

Where is this facet plot of mileage?

RequireOrInstall("ggplot2")
data(mpg)
g <- ggplot(data=mpg, aes(x=displ,y=hwy,color=factor(year)))
g+geom_point()+geom_smooth(method = "lm")+facet_grid(drv~cyl)+ggtitle("Swirl
Rules!") +labs(x = "Displacement", y = "Hwy Mileage")

Slidify interactive

  • The above was an example of embedding an rChart in a slidify document
  • In the YAML yaml ext_widgets : {rCharts: ["libraries/nvd3"]}
  • Or, if you use more than one library
  • YAML example yaml ext_widgets : {rCharts: ["libraries/highcharts", "libraries/nvd3", "libraries/morris"]}

Viewing the plot

  • The object n1 contains the plot
  • In RStudio, typing n1 brings up the plot in the RStudio viewer (or you can just not assign it to an object)
  • Do n1$ then hit TAB to see the various functions contained in the object
  • n1$html() prints out the html for the plot
  • I do n1$save(filename) then bring the code back into slidify document
  • This is recommended for slidify, but if you’re just looking at the plot, it’s unnecessary

Deconstructing another example

## Example 1 Facetted Scatterplot
names(iris) = gsub("\\.", "", names(iris))
r1 <- rPlot(SepalLength ~ SepalWidth | Species, data = iris, color = 'Species', type = 'point')
r1$save('fig/r1.html', cdn = TRUE)
cat('<iframe src="fig/r1.html" width=100%, height=600></iframe>')

Example 1: facetted barplot

data(iris)
## Example 1 Facetted Scatterplot
names(iris) = gsub("\\.", "", names(iris))
# rPlot is a shorthand for plots using Polychart
RequireOrInstall("rCharts")

r1 <- rPlot(SepalLength ~ SepalWidth | Species, data = iris, color = 'Species', type = 'point')

#r1$print('chart1')
r1$save('fig/r1.html', cdn = TRUE)
#cat('<iframe src="fig/r1.html" width=100%, height=600></iframe>')

cat('<iframe src="fig/r1.html" width=100%, height=600></iframe>')
r1

Example 2 Facetted Barplot

RequireOrInstall("rCharts")
hair_eye = as.data.frame(HairEyeColor)
r2 <- rPlot(Freq ~ Hair | Eye, color = 'Eye', data = hair_eye, type = 'bar')

r2$save('fig/r2.html', cdn = TRUE)
#cat('<iframe src="fig/r2.html" width=100%, height=600></iframe>')

cat('<iframe src="fig/r2.html" width=100%, height=600></iframe>')
r2

Rickshaw run

RequireOrInstall("plotly")
usp = reshape2::melt(USPersonalExpenditure)
# get the decades into a date Rickshaw likes
usp$Var2 <- as.numeric(as.POSIXct(paste0(usp$Var2, "-01-01")))
p4 <- Rickshaw$new()
p4$layer(value ~ Var2, group = "Var1", data = usp, type = "area", width = 560)
# add a helpful slider this easily; other features TRUE as a default
p4$set(slider = TRUE)
cat('<iframe src="fig/p4.html" width=100%, height=600></iframe>')
p4

rCharts summarized

  • rCharts makes creating interactive javascript visualizations in R ridiculously easy
  • However, non-trivial customization is going to require knowledge of javascript
  • If what you want is not too big of a deviation from the rCharts examples, then it’s awesome
  • Otherwise, it’s challenging to extend without fairly deep knowledge of the JS libraries that it’s calling.
  • rCharts is under fairly rapid development
RequireOrInstall("googleVis")
M <- gvisMotionChart(Fruits, "Fruit", "Year",
                     options=list(width=600, height=400))
print(M, tag = 'chart')

Combining multiple plots together

#options(gvis.print.tag='chart')
RequireOrInstall("googleVis")
G <- gvisGeoChart(Exports, "Country", "Profit",options=list(width=200, height=100))
T1 <- gvisTable(Exports,options=list(width=200, height=270))
M <- gvisMotionChart(Fruits, "Fruit", "Year", options=list(width=400, height=370))
GT <- gvisMerge(G,T1, horizontal=FALSE)
GTM <- gvisMerge(GT, M, horizontal=TRUE,tableOptions="bgcolor=\"#CCCCCC\" cellspacing=10")
print(GTM, tag = 'chart')
# US map small multiples

df <- read.csv('https://raw.githubusercontent.com/plotly/datasets/master/1962_2006_walmart_store_openings.csv')

# common map properties
g <- list(scope = 'usa', showland = T, landcolor = toRGB("gray90"), showcountries = F, subunitcolor = toRGB("white"))

# year text labels
yrs <- unique(df$YEAR)
id <- seq_along(yrs)
df2 <- data.frame(
  YEAR = yrs,
  id = id
)

# id for anchoring traces on different plots
df$id <- as.integer(factor(df$YEAR))

p <- plot_ly(df, type = 'scattergeo', lon = LON, lat = LAT, group = YEAR,
             geo = paste0("geo", id), showlegend = F,
             marker = list(color = toRGB("blue"), opacity = 0.5)) %>%
  add_trace(lon = -78, lat = 47, mode = 'text', group = YEAR, type = 'scattergeo', showlegend = F,
            geo = paste0("geo", id), text = YEAR, data = df2) %>%
  layout(title = 'New Walmart Stores per year 1962-2006<br> Source: <a href="http://www.econ.umn.edu/~holmes/data/WalMart/index.html">University of Minnesota</a>',
         geo = g,
         autosize = F,
         width = 1000,
         height = 900,
         hovermode = F)

subplot(p, nrows = 9)