# Attach the packages this script depends on. library() is used rather than
# require() so that a missing package stops the script right here; require()
# would only warn and return FALSE, letting later calls fail less clearly.
library(ggplot2)
library(tabplot)
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
## 
## Attaching package: 'bit'
## The following object is masked from 'package:base':
## 
##     xor
## Loading required package: ff
## Attaching package ff
## - getOption("fftempdir")=="C:/Users/carlos/AppData/Local/Temp/RtmpsXxt8h"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush"  -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==34309406.72 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==1715470336 -- consider a different value for tuning your system
## 
## Attaching package: 'ff'
## The following objects are masked from 'package:bit':
## 
##     clone, clone.default, clone.list
## The following objects are masked from 'package:utils':
## 
##     write.csv, write.csv2
## The following objects are masked from 'package:base':
## 
##     is.factor, is.ordered
## Loading required package: ffbase
## 
## Attaching package: 'ffbase'
## The following objects are masked from 'package:ff':
## 
##     [.ff, [.ffdf, [<-.ff, [<-.ffdf
## The following objects are masked from 'package:base':
## 
##     %in%, table
## Standard deviations are plot by default. See argument numMode of plot.tabplot.
# A tableplot is created with the tableplot() function.
# By default every variable in the dataset is shown; the select argument
# lets us choose which variables are drawn.

data("diamonds")

# Inject some missing values: blank out the price of every "Ideal" cut, then
# blank out the cut wherever a uniform random draw exceeds 0.8.
diamonds$price[diamonds$cut == "Ideal"] <- NA
diamonds$cut[runif(nrow(diamonds)) > 0.8] <- NA

# Plot all variables of the modified dataset.
tableplot(diamonds)

# Five selected columns, with the rows sorted on price. The missing values
# added to the data are placed at the bottom and, by default, drawn in a
# bright red colour.

tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price
)

# Zooming
# Setting from = 0 and to = 5 restricts the plot to the 5% most expensive
# diamonds.

tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price,
  from = 0,
  to = 5
)

# Filtering

# The subset argument acts as a data filter. Here only premium-cut diamonds
# priced below 5000$ are plotted.

tableplot(
  diamonds,
  subset = price < 5000 & cut == "Premium"
)

# Categorical variables

# The pals argument takes one palette specification per categorical column.
tableplot(
  diamonds,
  pals = list(
    cut = "Set1(6)",
    color = "Set5",
    clarity = rainbow(8)
  )
)

# High cardinality data

# To show how tableplots handle high cardinality data, the diamonds dataset
# is extended with two class columns built by the convenience wrapper
# num2fac().

# In the recorded run this first call printed:
## Loading required namespace: classInt
diamonds[["carat_class"]] <- num2fac(diamonds[["carat"]], n = 20)

# price contains the missing values added earlier, so the recorded run warned:
## Warning in classInt::classIntervals(num, n = n, style = method): var has
## missing values, omitted in finding classes
diamonds[["price_class"]] <- num2fac(diamonds[["price"]], n = 100)

tableplot(
  diamonds,
  select = c(carat, price, carat_class, price_class)
)

# Preprocessing big data

# Build a large dataset by stacking ten copies of every row of diamonds.

large_diamonds <- diamonds[rep.int(seq.int(nrow(diamonds)), times = 10L), ]

# Preprocess the large dataset once and keep the intermediate result in p.
# The timings shown after each call are those recorded in the original run.
system.time(p <- tablePrepare(large_diamonds))
##    user  system elapsed
##    2.10    0.57    3.00

# Build a tableplot from the prepared object without drawing it.
system.time(tableplot(p, plot = FALSE))
##    user  system elapsed
##    0.38    0.14    0.56

# Same again, now sorted on price and using 200 bins.
system.time(tableplot(p, sortCol = price, nBins = 200, plot = FALSE))
##    user  system elapsed
##    0.46    0.14    0.68
# The preparation step takes a couple of seconds on a moderate desktop
# computer, but building a tableplot from the intermediate object p is then
# very fast compared with the direct approach timed below, where the raw
# data frame is passed to tableplot() each time.

system.time(tableplot(large_diamonds, plot = FALSE))
##    user  system elapsed
##    2.09    0.73    3.19

system.time(
  tableplot(large_diamonds, sortCol = price, nBins = 200, plot = FALSE)
)
##    user  system elapsed
##    2.17    0.70    3.39
# Sampling

# Time a tableplot of the prepared object p with sample = TRUE.
system.time(tableplot(p, sample = TRUE))

# Timing recorded in the original run:
##    user  system elapsed
##    0.78    0.37    1.23
# Compare tableplots (experimental)

# Two datasets can be compared, for instance two samples, two versions of a
# dataset, or datasets from two different time periods.

# Carat divided by its maximum, used below as sampling probabilities.
carat.norm <- diamonds$carat / max(diamonds$carat)

# Draw two samples of 10000 rows with replacement: exp.diamonds weights rows
# by carat.norm, chp.diamonds by 1 - carat.norm. seq_len() is used instead of
# 1:nrow() so the index vector is also correct for a zero-row data frame.
exp.diamonds <- diamonds[
  sample(seq_len(nrow(diamonds)), size = 10000, prob = carat.norm,
         replace = TRUE),
]
chp.diamonds <- diamonds[
  sample(seq_len(nrow(diamonds)), size = 10000, prob = 1 - carat.norm,
         replace = TRUE),
]

# Build both tabplot objects without drawing them.
tp1 <- tableplot(exp.diamonds, plot = FALSE)
tp2 <- tableplot(chp.diamonds, plot = FALSE)

# Plot the difference between the two. In the recorded run this printed:
## Independence of the compared tableplots assumed for the calculation of the standard deviations.
plot(tp2 - tp1)

# This comparison tableplot shows, per bin, the difference in mean value for
# each numeric variable. For each categorical variable it shows a two-sided
# stacked bar chart of the differences in fractions per category: left-side
# bars indicate that more items are contained in tp1, right-side bars
# indicate that more items are contained in tp2, and where no bars are
# plotted the fraction of items is unchanged. Relative mean values can be
# plotted with relative=TRUE.

# Note: tp1 and tp2 are tabplot objects (see below). Subtracting one from the
# other returns a tabplot_compare object.


# Build the tabplot object for the full dataset without drawing it, then
# print its summary.
tab <- tableplot(
  diamonds,
  plot = FALSE
)

summary(tab)
##               general               variable1      
##  dataset          :diamonds   name       :carat    
##  variables        :12         type       :numeric  
##  sortCol          :1          scale_init :auto     
##  decreasing       :TRUE       scale_final:lin      
##  from             :0%                              
##  to               :100%                            
##  objects.sample   :53940                           
##  objects.full.data:53940                           
##  bins             :100                             
##       variable2                variable3          
##  name      :cut           name      :color        
##  type      :categorical   type      :categorical  
##  categories:6             categories:8            
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##       variable4                 variable5             variable6      
##  name      :clarity       name       :depth     name       :table    
##  type      :categorical   type       :numeric   type       :numeric  
##  categories:9             scale_init :auto      scale_init :auto     
##                           scale_final:lin       scale_final:lin      
##                                                                      
##                                                                      
##                                                                      
##                                                                      
##                                                                      
##        variable7             variable8             variable9      
##  name       :price     name       :x         name       :y        
##  type       :numeric   type       :numeric   type       :numeric  
##  scale_init :auto      scale_init :auto      scale_init :auto     
##  scale_final:lin       scale_final:lin       scale_final:lin      
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##        variable10           variable11               variable12         
##  name       :z         name      :carat_class   name      :price_class  
##  type       :numeric   type      :categorical   type      :categorical  
##  scale_init :auto      categories:26            categories:51           
##  scale_final:lin                                                        
##                                                                         
##                                                                         
##                                                                         
##                                                                         
## 
# Draw the tabplot object built earlier.
plot(tab)

# Layout options: first seven columns, with custom font sizes, legend length
# and title.
tableplot(
  diamonds,
  select = 1:7,
  fontsize = 14,
  legend.lines = 8,
  title = "Shine on you crazy Diamond",
  fontsize.title = 18
)

# Derive a modified tabplot object from tab (selected columns and a different
# palette for cut), then draw it.
tab2 <- tableChange(
  tab,
  select_string = c("carat", "price", "cut", "color", "clarity"),
  pals = list(cut = "Set1(2)")
)
plot(tab2)

# Save the tableplot to a PNG file.
tableSave(
  tab,
  filename = "diamonds.png",
  width = 5,
  height = 3,
  fontsize = 6,
  legend.lines = 6
)