require(ggplot2)
## Loading required package: ggplot2
library(tabplot)
## Loading required package: bit
## Attaching package bit
## package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
## creators: bit bitwhich
## coercion: as.logical as.integer as.bit as.bitwhich which
## operator: ! & | xor != ==
## querying: print length any all min max range sum summary
## bit access: length<- [ [<- [[ [[<-
## for more help type ?bit
##
## Attaching package: 'bit'
## The following object is masked from 'package:base':
##
## xor
## Loading required package: ff
## Attaching package ff
## - getOption("fftempdir")=="C:/Users/carlos/AppData/Local/Temp/RtmpsXxt8h"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush" -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==34309406.72 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==1715470336 -- consider a different value for tuning your system
##
## Attaching package: 'ff'
## The following objects are masked from 'package:bit':
##
## clone, clone.default, clone.list
## The following objects are masked from 'package:utils':
##
## write.csv, write.csv2
## The following objects are masked from 'package:base':
##
## is.factor, is.ordered
## Loading required package: ffbase
##
## Attaching package: 'ffbase'
## The following objects are masked from 'package:ff':
##
## [.ff, [.ffdf, [<-.ff, [<-.ffdf
## The following objects are masked from 'package:base':
##
## %in%, table
## Standard deviations are plot by default. See argument numMode of plot.tabplot.
# A tableplot is created simply by calling the tableplot function.
# By default, all variables of the data set are shown.
# With the select argument we can specify which variables are plotted.
# Load the diamonds data set.
data(diamonds)
# Add some missing values: blank out the price of every "Ideal" cut, then
# blank out the cut wherever a uniform random draw exceeds 0.8. The price
# step runs first, so it still sees the unmodified cut column.
diamonds$price[diamonds$cut == "Ideal"] <- NA
diamonds$cut[runif(nrow(diamonds)) > 0.8] <- NA
# Draw a tableplot of the data set without restricting the columns.
tableplot(diamonds)

# Five selected columns, with the data sorted on price. The missing values
# added to the data end up at the bottom and are (by default) shown in a
# bright red color.
tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price
)

# Zooming
# Focus on the 5% most expensive diamonds: the data is sorted on price and
# the `from` / `to` arguments are set to 0 and 5.
tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price,
  from = 0,
  to = 5
)

# Filtering
# `subset` serves as a data filter: only premium-cut diamonds that cost less
# than 5000$ are plotted.
tableplot(
  diamonds,
  subset = price < 5000 & cut == "Premium"
)

# Categorical variables
# `pals` is given one palette per categorical column: two as palette-name
# strings and one as an explicit color vector produced by rainbow(8).
tableplot(
  diamonds,
  pals = list(
    cut = "Set1(6)",
    color = "Set5",
    clarity = rainbow(8)
  )
)

# High cardinality data
# To show how tableplots cope with high cardinality data, two factor columns
# are derived from carat and price with the wrapper function num2fac and
# appended to the diamonds data set.
diamonds[["carat_class"]] <- num2fac(diamonds[["carat"]], n = 20)
## Loading required namespace: classInt
diamonds[["price_class"]] <- num2fac(diamonds[["price"]], n = 100)
## Warning in classInt::classIntervals(num, n = n, style = method): var has
## missing values, omitted in finding classes
tableplot(
  diamonds,
  select = c(carat, price, carat_class, price_class)
)

# Preprocessing big data
# Create a large data set by repeating every row index of diamonds ten times.
large_diamonds <- diamonds[rep(seq.int(nrow(diamonds)), times = 10), ]

# Time the preparation step; its result is stored in `p` for reuse below.
system.time(p <- tablePrepare(large_diamonds))
## user system elapsed
## 2.10 0.57 3.00

# Time two tableplots built from the prepared object `p` (nothing is drawn
# because plot = FALSE).
system.time(tableplot(p, plot = FALSE))
## user system elapsed
## 0.38 0.14 0.56
system.time(tableplot(p, sortCol = price, nBins = 200, plot = FALSE))
## user system elapsed
## 0.46 0.14 0.68

# Although the first step takes a couple of seconds on a moderate desktop
# computer, creating a tableplot from the intermediate result `p` is very
# fast compared with the direct approach timed here:
system.time(tableplot(large_diamonds, plot = FALSE))
## user system elapsed
## 2.09 0.73 3.19
system.time(
  tableplot(large_diamonds, sortCol = price, nBins = 200, plot = FALSE)
)
## user system elapsed
## 2.17 0.70 3.39
# Sampling
# Time a tableplot of the prepared object `p` with sample = TRUE.
system.time(tableplot(p, sample = TRUE))

## user system elapsed
## 0.78 0.37 1.23
# Compare tableplots (experimental)
# Two data sets can be compared, for instance two samples, two versions of a
# data set, or data sets from two different time periods.

# Normalized carat (carat divided by the largest carat), used below as
# sampling probabilities. Both references resolve inside `diamonds`, so the
# column is named only once per use.
carat.norm <- with(diamonds, carat / max(carat))

# Draw two samples of 10000 rows with replacement: one weighted by the
# normalized carat, the other by its complement.
# seq_len() is used instead of 1:nrow(): for a zero-row table it yields an
# empty index rather than c(1, 0).
exp.diamonds <- diamonds[
  sample(
    seq_len(nrow(diamonds)),
    size = 10000, prob = carat.norm, replace = TRUE
  ),
]
chp.diamonds <- diamonds[
  sample(
    seq_len(nrow(diamonds)),
    size = 10000, prob = 1 - carat.norm, replace = TRUE
  ),
]

# Build both tabplot objects without drawing them, then plot their difference.
tp1 <- tableplot(exp.diamonds, plot = FALSE)
tp2 <- tableplot(chp.diamonds, plot = FALSE)
plot(tp2 - tp1)
## Independence of the compared tableplots assumed for the calculation of the standard deviations.

#This comparison tableplot shows, per bin, the difference in mean value for each numeric variable,
#and for each categorical variable a two-sided stacked bar chart that indicates the differences in
#fractions per category: left-side bars indicate that more items are contained in tp1, right-side
#bars indicate that more items are contained in tp2, and if no bars are plotted, the fraction of
#items is unchanged. Relative mean values can be plotted with relative=TRUE.
#Note: The objects tp1 and tp2 are tabplot-objects (see below). If one is subtracted from
#the other, a tabplot_compare-object is returned.
# Store the tabplot object instead of drawing it (plot = FALSE), then show
# its summary.
tab <- tableplot(diamonds, plot = FALSE)
summary(tab)
## general variable1
## dataset :diamonds name :carat
## variables :12 type :numeric
## sortCol :1 scale_init :auto
## decreasing :TRUE scale_final:lin
## from :0%
## to :100%
## objects.sample :53940
## objects.full.data:53940
## bins :100
## variable2 variable3
## name :cut name :color
## type :categorical type :categorical
## categories:6 categories:8
##
##
##
##
##
##
## variable4 variable5 variable6
## name :clarity name :depth name :table
## type :categorical type :numeric type :numeric
## categories:9 scale_init :auto scale_init :auto
## scale_final:lin scale_final:lin
##
##
##
##
##
## variable7 variable8 variable9
## name :price name :x name :y
## type :numeric type :numeric type :numeric
## scale_init :auto scale_init :auto scale_init :auto
## scale_final:lin scale_final:lin scale_final:lin
##
##
##
##
##
## variable10 variable11 variable12
## name :z name :carat_class name :price_class
## type :numeric type :categorical type :categorical
## scale_init :auto categories:26 categories:51
## scale_final:lin
##
##
##
##
##
# Draw the stored tabplot object.
plot(tab)

# Layout options: select columns 1 to 7 and set the font size, the number of
# legend lines, the title and the title font size.
tableplot(
  diamonds,
  select = 1:7,
  fontsize = 14,
  legend.lines = 8,
  title = "Shine on you crazy Diamond",
  fontsize.title = 18
)

# Derive a second tabplot object from `tab` with tableChange, passing a
# column selection by name and a palette for `cut`, then draw it.
tab2 <- tableChange(
  tab,
  select_string = c("carat", "price", "cut", "color", "clarity"),
  pals = list(cut = "Set1(2)")
)
plot(tab2)

# Save the tableplot
# Passes the stored tabplot object to tableSave with the target file name,
# the width and height, the font size and the number of legend lines.
tableSave(
  tab,
  filename = "diamonds.png",
  width = 5,
  height = 3,
  fontsize = 6,
  legend.lines = 6
)