##   Sepal.Length       Sepal.Width       Petal.Length      Petal.Width     
##  Min.   :-1.86378   Min.   :-2.4258   Min.   :-1.5623   Min.   :-1.4422  
##  1st Qu.:-0.89767   1st Qu.:-0.5904   1st Qu.:-1.2225   1st Qu.:-1.1799  
##  Median :-0.05233   Median :-0.1315   Median : 0.3354   Median : 0.1321  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.67225   3rd Qu.: 0.5567   3rd Qu.: 0.7602   3rd Qu.: 0.7880  
##  Max.   : 2.48370   Max.   : 3.0805   Max.   : 1.7799   Max.   : 1.7064  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Min-max normalisation: rescale every numeric column of iris to [0, 1]
# via (x - min) / (max - min), then re-attach the Species column.
# Per-column extremes are computed column by column, ignoring NAs.
mns <- vapply(select(iris, -Species), min, numeric(1), na.rm = TRUE)
mxs <- vapply(select(iris, -Species), max, numeric(1), na.rm = TRUE)
iris.norm <- cbind(
  # scale() subtracts `center` and divides by `scale`, column-wise.
  scale(select(iris, -Species), center = mns, scale = mxs - mns),
  select(iris, Species)
)
summary(iris.norm)
##   Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
##  Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
##  Mean   :0.4287   Mean   :0.4406   Mean   :0.4675   Mean   :0.45806  
##  3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
library(Hmisc) # for cut2()
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Load the Boston data set from MASS without attaching the package.
data("Boston", package = "MASS")
# Quartiles, mean and range of age, the numeric variable that the next
# steps discretize.
summary(Boston[["age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.90   45.02   77.50   68.57   94.08  100.00
# Equal-width discretization: split the range of age into 5 intervals of
# the same width. The bins end up with very different counts.
Boston$newAge <- cut(Boston[["age"]], breaks = 5)
table(Boston[["newAge"]])
## 
##  (2.8,22.3] (22.3,41.7] (41.7,61.2] (61.2,80.6]  (80.6,100] 
##          45          71          70          81         239
# Alternative using our own labels for the bins instead of the
# automatically generated interval names.
Boston$newAge <- cut(
  Boston[["age"]],
  breaks = 5,
  labels = c("verynew", "new", "normal", "old", "veryold")
)
table(Boston[["newAge"]])
## 
## verynew     new  normal     old veryold 
##      45      71      70      81     239
# Equal-frequency discretization: cut2() with g = 5 splits age into five
# quantile groups, so every bin holds roughly the same number of rows
# (contrast with the equal-width bins produced by cut() above).
# This step is run once; repeating it would only recompute the same
# column and print the same table.
Boston$newAge <- cut2(Boston$age, g = 5)
table(Boston$newAge)
## 
## [ 2.9, 38.1) [38.1, 66.1) [66.1, 86.1) [86.1, 95.7) [95.7,100.0] 
##          102          101          101          101          101
# Equal-frequency bins again (five quantile groups from cut2()), this
# time relabelled with our own names for the groups.
Boston$newAge <- factor(
  cut2(Boston[["age"]], g = 5),
  labels = c("verynew", "new", "normal", "old", "veryold")
)
table(Boston[["newAge"]])
## 
## verynew     new  normal     old veryold 
##     102     101     101     101     101

3.3.3 Creating variables

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
# Build a small xts time series: four observations indexed by dates that
# ymd() parses from "YYYY-MM-DD" strings. The time zone is taken from the
# TZ environment variable.
sp500 <- xts(
  x = c(1102.94, 1104.49, 1115.71, 1118.31),
  order.by = ymd(
    c("2010-02-25", "2010-02-26", "2010-03-01", "2010-03-02"),
    tz = Sys.getenv("TZ")
  )
)
# Print the series to show the values next to their date index.
sp500
##               [,1]
## 2010-02-25 1102.94
## 2010-02-26 1104.49
## 2010-03-01 1115.71
## 2010-03-02 1118.31