Reshaping Data

Import Data:

library(reshape2)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

###Melt the Data Set:

#Melt the Data Set:
# Create ID and measure variables
# Copy the row names into a regular column so they can serve as an identifier
mtcars$carname <- row.names(mtcars)
# Reshape to long format: the id columns are kept as-is and `mpg`/`hp` are
# stacked into `variable`/`value` pairs. `id.vars` is written out in full
# rather than relying on partial matching of `id`.
carMelt <- melt(
  mtcars,
  id.vars = c("carname", "gear", "cyl"),
  measure.vars = c("mpg", "hp")
)
head(carMelt, n = 3)
##         carname gear cyl variable value
## 1     Mazda RX4    4   6      mpg  21.0
## 2 Mazda RX4 Wag    4   6      mpg  21.0
## 3    Datsun 710    4   4      mpg  22.8
tail(carMelt, n=3)
##          carname gear cyl variable value
## 62  Ferrari Dino    5   6       hp   175
## 63 Maserati Bora    5   8       hp   335
## 64    Volvo 142E    4   4       hp   109

###Cast into different Shapes:

#Cast into different Shapes: 
cylData <- dcast(data = carMelt, formula = cyl ~ variable)
## Aggregation function missing: defaulting to length
#Aggregation by length
cylData
##   cyl mpg hp
## 1   4  11 11
## 2   6   7  7
## 3   8  14 14
# Aggregate with the mean instead of the default (length)
cylData <- dcast(carMelt, cyl ~ variable, fun.aggregate = mean)
cylData
##   cyl      mpg        hp
## 1   4 26.66364  82.63636
## 2   6 19.74286 122.28571
## 3   8 15.10000 209.21429

###Average Values:

# Load the data
head(InsectSprays)
##   count spray
## 1    10     A
## 2     7     A
## 3    20     A
## 4    14     A
## 5    14     A
## 6    12     A
#shorthand
with(InsectSprays, tapply(count, spray, sum))
##   A   B   C   D   E   F 
## 174 184  25  59  42 200
#Alternative: get list, ...
# Split the counts by spray type: the result is a named list with one
# vector of counts per spray level
spIns <- split(InsectSprays$count, InsectSprays$spray)
spIns
## $A
##  [1] 10  7 20 14 14 12 10 23 17 20 14 13
## 
## $B
##  [1] 11 17 21 11 16 14 17 17 19 21  7 13
## 
## $C
##  [1] 0 1 7 2 3 1 2 1 3 0 1 4
## 
## $D
##  [1]  3  5 12  6  4  3  5  5  5  5  2  4
## 
## $E
##  [1] 3 5 3 5 3 6 1 1 3 2 6 4
## 
## $F
##  [1] 11  9 15 22 15 16 13 10 26 26 24 13
#Apply a function
# Sum the counts within each spray group; lapply() returns a list
sprCount <- lapply(spIns, sum)
sprCount
## $A
## [1] 174
## 
## $B
## [1] 184
## 
## $C
## [1] 25
## 
## $D
## [1] 59
## 
## $E
## [1] 42
## 
## $F
## [1] 200
#go back to a vector
# vapply() declares that each group yields a single number, so the result is
# always a named numeric vector (sapply() picks its return type from the input)
vapply(spIns, sum, FUN.VALUE = numeric(1))
##   A   B   C   D   E   F 
## 174 184  25  59  42 200

Using Pipes in R

# Import `magrittr`
library(magrittr)

#Pipes in R look like %>% and can combine functions
# Keep rows with Sepal.Width above 2.5, then average the remaining columns
# within each species
iris %>%
  subset(Sepal.Width > 2.5) %>%
  aggregate(. ~ Species, data = ., FUN = mean)
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa     5.016327    3.451020     1.465306    0.244898
## 2 versicolor     6.064865    2.918919     4.397297    1.381081
## 3  virginica     6.662222    3.033333     5.606667    2.053333
# Write easy to read & efficient scripts
# Initialize `x`
x <- c(0.179, 0.329, 0.63, 0.906, 0.535, 0.148, 0.012, 0.824, 0.207)

# Compute the logarithm of `x`, return suitably lagged and iterated differences, compute the exponential function and round the result

# Perform the same computations on `x`
# Each step receives the previous step's result as its first argument
x %>%
  log() %>%
  diff() %>%
  exp() %>%
  round(digits = 1)
## [1]  1.8  1.9  1.4  0.6  0.3  0.1 68.7  0.3
# Initialize `x` 
# Draw 100 values from the standard normal distribution, then print them
x <- rnorm(100)
x
##   [1] -0.87371055  0.09462496 -0.73264465 -0.63912705  1.62053319 -0.40112349
##   [7]  1.25378452  0.19777121  0.17589732  0.06542543 -1.67856052 -0.59916103
##  [13]  0.83330630  1.70888720  0.20470868  0.42463230 -1.25745247  1.19073481
##  [19] -0.33751957  1.59589902  0.55174696 -0.12621496  0.94560295 -0.10972299
##  [25] -1.27561235 -0.53276133 -0.54610627 -0.45776660 -0.82164735  2.47424794
##  [31]  0.40972771 -0.39949876  1.61966621 -0.01764131 -2.71517575 -1.32141235
##  [37] -1.45275195  1.71189144 -0.75220113 -0.59329418  0.17523016  1.04028927
##  [43]  0.19761390  1.51635065 -0.01389959 -0.35166504  0.09137293  1.89117144
##  [49]  0.05987502  0.97791065 -1.24483656 -1.36800974  0.17828806  0.87534275
##  [55] -0.27216796 -0.42795650  0.92457925 -1.48913376 -0.82690824 -0.16191332
##  [61]  0.29661240  0.92382699  1.08430758  0.99106314  1.98220322 -1.02583446
##  [67]  1.01932717  0.58343403  0.29139319 -0.29301218 -1.66013283  0.33890356
##  [73] -1.15830502  2.54201547  0.14168280 -0.14789032 -0.48954200  0.43443876
##  [79] -0.81292240  0.17664083 -0.64302116 -1.53426170 -1.71145635  0.51355207
##  [85]  1.25127948 -0.80277194 -0.33344515 -0.46804543 -0.76483943 -0.19287598
##  [91]  0.07279243  0.93815033  1.22162316  0.15488238  2.37598155  1.93019870
##  [97] -0.27118762  0.19272767  0.67976499 -1.15246521
# Update value of `x` and assign it to `x`
# `%<>%` pipes `x` through the chain and writes the final result back to `x`
x %<>%
  abs() %>%
  sort()
x
##   [1] 0.01389959 0.01764131 0.05987502 0.06542543 0.07279243 0.09137293
##   [7] 0.09462496 0.10972299 0.12621496 0.14168280 0.14789032 0.15488238
##  [13] 0.16191332 0.17523016 0.17589732 0.17664083 0.17828806 0.19272767
##  [19] 0.19287598 0.19761390 0.19777121 0.20470868 0.27118762 0.27216796
##  [25] 0.29139319 0.29301218 0.29661240 0.33344515 0.33751957 0.33890356
##  [31] 0.35166504 0.39949876 0.40112349 0.40972771 0.42463230 0.42795650
##  [37] 0.43443876 0.45776660 0.46804543 0.48954200 0.51355207 0.53276133
##  [43] 0.54610627 0.55174696 0.58343403 0.59329418 0.59916103 0.63912705
##  [49] 0.64302116 0.67976499 0.73264465 0.75220113 0.76483943 0.80277194
##  [55] 0.81292240 0.82164735 0.82690824 0.83330630 0.87371055 0.87534275
##  [61] 0.92382699 0.92457925 0.93815033 0.94560295 0.97791065 0.99106314
##  [67] 1.01932717 1.02583446 1.04028927 1.08430758 1.15246521 1.15830502
##  [73] 1.19073481 1.22162316 1.24483656 1.25127948 1.25378452 1.25745247
##  [79] 1.27561235 1.32141235 1.36800974 1.45275195 1.48913376 1.51635065
##  [85] 1.53426170 1.59589902 1.61966621 1.62053319 1.66013283 1.67856052
##  [91] 1.70888720 1.71145635 1.71189144 1.89117144 1.93019870 1.98220322
##  [97] 2.37598155 2.47424794 2.54201547 2.71517575

#Baby Names

# Import `babynames` data
library(babynames)
# Import `dplyr` library
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the data
data(babynames)

# Count how many young boys with the name "Sam" are born
# Nested form: read from the inside out (filter, then select, then sum)
sum(
  select(
    filter(babynames, sex == "M", name == "Sam"),
    n
  )
)
## [1] 123800
# Do the same but now with `%>%`
babynames %>%
  filter(sex == "M", name == "Sam") %>%
  select(n) %>%
  sum()
## [1] 123800
# Do the same but now with sex =="F"
babynames %>%
  filter(sex == "F", name == "Sam") %>%
  select(n) %>%
  sum()
## [1] 1437
# if you want to use assign() with the pipe, you must be explicit about the environment:

# Define your environment
env <- environment()

# Pipe the name into assign(), naming the target environment explicitly
"x" %>% assign(value = 100, envir = env)

# Print `x` to confirm the assignment
x
## [1] 100

###Add variables in a pipe

# Load in the Iris data
# The file is read without a header row, so the column names are supplied
# directly while reading
iris <- read.csv(
  url("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"),
  header = FALSE,
  col.names = c(
    "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"
  )
)

# Compute the square root of `iris$Sepal.Length` and assign it to the variable
# `%<>%` pipes the column through sqrt() and writes the result back to it
iris$Sepal.Length %<>% sqrt()

# Return `Sepal.Length`
iris$Sepal.Length
##   [1] 2.258318 2.213594 2.167948 2.144761 2.236068 2.323790 2.144761 2.236068
##   [9] 2.097618 2.213594 2.323790 2.190890 2.190890 2.073644 2.408319 2.387467
##  [17] 2.323790 2.258318 2.387467 2.258318 2.323790 2.258318 2.144761 2.258318
##  [25] 2.190890 2.236068 2.236068 2.280351 2.280351 2.167948 2.190890 2.323790
##  [33] 2.280351 2.345208 2.213594 2.236068 2.345208 2.213594 2.097618 2.258318
##  [41] 2.236068 2.121320 2.097618 2.236068 2.258318 2.190890 2.258318 2.144761
##  [49] 2.302173 2.236068 2.645751 2.529822 2.626785 2.345208 2.549510 2.387467
##  [57] 2.509980 2.213594 2.569047 2.280351 2.236068 2.428992 2.449490 2.469818
##  [65] 2.366432 2.588436 2.366432 2.408319 2.489980 2.366432 2.428992 2.469818
##  [73] 2.509980 2.469818 2.529822 2.569047 2.607681 2.588436 2.449490 2.387467
##  [81] 2.345208 2.345208 2.408319 2.449490 2.323790 2.449490 2.588436 2.509980
##  [89] 2.366432 2.345208 2.345208 2.469818 2.408319 2.236068 2.366432 2.387467
##  [97] 2.387467 2.489980 2.258318 2.387467 2.509980 2.408319 2.664583 2.509980
## [105] 2.549510 2.756810 2.213594 2.701851 2.588436 2.683282 2.549510 2.529822
## [113] 2.607681 2.387467 2.408319 2.529822 2.549510 2.774887 2.774887 2.449490
## [121] 2.626785 2.366432 2.774887 2.509980 2.588436 2.683282 2.489980 2.469818
## [129] 2.529822 2.683282 2.720294 2.810694 2.529822 2.509980 2.469818 2.774887
## [137] 2.509980 2.529822 2.449490 2.626785 2.588436 2.626785 2.408319 2.607681
## [145] 2.588436 2.588436 2.509980 2.549510 2.489980 2.428992

What to do when a pipe function doesn't return anything:

# Use the tee operator %T>% when a function doesn't return anything:
# rnorm(100) %>%
#     matrix(ncol = 2) %>%
#     plot() %>%
#     str()
# NULL

# `%T>%` hands the matrix to plot() for its side effect and then forwards the
# matrix itself, not plot()'s return value, on to str()
rnorm(100) %>%
  matrix(ncol = 2) %T>%
  plot() %>%
  str()

##  num [1:50, 1:2] -0.484 0.624 -1.435 0.416 -0.267 ...
# num [1:50, 1:2] -0.715 -0.753 -0.939 -1.053 -0.437 ...
 
# The compound assignment operator %<>% lets you replace the first statement below with the shorter second one:
 
#mtcars <- mtcars %>% 
#  transform(cyl = cyl * 2)
#mtcars %<>% transform(cyl = cyl * 2)

This is an R Markdown document; feel free to reach out for finer details.