library(reshape2)
# Preview the first six rows of `mtcars`
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
###Melt the Data Set:
#Melt the Data Set:
# Create the ID and measure variables.
# Store the row names (the car names) in a regular column so they can be
# used as an ID variable.
mtcars$carname <- row.names(mtcars)
# `id.vars` is spelled out in full instead of relying on partial matching of
# `id`; the result is one row per car and measured variable (mpg, hp).
carMelt <- melt(
  mtcars,
  id.vars = c("carname", "gear", "cyl"),
  measure.vars = c("mpg", "hp")
)
head(carMelt, n = 3)
## carname gear cyl variable value
## 1 Mazda RX4 4 6 mpg 21.0
## 2 Mazda RX4 Wag 4 6 mpg 21.0
## 3 Datsun 710 4 4 mpg 22.8
# Show the last three rows of the melted data
tail(carMelt, n = 3L)
## carname gear cyl variable value
## 62 Ferrari Dino 5 6 hp 175
## 63 Maserati Bora 5 8 hp 335
## 64 Volvo 142E 4 4 hp 109
###Cast into different Shapes:
#Cast into different Shapes:
# Cast to one row per `cyl` and one column per melted variable.
# No aggregation function is supplied, so dcast() falls back to length(),
# i.e. it counts the observations in each cell.
cylData <- dcast(data = carMelt, formula = cyl ~ variable)
## Aggregation function missing: defaulting to length
cylData
## cyl mpg hp
## 1 4 11 11
## 2 6 7 7
## 3 8 14 14
# Recast with an explicit aggregation: the mean of each variable per `cyl`
cylData <- dcast(data = carMelt, formula = cyl ~ variable, fun.aggregate = mean)
cylData
## cyl mpg hp
## 1 4 26.66364 82.63636
## 2 6 19.74286 122.28571
## 3 8 15.10000 209.21429
###Summarise Values by Group:
# Preview the first six rows of `InsectSprays`
head(InsectSprays, n = 6L)
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
# Shorthand: total count for each spray in a single call
tapply(X = InsectSprays$count, INDEX = InsectSprays$spray, FUN = sum)
## A B C D E F
## 174 184 25 59 42 200
# Alternative: split the counts into a list with one element per spray, ...
spIns <- split(InsectSprays$count, InsectSprays$spray)
spIns
## $A
## [1] 10 7 20 14 14 12 10 23 17 20 14 13
##
## $B
## [1] 11 17 21 11 16 14 17 17 19 21 7 13
##
## $C
## [1] 0 1 7 2 3 1 2 1 3 0 1 4
##
## $D
## [1] 3 5 12 6 4 3 5 5 5 5 2 4
##
## $E
## [1] 3 5 3 5 3 6 1 1 3 2 6 4
##
## $F
## [1] 11 9 15 22 15 16 13 10 26 26 24 13
# Apply sum() to every element of the list; lapply() returns a list
sprCount <- lapply(spIns, sum)
sprCount
## $A
## [1] 174
##
## $B
## [1] 184
##
## $C
## [1] 25
##
## $D
## [1] 59
##
## $E
## [1] 42
##
## $F
## [1] 200
# Go back to a named vector. vapply() is used instead of sapply() so the
# result is guaranteed to hold exactly one numeric value per spray.
vapply(spIns, sum, numeric(1))
## A B C D E F
## 174 184 25 59 42 200
# Import `magrittr`
library(magrittr)
# Pipes in R look like %>% and chain function calls together:
# keep the rows with Sepal.Width > 2.5, then average every column per species
iris %>%
  subset(Sepal.Width > 2.5) %>%
  aggregate(. ~ Species, data = ., FUN = mean)
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.016327 3.451020 1.465306 0.244898
## 2 versicolor 6.064865 2.918919 4.397297 1.381081
## 3 virginica 6.662222 3.033333 5.606667 2.053333
# Write easy to read & efficient scripts
# Initialize `x`
x <- c(0.179, 0.329, 0.63, 0.906, 0.535, 0.148, 0.012, 0.824, 0.207)
# Perform the same computations on `x` as a pipeline: take the logarithm,
# compute the lagged differences, exponentiate, and round to one decimal place
x %>%
  log() %>%
  diff() %>%
  exp() %>%
  round(digits = 1)
## [1] 1.8 1.9 1.4 0.6 0.3 0.1 68.7 0.3
# Initialize `x` with 100 random normal draws, then print it
x <- rnorm(n = 100)
x
## [1] -0.87371055 0.09462496 -0.73264465 -0.63912705 1.62053319 -0.40112349
## [7] 1.25378452 0.19777121 0.17589732 0.06542543 -1.67856052 -0.59916103
## [13] 0.83330630 1.70888720 0.20470868 0.42463230 -1.25745247 1.19073481
## [19] -0.33751957 1.59589902 0.55174696 -0.12621496 0.94560295 -0.10972299
## [25] -1.27561235 -0.53276133 -0.54610627 -0.45776660 -0.82164735 2.47424794
## [31] 0.40972771 -0.39949876 1.61966621 -0.01764131 -2.71517575 -1.32141235
## [37] -1.45275195 1.71189144 -0.75220113 -0.59329418 0.17523016 1.04028927
## [43] 0.19761390 1.51635065 -0.01389959 -0.35166504 0.09137293 1.89117144
## [49] 0.05987502 0.97791065 -1.24483656 -1.36800974 0.17828806 0.87534275
## [55] -0.27216796 -0.42795650 0.92457925 -1.48913376 -0.82690824 -0.16191332
## [61] 0.29661240 0.92382699 1.08430758 0.99106314 1.98220322 -1.02583446
## [67] 1.01932717 0.58343403 0.29139319 -0.29301218 -1.66013283 0.33890356
## [73] -1.15830502 2.54201547 0.14168280 -0.14789032 -0.48954200 0.43443876
## [79] -0.81292240 0.17664083 -0.64302116 -1.53426170 -1.71145635 0.51355207
## [85] 1.25127948 -0.80277194 -0.33344515 -0.46804543 -0.76483943 -0.19287598
## [91] 0.07279243 0.93815033 1.22162316 0.15488238 2.37598155 1.93019870
## [97] -0.27118762 0.19272767 0.67976499 -1.15246521
# Compound assignment: pipe `x` through abs() and sort(), store the result
# back in `x`, then print it
x %<>%
  abs() %>%
  sort()
x
## [1] 0.01389959 0.01764131 0.05987502 0.06542543 0.07279243 0.09137293
## [7] 0.09462496 0.10972299 0.12621496 0.14168280 0.14789032 0.15488238
## [13] 0.16191332 0.17523016 0.17589732 0.17664083 0.17828806 0.19272767
## [19] 0.19287598 0.19761390 0.19777121 0.20470868 0.27118762 0.27216796
## [25] 0.29139319 0.29301218 0.29661240 0.33344515 0.33751957 0.33890356
## [31] 0.35166504 0.39949876 0.40112349 0.40972771 0.42463230 0.42795650
## [37] 0.43443876 0.45776660 0.46804543 0.48954200 0.51355207 0.53276133
## [43] 0.54610627 0.55174696 0.58343403 0.59329418 0.59916103 0.63912705
## [49] 0.64302116 0.67976499 0.73264465 0.75220113 0.76483943 0.80277194
## [55] 0.81292240 0.82164735 0.82690824 0.83330630 0.87371055 0.87534275
## [61] 0.92382699 0.92457925 0.93815033 0.94560295 0.97791065 0.99106314
## [67] 1.01932717 1.02583446 1.04028927 1.08430758 1.15246521 1.15830502
## [73] 1.19073481 1.22162316 1.24483656 1.25127948 1.25378452 1.25745247
## [79] 1.27561235 1.32141235 1.36800974 1.45275195 1.48913376 1.51635065
## [85] 1.53426170 1.59589902 1.61966621 1.62053319 1.66013283 1.67856052
## [91] 1.70888720 1.71145635 1.71189144 1.89117144 1.93019870 1.98220322
## [97] 2.37598155 2.47424794 2.54201547 2.71517575
#Baby Names
# Import `babynames` data
library(babynames)
# Import `dplyr` library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the data
data(babynames)
# Count the boys named "Sam" using nested function calls:
# filter the rows, keep the `n` column, and add it up
sum(
  select(
    filter(babynames, sex == "M", name == "Sam"),
    n
  )
)
## [1] 123800
# The same count for sex == "M", written as a `%>%` pipeline
babynames %>%
  filter(sex == "M", name == "Sam") %>%
  select(n) %>%
  sum()
## [1] 123800
# The same pipeline, now with sex == "F"
babynames %>%
  filter(sex == "F", name == "Sam") %>%
  select(n) %>%
  sum()
## [1] 1437
# When assign() is used with the pipe, the target environment must be given
# explicitly.
# Capture the current environment
env <- environment()
# Pipe the variable name into assign(), writing into that environment
"x" %>% assign(value = 100, envir = env)
# Return `x`
x
## [1] 100
###Add variables in a pipe
# Load in the Iris data (the file has no header row)
iris <- read.csv(
  file = url("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"),
  header = FALSE
)
# Add column names to the Iris data
colnames(iris) <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species")
# Replace `Sepal.Length` with its square root, computed through a pipe
iris$Sepal.Length <- iris$Sepal.Length %>% sqrt()
# Return `Sepal.Length`
iris$Sepal.Length
## [1] 2.258318 2.213594 2.167948 2.144761 2.236068 2.323790 2.144761 2.236068
## [9] 2.097618 2.213594 2.323790 2.190890 2.190890 2.073644 2.408319 2.387467
## [17] 2.323790 2.258318 2.387467 2.258318 2.323790 2.258318 2.144761 2.258318
## [25] 2.190890 2.236068 2.236068 2.280351 2.280351 2.167948 2.190890 2.323790
## [33] 2.280351 2.345208 2.213594 2.236068 2.345208 2.213594 2.097618 2.258318
## [41] 2.236068 2.121320 2.097618 2.236068 2.258318 2.190890 2.258318 2.144761
## [49] 2.302173 2.236068 2.645751 2.529822 2.626785 2.345208 2.549510 2.387467
## [57] 2.509980 2.213594 2.569047 2.280351 2.236068 2.428992 2.449490 2.469818
## [65] 2.366432 2.588436 2.366432 2.408319 2.489980 2.366432 2.428992 2.469818
## [73] 2.509980 2.469818 2.529822 2.569047 2.607681 2.588436 2.449490 2.387467
## [81] 2.345208 2.345208 2.408319 2.449490 2.323790 2.449490 2.588436 2.509980
## [89] 2.366432 2.345208 2.345208 2.469818 2.408319 2.236068 2.366432 2.387467
## [97] 2.387467 2.489980 2.258318 2.387467 2.509980 2.408319 2.664583 2.509980
## [105] 2.549510 2.756810 2.213594 2.701851 2.588436 2.683282 2.549510 2.529822
## [113] 2.607681 2.387467 2.408319 2.529822 2.549510 2.774887 2.774887 2.449490
## [121] 2.626785 2.366432 2.774887 2.509980 2.588436 2.683282 2.489980 2.469818
## [129] 2.529822 2.683282 2.720294 2.810694 2.529822 2.509980 2.469818 2.774887
## [137] 2.509980 2.529822 2.449490 2.626785 2.588436 2.626785 2.408319 2.607681
## [145] 2.588436 2.588436 2.509980 2.549510 2.489980 2.428992
# Use the tee operator `%T>%` when a function doesn't return anything (e.g. `plot()`):
# rnorm(100) %>%
# matrix(ncol = 2) %>%
# plot() %>%
# str()
# NULL
# `%T>%` runs plot() for its side effect and passes the matrix itself,
# not plot()'s return value, on to str()
rnorm(n = 100) %>%
  matrix(ncol = 2) %T>%
  plot() %>%
  str()
## num [1:50, 1:2] -0.484 0.624 -1.435 0.416 -0.267 ...
# num [1:50, 1:2] -0.715 -0.753 -0.939 -1.053 -0.437 ...
# the %<>% operator which allows you to replace code like:
#mtcars <- mtcars %>%
# transform(cyl = cyl * 2)
#mtcars %<>% transform(cyl = cyl * 2)
# This is an R Markdown document; feel free to reach out for finer details.