# Basics: assignment, printing, naming and vectors.
# Lines starting with '##' show the console output of the statement above.
# <- is the assignment operator
a <- 10
# Evaluating a bare name at the top level prints its value; no explicit
# print call is needed
a
## [1] 10
# . is used to delimit words in names as opposed to _ (it's not a method call)
as.factor(c(1, 3, 4))
## [1] 1 3 4
## Levels: 1 3 4
# Create an integer vector holding 1 through 20
a <- 1:20
# Arithmetic is applied to every element of the vector
a/2
## [1] 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0
## [15] 7.5 8.0 8.5 9.0 9.5 10.0
# A named list bundles several vectors into one object
mylist <- list(
  blah = c(1, 2, 3),
  wtf = c(7, 8, 9)
)
# Extract a single element by its name
mylist[["blah"]]
## [1] 1 2 3
# Show the first elements of the list (at most six)
head(mylist, n = 6L)
## $blah
## [1] 1 2 3
##
## $wtf
## [1] 7 8 9
# A data frame holds a list of vectors or factors, all of the same length.
# Data frames: construction and subsetting.
# A data frame with a numeric vector and a character vector
df <- data.frame(first = c(1, 2, 3), second = c("foo", "bar", "baz"))
head(df)
## first second
## 1 1 foo
## 2 2 bar
## 3 3 baz
# Columns are accessed with '$'; the result is a plain vector
df$first
## [1] 1 2 3
# Elements of that vector use the typical [] notation…
df$first[1]
## [1] 1
# …but you can do really interesting things with it
# only return items 1 and 3 (index with a vector of positions)
df$first[c(1, 3)]
## [1] 1 3
# return all items except 1 and 3 (negative positions exclude)
df$first[-c(1, 3)]
## [1] 2
# return all items where the value is greater than two (logical mask)
df$first[df$first > 2]
## [1] 3
# [] can be used on data frames directly, as df[row, column]:
df[3, 1]
## [1] 3
# Leaving the column position empty returns the whole row
df[2, ]
## first second
## 2 2 bar
# Get all rows from the data frame where the value of column 'first' is
# greater than 2
df[df$first > 2, ]
## first second
## 3 3 baz
# You could do something like this to build up a vector…
# (shown for contrast only: growing a vector with c() re-copies it on
# every iteration)
myvector <- vector()
for (var in df$first) {
  myvector <- c(myvector, var + 5)
}
myvector
## [1] 6 7 8
# …but the more idiomatic way is to map a function over the elements.
# vapply() is used instead of sapply() because it declares the result type
# (exactly one number per element), so the return type never depends on
# the input.
myvector <- vapply(df$first, function(var) var + 5, numeric(1))
myvector
## [1] 6 7 8
# For plain arithmetic like this, vectorised code is shorter still:
# df$first + 5
# Boring: base graphics
plot(df)
barplot(df$first)
# Pretty: ggplot2
# NOTE: qplot() is deprecated as of ggplot2 3.4.0; it is kept here as the
# quick-plot introduction before the long form.
library(ggplot2)
qplot(df$first, df$second, size = I(5))
# The bar heights come straight from df$first, so the "col" geom is used.
# The "bar" geom counts rows itself and rejects a y value in current ggplot2.
qplot(df$second, df$first, fill = df$second, geom = "col")
# qplot is simple, but it's worth learning the long form because you'll
# use it more.
# Fixed (non-data) settings such as alpha and size go outside aes(); inside
# aes() a constant is treated as data and gets its own scale and legend.
ggplot(df, aes(first, second, colour = second)) +
  geom_point(alpha = 0.5, size = 5)
# Replace df with 200 points: x is the index, y is standard-normal noise
df <- data.frame(x = 1:200, y = rnorm(200))
ggplot(df, aes(x, y)) + geom_point()
# geom_col draws one bar per factor level, with its height taken from y…
ggplot(df, aes(as.factor(x), y)) + geom_col()
# …but you usually want a histogram, which bins y and counts the
# observations in each bin (after_stat() needs ggplot2 >= 3.3.0)
ggplot(df, aes(y)) + geom_histogram(aes(fill = after_stat(count)))
# If you did want to see the values represented horizontally, the line or
# area geoms are better suited to the task (although still not that
# useful).
ggplot(df, aes(x, y)) + geom_line()
ggplot(df, aes(x, y)) + geom_area()
# Create some fake performance data: two series of 200 normally distributed
# response times (mean 1, sd 0.2 for the .apc series; mean 3 and the default
# sd for the .noapc series). No seed is set, so the sample output shown
# below will differ from run to run.
response.times.apc <- rnorm(200, mean = 1, sd = 0.2)
response.times.noapc <- rnorm(200, mean = 3)
time <- 1:200
# "Wide" layout: one row per time step, one column per series
df <- data.frame(time, response.times.apc, response.times.noapc)
head(df)
## time response.times.apc response.times.noapc
## 1 1 0.8917 3.002
## 2 2 1.1290 3.513
## 3 3 1.0420 3.334
## 4 4 1.1288 3.691
## 5 5 0.5662 3.124
## 6 6 1.2843 2.412
# melt() reshapes to a "long" layout: 'time' is kept as the identifier and
# the remaining columns are stacked into 'variable' (series name) and
# 'value' (measurement) pairs
library(reshape2)
df.new <- melt(df, id.vars = "time")
head(df.new)
## time variable value
## 1 1 response.times.apc 0.8917
## 2 2 response.times.apc 1.1290
## 3 3 response.times.apc 1.0420
## 4 4 response.times.apc 1.1288
## 5 5 response.times.apc 0.5662
## 6 6 response.times.apc 1.2843
# You could look at the data as time series…
library(gridExtra)
p1 <- ggplot(df.new, aes(time, value, colour = variable)) + geom_line()
# alpha is a fixed setting, so it is passed outside aes(); inside aes() the
# constant would be treated as data and add a meaningless legend.
# position = "identity" overlays the two areas instead of stacking them.
p2 <- ggplot(df.new, aes(time, value, fill = variable)) +
  geom_area(position = "identity", alpha = 0.5)
# Stack the two plots vertically for comparison
grid.arrange(p1, p2, nrow = 2)
# …but if the time isn't significant, it can be easier to visualize
# without: a histogram of the values, with the two series side by side
# (value is continuous, so it has to be binned rather than counted as-is)
ggplot(df.new, aes(value, fill = variable)) +
  geom_histogram(position = "dodge")
# BOXPLOTS
ggplot(df.new, aes(variable, value, fill = variable)) +
  geom_boxplot(alpha = 0.1)