##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Let’s create two sample data frames.
# Build two sample data frames of monthly values, starting January 2015 and
# spanning `nmonths` months. Both end up with the column names in `cols`.
nmonths <- 24
x <- seq(as.Date("2015/1/1"), by = "month", length.out = nmonths)

cols <- c("dates", "Difference")

# Uniform draws on (1, 100), shifted up by 25.
prescription1 <- setNames(
  data.frame(x, 25 + runif(nmonths, min = 1, max = 100)),
  cols
)

# Uniform draws on (1, 50), shifted up by 75.
prescription2 <- setNames(
  data.frame(x, 75 + runif(nmonths, min = 1, max = 50)),
  cols
)
The straightforward approach: add one geom_line() layer per data frame.
# Overlay both series on shared axes: one geom_line() layer per data frame,
# each drawn in a fixed colour.
p <- ggplot() +
  geom_line(
    data = prescription1,
    mapping = aes(x = dates, y = Difference),
    colour = "blue"
  ) +
  geom_line(
    data = prescription2,
    mapping = aes(x = dates, y = Difference),
    colour = "red"
  ) +
  labs(x = "Dates", y = "percent.change")
print(p)
Using the melt() function from the reshape2 package provides a number of advantages.
In essence, melting converts a data frame from a “short and wide” format into a “long and thin” format (more rows).
# Show the first six rows of the first sample data frame.
print(head(prescription1, n = 6L))
## dates Difference
## 1 2015-01-01 117.35197
## 2 2015-02-01 82.06761
## 3 2015-03-01 48.21552
## 4 2015-04-01 107.49036
## 5 2015-05-01 105.70461
## 6 2015-06-01 30.87076
# Join the two data frames on their shared `dates` column. Both carry a
# `Difference` column, so merge() tells them apart with its ".x"/".y"
# suffixes.
prescription <- merge(x = prescription1, y = prescription2, by = "dates")
head(prescription)
## dates Difference.x Difference.y
## 1 2015-01-01 117.35197 113.98116
## 2 2015-02-01 82.06761 104.97214
## 3 2015-03-01 48.21552 80.61535
## 4 2015-04-01 107.49036 89.48244
## 5 2015-05-01 105.70461 94.89909
## 6 2015-06-01 30.87076 102.23722
# Reshape to long format: `dates` is kept as the identifier column and every
# other column is stacked into (variable, value) pairs.
# `id.vars` is written out in full; the abbreviated `id.var` only worked
# through R's partial argument matching.
prescriptionMelted <- reshape2::melt(prescription, id.vars = "dates")
head(prescriptionMelted)
## dates variable value
## 1 2015-01-01 Difference.x 117.35197
## 2 2015-02-01 Difference.x 82.06761
## 3 2015-03-01 Difference.x 48.21552
## 4 2015-04-01 Difference.x 107.49036
## 5 2015-05-01 Difference.x 105.70461
## 6 2015-06-01 Difference.x 30.87076
# Draw one line per level of `variable`, using it for the colour aesthetic.
ggplot(
  data = prescriptionMelted,
  mapping = aes(x = dates, y = value, colour = variable)
) +
  geom_line()
We use dplyr to create an extra column and pipe the results into a combined data frame. Neat and succinct, as so often with dplyr.
# Two simulated series on the same dates: normal draws (default sd) with
# means of 0.75 and -0.75. `n` and `mean` are both named so the call does not
# depend on positional matching after a named argument.
df1 <- data.frame(dates = x, Variable = rnorm(n = nmonths, mean = 0.75))
df2 <- data.frame(dates = x, Variable = rnorm(n = nmonths, mean = -0.75))

# Tag each data frame with its series label, then stack them row-wise into a
# single long data frame. The first label is spelled "Amoxicillin" (the
# original had the typo "Amocycillin"), since it appears in the plot legend.
df3 <- bind_rows(
  df1 %>% mutate(Type = "Amoxicillin"),
  df2 %>% mutate(Type = "Penicillin")
)
# Plot the stacked data, one coloured line per `Type`.
ggplot(
  data = df3,
  mapping = aes(x = dates, y = Variable, colour = Type)
) +
  geom_line() +
  labs(title = "Merged datasets")
http://www.r-cran.com
https://www.rstudio.com
http://www.rpubs.com
https://rviews.rstudio.com
http://www.tendron.net
Alan Brown, CTO, Tendron Systems Ltd
– – – – – – – – – – – –