1 Goal


The goal of this tutorial is to learn how to add transparency to lines in ggplot2, so that lines drawn on top of each other can still be told apart.


2 Data preparation


# First we load the libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)

# This tutorial uses the dataset of daily minimum temperatures in Melbourne
# https://datamarket.com/data/set/2324/daily-minimum-temperatures-in-melbourne-australia-1981-1990
# The CSV file is expected in the current working directory
Temperatures <- read.csv(
  file = "daily-minimum-temperatures-in-me.csv",
  stringsAsFactors = FALSE
)
head(Temperatures)
##         Date Daily.minimum.temperatures.in.Melbourne..Australia..1981.1990
## 1 1981-01-01                                                          20.7
## 2 1981-01-02                                                          17.9
## 3 1981-01-03                                                          18.8
## 4 1981-01-04                                                          14.6
## 5 1981-01-05                                                          15.8
## 6 1981-01-06                                                          15.8
# The second column name is too long to work with, so both get short names
names(Temperatures) <- c("Date", "Temperature")

# Parse the "YYYY-MM-DD" strings and convert them to POSIXct in one step,
# so that the intermediate POSIXlt value is never stored in the data frame
Temperatures[["Date"]] <- as.POSIXct(
  strptime(Temperatures[["Date"]], format = "%Y-%m-%d")
)
# Make sure the temperatures are stored as numbers
Temperatures[["Temperature"]] <- as.numeric(Temperatures[["Temperature"]])

# Let's check the structure of the table
str(Temperatures)
## 'data.frame':    3652 obs. of  2 variables:
##  $ Date       : POSIXct, format: "1981-01-01" "1981-01-02" ...
##  $ Temperature: num  20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
# Now we derive the calendar components that are used later on.
# Both columns are added in a single mutate() call: Month first, then Year.
# as.numeric() keeps Year stored as a plain number ("num" in str() below)
Temperatures <- mutate(
  Temperatures,
  Month = month(Date),
  Year = as.numeric(year(Date))
)

# Let's check the structure of the table
str(Temperatures)
## 'data.frame':    3652 obs. of  4 variables:
##  $ Date       : POSIXct, format: "1981-01-01" "1981-01-02" ...
##  $ Temperature: num  20.7 17.9 18.8 14.6 15.8 15.8 15.8 17.4 21.8 20 ...
##  $ Month      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Year       : num  1981 1981 1981 1981 1981 ...
# For the purpose of this exercise we only keep four years (1981 to 1984)
kept_years <- 1981:1984
Temperatures <- Temperatures[Temperatures$Year %in% kept_years, ]
# Check which years are left in the table
unique(Temperatures$Year)
## [1] 1981 1982 1983 1984
# Now we are going to aggregate the points to get only one point per month.
# MonthYear is a "YYYY MM" label (month zero-padded to two digits) that
# identifies each month of each year; it is used as the grouping key.
Temperatures <- mutate(
  Temperatures,
  MonthYear = paste(year(Date), formatC(month(Date), width = 2, flag = "0"))
)
# Average only the columns for which a mean is meaningful. The character
# MonthYear column is left out of the averaged columns: mean() of a character
# vector returns NA and raises a warning for every group.
# na.rm = TRUE is passed on to mean() so missing values are ignored.
Temps_month <- aggregate(
  Temperatures[c("Date", "Temperature", "Month", "Year")],
  by = list(Temperatures$MonthYear),
  FUN = mean,
  na.rm = TRUE
)

# Group.1 holds the MonthYear key of each row
head(Temps_month)
##   Group.1                Date Temperature Month Year
## 1 1981 01 1981-01-16 00:00:00   17.712903     1 1981
## 2 1981 02 1981-02-14 12:00:00   17.678571     2 1981
## 3 1981 03 1981-03-15 23:56:07   13.500000     3 1981
## 4 1981 04 1981-04-15 12:00:00   12.356667     4 1981
## 5 1981 05 1981-05-16 00:00:00    9.490323     5 1981
## 6 1981 06 1981-06-15 12:00:00    7.306667     6 1981

3 Drawing the plot


# Month and Temperature must be numeric to be drawn on continuous axes
Temps_month[["Month"]] <- as.numeric(Temps_month[["Month"]])
Temps_month[["Temperature"]] <- as.numeric(Temps_month[["Temperature"]])
# Year is turned into a factor so that each year gets its own line and colour
Temps_month[["Year"]] <- factor(Temps_month[["Year"]])

# Let's draw: one line per year, month on the x axis, temperature on the y axis
ggplot(
  data = Temps_month,
  mapping = aes(x = Month, y = Temperature, color = Year)
) +
  geom_line()