image:
If you are ready, then let’s try to download. It’s up to you which cities you want to get from the website. I chose four cities: Seoul, Incheon(in where I live), Busan, Jeju.
Make sure that the period days should be same in each city. I chose dates from Jan 1, 2016 to present
We will develop clean_weather funcion once, it’s created we can use this function repeatedly with for-loop function. We will see the magic at the end of this article.
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Step 1. Data import and clean weather using function
clean_weather <- function(file) {
weather <- read.csv(file)
names(weather) <- c("date",
"temperature")
weather$date <- as.character(weather$date)
weather <- weather %>%
separate(col = "date",
into = c("year",
"month",
"day"),
sep = "-") %>%
mutate(year = as.numeric(year),
month = as.numeric(month),
day = as.numeric(day)) %>% # convernt character to numeric
filter(!(month == 2 & day == 29)) %>% # Let's remove Feb 29, it's irregular days
group_by(year) %>%
mutate(yearday = 1:length(day)) %>%
ungroup() %>%
filter(temperature != -99)
}
# Import seoul_data and cleaning data save as my_weather_data
my_weather_data <- clean_weather("data/MYSEOUL.csv")
str(my_weather_data)
## Classes 'tbl_df', 'tbl' and 'data.frame': 4220 obs. of 5 variables:
## $ year : num 2006 2006 2006 2006 2006 ...
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ temperature: num 1.4 0.2 -2.4 -6.2 -7.1 -7.5 -6.2 -4.2 -1.6 -0.3 ...
## $ yearday : int 1 2 3 4 5 6 7 8 9 10 ...
All ggplot2 objects are built the ggproto system of object oriented programming. Why ggproto? Whenever we turned to add an official extension mechanism to ggplot2, then we need this. It’s not new package we are going to use it’s extension we are going to add in ggplot2.
Let’s create simplest stat and apply into ggplot with mtcars dataset.
StatChull <- ggproto("StatChull",
Stat,
compute_group = function(data, scales) {
data[chull(data$x, data$y), ,drop = FALSE]
},
required_aes = c("x", "y")
)
In this function described, the most important components are the compute_group() and the required_aes field.
If you are done with the first step, we write layer function.
stat_chull <- function(mapping = NULL, data = NULL, geom = "polygon",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = StatChull, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
p1 <- ggplot(mtcars, aes(disp, mpg)) +
geom_point() +
stat_chull(fill = NA)
p2 <- ggplot(mtcars, aes(disp, mpg)) +
geom_point() +
stat_chull(fill = NA, colour = "black")
p3 <- ggplot(mtcars, aes(disp, mpg)) +
stat_chull(geom = "point", size = 4, colour = "red") +
geom_point()
p4 <- ggplot(mtcars, aes(disp, mpg, colour = factor(cyl))) +
geom_point() +
stat_chull(fill = NA)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(p1, p2, p3, p4, ncol = 2)
This is power of ggproto(). If you want to learn more please go to this web http://rstudio-pubs-static.s3.amazonaws.com/108934_8537676801dd4548a96f6451bae01e94.html
I also personally study from this with datacamp.
# Create the Stat Historical
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
TempHistorical <- ggproto("TempHistorical", Stat,
compute_group = function(data, scales, param) {
data <- data %>%
filter(year != max(year)) %>% # except for 2017
group_by(x) %>%
mutate(ymin = smean.cl.normal(y)[2],
ymax = smean.cl.normal(y)[3]) %>%
ungroup()
},
requried_aes = c("x", "y", "year")
)
# Create the layer
temp_historical <- function(mapping = NULL, data = NULL, geom = "point",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
list(
layer(
stat = "identity", data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, col = "#EED8AE", alpha = 0.3, shape = 16, ...)
),
layer(
stat = TempHistorical, data = data, mapping = mapping, geom = "linerange",
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, col = "#8B7E66", ...)
)
)
}
ggplot(my_weather_data, aes(x = yearday, y = temperature, year = year)) +
temp_historical()
# Create the Stat Historical
library(Hmisc)
TempPresent <- ggproto("TempPresent", Stat,
compute_group = function(data, scales, param) {
data <- data %>%
filter(year == max(year)) # only for 2017
},
requried_aes = c("x", "y", "year")
)
# Create the layer
temp_present <- function(mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = TempPresent, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
ggplot(my_weather_data, aes(x = yearday, y = temperature, year = year)) +
temp_historical() +
temp_present()
# Create the stats Objects
TempExtremes <- ggproto("TempExtremes", Stat,
compute_group = function(data, scales, params) {
# present data
present <- data %>%
filter(year == max(year))
# past data
past <- data %>%
filter(year != max(year))
# past extremes
past_extremes <- past %>%
group_by(x) %>%
summarise(past_low = min(y),
past_high = max(y))
# transform data to contain extremes
data <- present %>%
left_join(past_extremes) %>%
mutate(record = ifelse(y < past_low, "#0000CD",
ifelse(y > past_high, "#CD2626", "#00000000"))
)
},
requried_aes = c("x", "y", "year")
)
# Create the layer
temp_extremes <- function(mapping = NULL, data = NULL, geom = "point",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = TempExtremes, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
ggplot(my_weather_data, aes(x = yearday, y = temperature, year = year)) +
temp_historical() +
temp_present() +
temp_extremes(aes(col = ..record..)) +
scale_color_identity()
## Joining, by = "x"
library(grid)
weather_draw_pop_legend <- function(x = 0.6, y = 0.2, width = 0.2, height = 0.2, fontsize = 10) {
# Finish viewport() function
pushViewport(viewport(x = x, y = y, width = width, height = height, just = "center"))
legend_labels <- c("New record high",
"95% CI range",
"Current year",
"Past years",
"New record low")
legend_position <- c(0.9, 0.7, 0.5, 0.3, 0.1)
# Finish grid.text() function
grid.text(legend_labels, x = 0.15, y = legend_position,
just = "left", gp = gpar(fontsize = fontsize, col = "grey20"))
# Position dots, rectangle and line
point_position_y <- c(0.1, 0.3, 0.9)
point_position_x <- rep(0.06, length(point_position_y))
grid.points(x = point_position_x, y = point_position_y, pch = 16,
gp = gpar(col = c("#0000CD", "#EED8AE", "#CD2626")))
grid.rect(x = 0.06, y = 0.7, width = 0.06, height = 0.1,
gp = gpar(col = NA, fill = "#8B7E66"))
grid.lines(x = c(0.03, 0.09), y = c(0.5, 0.5),
gp = gpar(col = "black", lwd = 3))
# Add popViewport() for bookkeeping
popViewport()
}
ggplot(my_weather_data, aes(x = yearday, y = temperature, year = year)) +
temp_historical() +
temp_present() +
temp_extremes(aes(col = ..record..)) +
scale_color_identity()
## Joining, by = "x"
weather_draw_pop_legend() # Call draw_pop_legend()
We are almost done. We have build three “temp_” functions. Now, we are going to upload three more cities and build graph in a just couple of codes.
For re-use, we also create data files using for-loop. In this time, for-loop is indeed helpful to build four cities graph. This is the power of Object Oriented programming, Organized around objects!!
my_weather_files <- c("data/MYSEOUL.csv", "data/MYBUSAN.csv", "data/MYINCHEON.csv", "data/MYJEJU.csv")
# Build my_weather_files with for-loop
my_weather_dataset <- NULL
for(data in my_weather_files) {
temperature <- clean_weather(data)
temperature$id <- sub(".csv", "", data)
temperature$id <- sub("data/", "", temperature$id)
my_weather_dataset <- rbind(my_weather_dataset, temperature)
}
# Build graph
ggplot(my_weather_dataset, aes(x = yearday, y = temperature, year = year)) +
temp_historical() +
temp_present() +
temp_extremes(aes(col = ..record..)) +
scale_color_identity() +
facet_wrap(~id, ncol=2)
## Joining, by = "x"
## Joining, by = "x"
## Joining, by = "x"
## Joining, by = "x"