Part A:
Part B:
Part C:
source("create_datasets.R")
load('data/test_datasets.RData')
library(readr)
library(dplyr)
library(ggplot2)
library(purrr)
library(grid)
library(gtable)
library(aplpack)
 Â
# the grid library is loaded
# Draw rectangle in null viewport
grid.rect(gp = gpar(fill = "grey90"))
# Write text in null viewport
grid.text("null viewport")
# Draw a line
grid.lines(x = c(0, 0.75), y = c(0.25, 1),
gp = gpar(lty = 2, col = "red"))
# Populate null viewport
grid.rect(gp = gpar(fill = "grey90"))
grid.text("null viewport")
grid.lines(x = c(0,0.75), y = c(0.25, 1),
gp = gpar(lty = 2, col = "red"))
# Create new viewport: vp
vp <- viewport(x = 0.5, y = 0.5, width = 0.5, height = 0.5, just = "center")
# Push vp
pushViewport(vp)
# Populate new viewport with rectangle
grid.rect(gp = gpar(fill = "blue"))
# 1 - Create plot viewport: pvp
mar <- c(5, 4, 2, 2)
pvp <- plotViewport(mar)
# 2 - Push pvp
pushViewport(pvp)
# 3 - Add rectangle
grid.rect(gp = gpar(fill = "grey80"))
# Create data viewport: dvp
dvp <- dataViewport(xData = mtcars$wt, yData = mtcars$mpg)
# 4 - Push dvp
pushViewport(dvp)
# Add two axes
grid.xaxis()
grid.yaxis()
# Work from before
pushViewport(plotViewport(c(5, 4, 2, 2)))
grid.rect(gp = gpar())
pushViewport(dataViewport(xData = mtcars$wt, yData = mtcars$mpg))
grid.xaxis()
grid.yaxis()
# 1 - Add text to x axis
grid.text("Weight", y = unit(-3, "lines"))
# 2 - Add text to y axis
grid.text("MPG", x = unit(-3, "lines"), rot = 90)
# 3 - Add points
grid.points(x = mtcars$wt, y = mtcars$mpg, pch = 16)
# Work from before
pushViewport(plotViewport(c(5, 4, 2, 2)))
grid.rect(gp = gpar())
pushViewport(dataViewport(xData = mtcars$wt, yData = mtcars$mpg))
grid.xaxis()
grid.yaxis()
# Work from before - add names
grid.text("Weight", y = unit(-3, "lines"), name = "xaxis")
grid.text("MPG", x = unit(-3, "lines"), rot = 90, name = "yaxis")
grid.points(x = mtcars$wt, y = mtcars$mpg, pch = 16, name = "datapoints")
# Edit "xaxis"
grid.edit("xaxis", label = "Weight (1000 lbs)")
# Edit "yaxis"
grid.edit("yaxis", label = "Miles/(US) gallon")
# Edit "datapoints"
grid.edit("datapoints",
gp = gpar(col = "#C3212766", cex = 2))
# A simple plot p
p <- ggplot(mtcars, aes(x = wt, y = mpg, col = factor(cyl))) + geom_point()
# Create gtab with ggplotGrob()
gtab <- ggplotGrob(p)
# Print out gtab
gtab
## TableGrob (10 x 9) "layout": 18 grobs
## z cells name grob
## 1 0 ( 1-10, 1- 9) background rect[plot.background..rect.80]
## 2 5 ( 5- 5, 3- 3) spacer zeroGrob[NULL]
## 3 7 ( 6- 6, 3- 3) axis-l absoluteGrob[GRID.absoluteGrob.56]
## 4 3 ( 7- 7, 3- 3) spacer zeroGrob[NULL]
## 5 6 ( 5- 5, 4- 4) axis-t zeroGrob[NULL]
## 6 1 ( 6- 6, 4- 4) panel gTree[panel-1.gTree.36]
## 7 9 ( 7- 7, 4- 4) axis-b absoluteGrob[GRID.absoluteGrob.49]
## 8 4 ( 5- 5, 5- 5) spacer zeroGrob[NULL]
## 9 8 ( 6- 6, 5- 5) axis-r zeroGrob[NULL]
## 10 2 ( 7- 7, 5- 5) spacer zeroGrob[NULL]
## 11 10 ( 4- 4, 4- 4) xlab-t zeroGrob[NULL]
## 12 11 ( 8- 8, 4- 4) xlab-b titleGrob[axis.title.x..titleGrob.39]
## 13 12 ( 6- 6, 2- 2) ylab-l titleGrob[axis.title.y..titleGrob.42]
## 14 13 ( 6- 6, 6- 6) ylab-r zeroGrob[NULL]
## 15 14 ( 6- 6, 8- 8) guide-box gtable[guide-box]
## 16 15 ( 3- 3, 4- 4) subtitle zeroGrob[plot.subtitle..zeroGrob.77]
## 17 16 ( 2- 2, 4- 4) title zeroGrob[plot.title..zeroGrob.76]
## 18 17 ( 9- 9, 4- 4) caption zeroGrob[plot.caption..zeroGrob.78]
# Extract the grobs from gtab: gtab
g <- gtab$grob
# Draw only the legend
legend_index <- which(vapply(g, inherits, what = "gtable", logical(1)))
grid::grid.draw(g[[legend_index]])
grid
library seems really flaky. None of its functions were working in this code chunk
library(grid)
# Code from before
p <- ggplot(mtcars, aes(x = wt, y = mpg, col = factor(cyl))) + geom_point()
gtab <- ggplotGrob(p)
g <- gtab$grobs
legend_index <- which(vapply(g, inherits, what = "gtable", logical(1)))
grid::grid.draw(g[[legend_index]])
# the gtable library is loaded
# 1 - Show layout of legend grob
## this is not printing in the html but it shows fine in the console.
gtable_show_layout(g[[legend_index]])
# Create text grob
my_text <- grid::textGrob(label = "Motor Trend, 1974", gp = gpar(fontsize = 7, col = "gray25"))
# 2 - Use gtable_add_grob to modify original gtab
new_legend <- gtable_add_grob(
gtab$grobs[[legend_index]], my_text, 3, 2)
# 3 - Update in gtab
gtab$grobs[[legend_index]] <- new_legend
# 4 - Draw gtab
grid::grid.draw(gtab)
# Simple plot p
p <- ggplot(mtcars, aes(x = wt, y = mpg, col = factor(cyl))) + geom_point()
# Examine class() and names()
class(p)
## [1] "gg" "ggplot"
names(p)
## [1] "data" "layers" "scales" "mapping" "theme"
## [6] "coordinates" "facet" "plot_env" "labels"
# Print the scales sub-list
p$scales$scales
## list()
# Update p
p <- p +
scale_x_continuous("Length", limits = c(4, 8), expand = c(0, 0)) +
scale_y_continuous("Width", limits = c(2, 4.5), expand = c(0, 0))
# Print the scales sub-list
p$scales$scales
## [[1]]
## <ScaleContinuousPosition>
## Range:
## Limits: 4 -- 8
##
## [[2]]
## <ScaleContinuousPosition>
## Range:
## Limits: 2 -- 4.5
# Box plot of mtcars: p
p <- ggplot(mtcars, aes(x = factor(cyl), y = wt)) + geom_boxplot()
# Create pbuild
pbuild <- ggplot_build(p)
# a list of 3 elements
names(pbuild)
## [1] "data" "layout" "plot"
# Print out each element in pbuild
# In the exercise they have panel as an option. But its `plot now`
pbuild$data
## [[1]]
## ymin lower middle upper ymax outliers notchupper
## 1 1.513 1.8850 2.200 2.62250 3.19 2.551336
## 2 2.620 2.8225 3.215 3.44000 3.46 3.583761
## 3 3.170 3.5325 3.755 4.01375 4.07 5.250, 5.424, 5.345 3.958219
## notchlower x PANEL group ymin_final ymax_final xmin xmax weight colour
## 1 1.848664 1 1 1 1.513 3.190 0.625 1.375 1 grey20
## 2 2.846239 2 1 2 2.620 3.460 1.625 2.375 1 grey20
## 3 3.551781 3 1 3 3.170 5.424 2.625 3.375 1 grey20
## fill size alpha shape linetype
## 1 white 0.5 NA 19 solid
## 2 white 0.5 NA 19 solid
## 3 white 0.5 NA 19 solid
pbuild$layout
## <ggproto object: Class Layout>
## facet: <ggproto object: Class FacetNull, Facet>
## compute_layout: function
## draw_back: function
## draw_front: function
## draw_labels: function
## draw_panels: function
## finish_data: function
## init_scales: function
## map: function
## map_data: function
## params: list
## render_back: function
## render_front: function
## render_panels: function
## setup_data: function
## setup_params: function
## shrink: TRUE
## train: function
## train_positions: function
## train_scales: function
## vars: function
## super: <ggproto object: Class FacetNull, Facet>
## finish_data: function
## get_scales: function
## map: function
## map_position: function
## panel_layout: data.frame
## panel_ranges: list
## panel_scales: list
## render: function
## render_labels: function
## reset_scales: function
## setup: function
## train_position: function
## train_ranges: function
## xlabel: function
## ylabel: function
## super: <ggproto object: Class Layout>
## This should show the plot. Not working in the markdown doc though.
pbuild$plot
# Create gtab from pbuild
gtab <- ggplot_gtable(pbuild)
# Draw gtab
grid::grid.draw(gtab)
# Box plot of mtcars: p
p <- ggplot(mtcars, aes(x = factor(cyl), y = wt)) + geom_boxplot()
# Build pdata
pdata <- ggplot_build(p)$data
# confirm that the first element of the list is a data frame
class(pdata[[1]])
## [1] "data.frame"
# Isolate this data frame
my_df <- pdata[[1]]
# The x labels
my_df$group <- ggplot_build(p)$layout$panel_ranges[[1]]$x.labels
# Print out specific variables
my_df[c(1:6, 11)]
## ymin lower middle upper ymax outliers group
## 1 1.513 1.8850 2.200 2.62250 3.19 4
## 2 2.620 2.8225 3.215 3.44000 3.46 6
## 3 3.170 3.5325 3.755 4.01375 4.07 5.250, 5.424, 5.345 8
# Add a theme (legend at the bottom)
g1 <- ggplot(mtcars, aes(wt, mpg, col = cyl)) +
geom_point(alpha = 0.5) +
theme(legend.position = "bottom")
# Add a theme (no legend)
g2 <- ggplot(mtcars, aes(disp, fill = cyl)) +
geom_histogram(position = "identity", alpha = 0.5, binwidth = 20) +
theme(legend.position = "none")
# Load gridExtra
library(gridExtra)
# Call grid.arrange()
grid.arrange(g1, g2, ncol = 2)
# ggplot2, grid and gridExtra have been loaded for you
# Definitions of g1 and g2
g1 <- ggplot(mtcars, aes(wt, mpg, col = cyl)) +
geom_point() +
theme(legend.position = "bottom")
g2 <- ggplot(mtcars, aes(disp, fill = cyl)) +
geom_histogram(binwidth = 20) +
theme(legend.position = "none")
# Extract the legend from g1
my_legend <- ggplotGrob(g1)$grobs[[legend_index]]
# Create g1_noleg
g1_noleg <- g1 +
theme(legend.position = "none")
# Calculate the height: legend_height
legend_height <- sum(my_legend$heights)
# Arrange g1_noleg, g2 and my_legend
grid.arrange(
g1_noleg, g2, my_legend,
layout_matrix = matrix(
c(1,3,2,3),
ncol = 2),
heights = unit.c(unit(1, "npc") - legend_height, legend_height))
 Â
# test_datasets.RData has been loaded
# Call bagplot() on test_data
# The aplpack library has been loaded
head(ch5_test_data)
## x y
## 1 2560 97
## 2 2345 114
## 3 1845 81
## 4 2260 91
## 5 2440 113
## 6 2285 97
bagplot(ch5_test_data)
# Call compute.bagplot on test_data, assign to bag
bag <- compute.bagplot(ch5_test_data)
# Display information
bag$hull.loop
## x y
## 3690 146
## 2840 107
## 1900 73
## 1845 81
## 3325 231
## 3610 232
## 3735 202
## 3735 181
bag$hull.bag
## [,1] [,2]
## 2965.409 133.34581
## 2780.925 121.99004
## 2690.199 116.59625
## 2460.339 103.15300
## 2423.851 101.45165
## 2416.798 101.15107
## 2332.435 98.08723
## 2329.436 98.01789
## sl 2318.892 98.01833
## sl 2297.342 98.02599
## sl 2297.341 98.02648
## sl 2297.184 99.03550
## sl 2311.571 104.81678
## 2312.502 105.04681
## 2331.535 109.10241
## 2359.803 114.74815
## 2379.444 118.65044
## 2396.732 121.60977
## 2558.743 142.23486
## 2574.610 143.96787
## 2618.128 148.28650
## 2647.898 150.86817
## 2674.115 153.07863
## 2741.581 158.07621
## 2750.780 158.71717
## 3026.607 177.86184
## 3059.711 180.12607
## 3226.141 189.73602
## 3287.371 192.79199
## 3425.012 199.59414
## 3448.124 199.76179
## sr 3490.523 200.06172
## 3509.671 199.07448
## 3500.493 195.92219
## 3435.869 177.44535
## 3259.740 155.69116
## 3247.534 154.37051
## 3218.173 151.54236
## 3183.859 148.31374
## 3138.247 144.31759
bag$pxy.outlier
## x y
## [1,] 3320 305
## [2,] 3310 302
## [3,] 3855 305
## [4,] 3850 302
# Highlight components
points(bag$hull.loop, col = "green", pch = 16)
points(bag$hull.bag, col = "orange", pch = 16)
points(bag$pxy.outlier, col = "purple", pch = 16)
# bag and ch5_test_data are available
# Create data frames from matrices
hull.loop <- data.frame(x = bag$hull.loop[,1], y = bag$hull.loop[,2])
hull.bag <- data.frame(x = bag$hull.bag[,1], y = bag$hull.bag[,2])
pxy.outlier <- data.frame(x = bag$pxy.outlier[,1], y = bag$pxy.outlier[,2])
# Finish the ggplot command
ggplot(ch5_test_data, aes(x = x, y = y)) +
geom_polygon(data = hull.loop, fill = "green") +
geom_polygon(data = hull.bag, fill = "orange") +
geom_point(data = pxy.outlier, col = "purple", pch = 16, cex = 1.5)
# ggproto for StatLoop (hull.loop)
StatLoop <- ggproto(
"StatLoop",
Stat,
required_aes = c("x", "y"),
compute_group = function(data, scales) {
bag <- compute.bagplot(x = data$x, y = data$y)
data.frame(x = bag$hull.loop[,1], y = bag$hull.loop[,2])
})
# ggproto for StatBag (hull.bag)
StatBag <- ggproto(
"StatBag",
Stat,
required_aes = c("x", "y"),
compute_group = function(data, scales) {
bag <- compute.bagplot(x = data$x, y = data$y)
data.frame(x = bag$hull.bag[,1], y = bag$hull.bag[,2])
})
# ggproto for StatOut (pxy.outlier)
StatOut <- ggproto(
"StatOut",
Stat,
required_aes = c("x", "y"),
compute_group = function(data, scales) {
bag <- compute.bagplot(x = data$x, y = data$y)
data.frame(x = bag$pxy.outlier[,1], y = bag$pxy.outlier[,2])
})
# StatLoop, StatBag and StatOut are available
# Combine ggproto objects in layers to build stat_bag()
stat_bag <- function(mapping = NULL, data = NULL, geom = polygon,
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, loop = FALSE, ...) {
list(
# StatLoop layer
layer(
stat = StatLoop, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, alpha = 0.35, col = NA, ...)
),
# StatBag layer
layer(
stat = StatBag, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, alpha = 0.35, col = NA, ...)
),
# StatOut layer
layer(
stat = StatOut, data = data, mapping = mapping, geom = "point",
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, alpha = 0.7, col = NA, shape = 21, ...)
)
)
}
# hull.loop, hull.bag and pxy.outlier are available
# stat_bag, ch5_test_data and ch5_test_data2 are available
# Previous method
ggplot(ch5_test_data, aes(x = x, y = y)) +
geom_polygon(data = hull.loop, fill = "green") +
geom_polygon(data = hull.bag, fill = "orange") +
geom_point(data = pxy.outlier, col = "purple", pch = 16, cex = 1.5)
# stat_bag
ggplot(data = ch5_test_data, aes(x = x, y = y)) +
stat_bag(ch5_test_data, fill = 'black')
## Error: Mapping must be created by `aes()` or `aes_()`
# stat_bag on test_data2
ggplot(ch5_test_data2, aes(x = x, y = y, fill = treatment)) +
stat_bag()
## Error: object of type 'closure' is not subsettable
# Import weather data
weather_data_urls <- c(
'https://assets.datacamp.com/production/course_862/datasets/NYNEWYOR.txt',
'https://assets.datacamp.com/production/course_862/datasets/FRPARIS.txt',
'https://assets.datacamp.com/production/course_862/datasets/ILREYKJV.txt',
'https://assets.datacamp.com/production/course_862/datasets/UKLONDON.txt'
)
weather_data_files <- c("data/NYNEWYOR.txt","data/FRPARIS.txt", "data/ILREYKJV.txt", "data/UKLONDON.txt")
download.file(weather_data_urls, weather_data_files)
# Check out the NY weather data
weather <- read.fwf('data/NYNEWYOR.txt',
header = F,
col.names = c("month", "day", "year", "temp"),
widths = c(14, 14, 13, 4))
# Check structure of weather
str(weather)
## 'data.frame': 7824 obs. of 4 variables:
## $ month: num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ year : num 1995 1995 1995 1995 1995 ...
## $ temp : num 44 41 28 31 21 27 42 35 34 29 ...
# Create past with two filter() calls
past <- weather %>%
filter(!(month == 2 & day == 29)) %>%
filter(year != 2016)
# Check structure of past
str(past)
## 'data.frame': 7665 obs. of 4 variables:
## $ month: num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ year : num 1995 1995 1995 1995 1995 ...
## $ temp : num 44 41 28 31 21 27 42 35 34 29 ...
# Create new version of past
past_summ <- past %>%
group_by(year) %>%
mutate(yearday = 1:length(day)) %>%
ungroup() %>%
filter(temp != -99) %>%
group_by(yearday) %>%
mutate(max = max(temp),
min = min(temp),
avg = mean(temp),
CI_lower = Hmisc::smean.cl.normal(temp)[2],
CI_upper = Hmisc::smean.cl.normal(temp)[3]) %>%
ungroup()
# Structure of past_summ
str(past_summ)
## Classes 'tbl_df', 'tbl' and 'data.frame': 7645 obs. of 10 variables:
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ year : num 1995 1995 1995 1995 1995 ...
## $ temp : num 44 41 28 31 21 27 42 35 34 29 ...
## $ yearday : int 1 2 3 4 5 6 7 8 9 10 ...
## $ max : num 51 48 57 55 56 62 52 57 54 47 ...
## $ min : num 17 15 16 15 21 14 14 12 21 8.5 ...
## $ avg : num 35.6 35.4 34.9 35.1 35.9 ...
## $ CI_lower: num 31 31.6 29.7 29.9 31.9 ...
## $ CI_upper: num 40.1 39.2 40 40.4 39.9 ...
head(past_summ)
## # A tibble: 6 x 10
## month day year temp yearday max min avg CI_lower CI_upper
## <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 1995 44 1 51 17 35.57143 31.00682 40.13604
## 2 1 2 1995 41 2 48 15 35.38095 31.57395 39.18796
## 3 1 3 1995 28 3 57 16 34.85714 29.73285 39.98144
## 4 1 4 1995 31 4 55 15 35.14286 29.85929 40.42642
## 5 1 5 1995 21 5 56 21 35.90476 31.89258 39.91695
## 6 1 6 1995 27 6 62 14 35.95238 31.26040 40.64437
Hmisc::smean.cl.normal(past$temp)[2]
## Lower
## 54.69111
table(past$temp)
##
## -99 8.5 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 20 1 3 2 7 8 8 9 6 7 12 17 20 39 23 18 46 38
## 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
## 41 54 69 83 76 82 77 107 109 112 126 113 126 148 144 137 137 135
## 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
## 146 133 135 135 152 132 128 143 133 125 125 123 137 148 144 151 119 121
## 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
## 130 127 109 124 119 117 136 119 146 136 177 187 188 180 175 165 158 106
## 80 81 82 83 84 85 86 87 88 89 91 92 93
## 98 90 80 53 42 33 27 19 11 11 4 6 2
table(past_summ$temp)
##
## 8.5 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 1 3 2 7 8 8 9 6 7 12 17 20 39 23 18 46 38 41
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 54 69 83 76 82 77 107 109 112 126 113 126 148 144 137 137 135 146
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
## 133 135 135 152 132 128 143 133 125 125 123 137 148 144 151 119 121 130
## 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 127 109 124 119 117 136 119 146 136 177 187 188 180 175 165 158 106 98
## 81 82 83 84 85 86 87 88 89 91 92 93
## 90 80 53 42 33 27 19 11 11 4 6 2
# Adapt historical plot
ggplot(past_summ, aes(x = yearday, y = temp)) +
geom_point(
col = "#EED8AE",
alpha = 0.3,
shape = 16) +
geom_linerange(
aes(ymin = CI_lower, ymax = CI_upper),
col = "#8B7E66")
# weather and past are available in your workspace
# Create present
present <- weather %>%
filter(!(month == 2 & day == 29)) %>%
filter(year == max(year)) %>%
group_by(year) %>%
mutate(yearday = 1:length(day)) %>%
ungroup() %>%
filter(temp != -99)
# Add geom_line to ggplot command
ggplot(past_summ, aes(x = yearday, y = temp)) +
geom_point(
col = "#EED8AE",
alpha = 0.3,
shape = 16) +
geom_linerange(
aes(ymin = CI_lower, ymax = CI_upper),
col = "#8B7E66") +
geom_line(data = present)
# Create past_highs
past_highs <- past_summ %>%
group_by(yearday) %>%
summarise(past_high = max(temp))
# Create record_high
record_high <- present %>%
left_join(past_highs) %>%
filter(temp > past_high)
record_high
## # A tibble: 11 x 6
## month day year temp yearday past_high
## <dbl> <dbl> <dbl> <dbl> <int> <dbl>
## 1 1 10 2016 50 10 47
## 2 2 4 2016 54 35 47
## 3 2 21 2016 55 52 54
## 4 3 9 2016 54 68 52
## 5 3 10 2016 70 69 56
## 6 3 11 2016 61 70 56
## 7 3 25 2016 55 84 54
## 8 4 1 2016 62 91 60
## 9 4 22 2016 70 112 66
## 10 5 26 2016 82 146 80
## 11 5 28 2016 79 148 78
# Add record_high information to plot
ggplot(past_summ, aes(x = yearday, y = temp)) +
geom_point(col = "#EED8AE", alpha = 0.3, shape = 16) +
geom_linerange(aes(ymin = CI_lower, ymax = CI_upper), col = "#8B7E66") +
geom_line(data = present) +
geom_point(data = record_high, col = "#CD2626")
# Create past_extremes
past_extremes <- past_summ %>%
group_by(yearday) %>%
summarise(past_low = min(temp),
past_high = max(temp))
# Create record_high_low
record_high_low <- present %>%
left_join(past_extremes) %>%
mutate(record = ifelse(temp < past_low,
"#0000CD",
ifelse(temp > past_high,
"#CD2626",
"#00000000")))
# Structure of record_high_low
str(record_high_low)
## Classes 'tbl_df', 'tbl' and 'data.frame': 153 obs. of 8 variables:
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ year : num 2016 2016 2016 2016 2016 ...
## $ temp : num 41 37 40 33 19 32 39 40 43 50 ...
## $ yearday : int 1 2 3 4 5 6 7 8 9 10 ...
## $ past_low : num 17 15 16 15 21 14 14 12 21 8.5 ...
## $ past_high: num 51 48 57 55 56 62 52 57 54 47 ...
## $ record : chr "#00000000" "#00000000" "#00000000" "#00000000" ...
head(record_high_low)
## # A tibble: 6 x 8
## month day year temp yearday past_low past_high record
## <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr>
## 1 1 1 2016 41 1 17 51 #00000000
## 2 1 2 2016 37 2 15 48 #00000000
## 3 1 3 2016 40 3 16 57 #00000000
## 4 1 4 2016 33 4 15 55 #00000000
## 5 1 5 2016 19 5 21 56 #0000CD
## 6 1 6 2016 32 6 14 62 #00000000
# Add point layer of record_high_low
p <- ggplot(past_summ, aes(x = yearday, y = temp)) +
geom_point(col = "#EED8AE", alpha = 0.3, shape = 16) +
geom_linerange(aes(ymin = CI_lower, ymax = CI_upper), col = "#8B7E66") +
geom_line(data = present) +
geom_point(data = record_high_low, aes(col = record)) +
scale_color_identity()
p
# Finish the function draw_pop_legend
draw_pop_legend <- function(x = 0.6, y = 0.2, width = 0.2, height = 0.2, fontsize = 10) {
# Finish viewport() function
pushViewport(viewport(x = x, y = y, width = width, height = height, just = "center"))
legend_labels <- c("Past record high",
"95% CI range",
"Current year",
"Past years",
"Past record low")
legend_position <- c(0.9, 0.7, 0.5, 0.2, 0.1)
# Finish grid.text() function
grid.text(label = legend_labels, x = 0.12, y = legend_position,
just = "left",
gp = gpar(fontsize = fontsize, col = "grey20"))
# Position dots, rectangle and line
point_position_y <- c(0.1, 0.2, 0.9)
point_position_x <- rep(0.06, length(point_position_y))
grid.points(x = point_position_x, y = point_position_y, pch = 16,
gp = gpar(col = c("#0000CD", "#EED8AE", "#CD2626")))
grid.rect(x = 0.06, y = 0.5, width = 0.06, height = 0.4,
gp = gpar(col = NA, fill = "#8B7E66"))
grid.lines(x = c(0.03, 0.09), y = c(0.5, 0.5),
gp = gpar(col = "black", lwd = 3))
# Add popViewport() for bookkeeping
popViewport()
}
# Plotting object p, from previous exercise
p
# Call draw_pop_legend()
draw_pop_legend()
# Finish the clean_weather function
clean_weather <- function(file) {
weather <- read.fwf(file,
header = FALSE,
col.names = c("month", "day", "year", "temp"),
widths = c(14, 14, 13, 4))
weather %>%
filter(!(month == 2 & day == 29)) %>%
group_by(year) %>%
mutate(yearday = 1:length(day)) %>%
ungroup() %>%
filter(temp != -99)
}
# Import NYNEWYOR.txt: my_data
my_data <- clean_weather('data/NYNEWYOR.txt')
ny_weather_data_url <- 'https://assets.datacamp.com/production/course_862/datasets/NYNEWYOR.txt'
download.file(ny_weather_data_url, 'data/NYNEWYOR.txt')
# Create the stats object
StatHistorical <- ggproto("StatHistorical", Stat,
compute_group = function(data, scales, params) {
data <- data %>%
filter(year != max(year)) %>%
group_by(x) %>%
mutate(ymin = Hmisc::smean.cl.normal(y)[3],
ymax = Hmisc::smean.cl.normal(y)[2]) %>%
ungroup()
},
required_aes = c("x", "y", "year"))
# Create the layer
stat_historical <- function(mapping = NULL, data = NULL, geom = "point",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
list(
layer(
stat = "identity", data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, col = "#EED8AE", alpha = 0.3, shape = 16, ...)
),
layer(
stat = StatHistorical, data = data, mapping = mapping, geom = "linerange",
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, col = "#8B7E66", ...)
)
)
}
# Build the plot
my_data <- clean_weather("data/NYNEWYOR.txt")
ggplot(my_data, aes(x = yearday, y = temp, year = year)) +
stat_historical()
# Create the stats object
StatPresent <- ggproto("StatPresent", Stat,
compute_group = function(data, scales, params) {
data <- filter(data, year == max(year))
},
required_aes = c("x", "y", "year"))
# Create the layer
stat_present <- function(mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = StatPresent, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
# Build the plot
my_data <- clean_weather("data/NYNEWYOR.txt")
ggplot(my_data, aes(x = yearday, y = temp, year = year)) +
stat_historical() +
stat_present()
# Create the stats object
StatExtremes <- ggproto("StatExtremes", Stat,
compute_group = function(data, scales, params) {
present <- data %>%
filter(year == max(year))
past <- data %>%
filter(year != max(year))
past_extremes <- past %>%
group_by(x) %>%
summarise(past_low = min(y),
past_high = max(y))
# transform data to contain extremes
data <- present %>%
left_join(past_extremes) %>%
mutate(record = ifelse(y < past_low,
"#0000CD",
ifelse(y > past_high,
"#CD2626",
"#00000000")))
},
required_aes = c("x", "y", "year"))
# Create the layer
stat_extremes <- function(mapping = NULL, data = NULL, geom = "point",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = StatExtremes, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
# Build the plot
my_data <- clean_weather("data/NYNEWYOR.txt")
ggplot(my_data, aes(x = yearday, y = temp, year = year)) +
stat_historical() +
stat_present() +
stat_extremes(aes(col = ..record..)) +
scale_color_identity() # Colour specification
# File paths of all datasets
my_files <- c("data/NYNEWYOR.txt","data/FRPARIS.txt", "data/ILREYKJV.txt", "data/UKLONDON.txt")
# Build my_data with a for loop
my_data <- NULL
for (file in my_files) {
temp <- clean_weather(file)
temp$id <- sub(".txt", "", file)
my_data <- rbind(my_data, temp)
}
str(my_data)
## Classes 'tbl_df', 'tbl' and 'data.frame': 31017 obs. of 6 variables:
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : num 1 2 3 4 5 6 7 8 9 10 ...
## $ year : num 1995 1995 1995 1995 1995 ...
## $ temp : num 44 41 28 31 21 27 42 35 34 29 ...
## $ yearday: int 1 2 3 4 5 6 7 8 9 10 ...
## $ id : chr "data/NYNEWYOR" "data/NYNEWYOR" "data/NYNEWYOR" "data/NYNEWYOR" ...
# Build the final plot, from scratch!
ggplot(my_data, aes(x = yearday, y = temp, year = year)) +
stat_historical() +
stat_present() +
stat_extremes(aes(col = ..record..)) +
scale_color_identity() + # specify colour here
facet_wrap(~ id, ncol = 2)