ggplot2
is a hugely powerful R package which enables the creation of high quality scientific charting and visualisation for communication.
This note walks through some basics for creating charts to illustrate the literature review but there are lots of resources for chart creation of which this course really showcases what is possible.
Load libraries and import data
Code
## load libraries
library (pacman)
pacman:: p_load (tidyverse, readxl, here, skimr, overviewR, ggmap, gt, gtsummary, DataExplorer, gtExtras, rphylopic, magick) ## readxl is needed to load excel files into R
path <- here:: here ("/Users/julianflowers/Dropbox/My Mac (Julians-MBP-2)/Downloads" ) ## the `here` package helps with file paths
xls <- list.files (path, "xls" , full.names = TRUE )
data <- readxl:: read_xlsx (xls[13 ]) |>
janitor:: clean_names ()
New names:
• `Deer (cervine)` -> `Deer (cervine)...10`
• `Livestock (not stated)` -> `Livestock (not stated)...11`
• `Simulated grazing` -> `Simulated grazing...13`
• `Deer (cervine)` -> `Deer (cervine)...35`
• `Simulated grazing` -> `Simulated grazing...41`
• `Livestock (not stated)` -> `Livestock (not stated)...42`
Code
# A tibble: 1 × 9
rows columns discrete_columns conti…¹ all_m…² total…³ compl…⁴ total…⁵ memor…⁶
<int> <int> <int> <int> <int> <int> <int> <int> <dbl>
1 45 99 82 0 17 1815 0 4455 176168
# … with abbreviated variable names ¹continuous_columns, ²all_missing_columns,
# ³total_missing_values, ⁴complete_rows, ⁵total_observations, ⁶memory_usage
Data wrangling
The dataset is wide - that is each variable or category has its own column, but for analysis and plotting it might be easier to groups some of the variables to make the data more hierarchical. For example climate change outcome has a set of outcome categories each of which have a direction of change.
The skimr
package gives us a rapid overview of the data structure…
Code
Data summary
Name
data
Number of rows
45
Number of columns
99
_______________________
Column type frequency:
character
82
logical
17
________________________
Group variables
None
Variable type: character
item
0
1.00
24
44
0
45
0
title
0
1.00
66
160
0
27
0
wildlife_trusts_relevance
0
1.00
4
4
0
1
0
evidence_point
0
1.00
17
17
0
7
0
herbivore_species_or_simulation_exposure
2
0.96
14
34
0
9
0
horses_equines
43
0.04
16
22
0
2
0
cows_bovines
12
0.73
6
26
0
11
0
sheep_ovines
33
0.27
6
21
0
4
0
livestock_not_stated_11
44
0.02
23
23
0
1
0
simulated_grazing_13
39
0.13
8
8
0
1
0
mixed_herd_or_single_species_exposure
0
1.00
6
15
0
3
0
climate_effect_outcome
1
0.98
12
218
0
14
0
soil_carbon
36
0.20
8
10
0
4
0
digestive_methane_emissions_production_or_live_weight
37
0.18
8
10
0
3
0
digestive_methane_emissions_land_area
41
0.09
9
9
0
2
0
digestive_methane_emissions_per_animal
40
0.11
8
10
0
4
0
methane_flux_emissions
34
0.24
8
10
0
4
0
nitrous_oxide
23
0.49
8
10
0
3
0
co2_flux_emissions
33
0.27
9
10
0
3
0
above_ground_biomass
42
0.07
9
10
0
2
0
total_ghg_emissions_co2_equiv
39
0.13
8
9
0
3
0
root_biomass
42
0.07
9
10
0
2
0
direction_of_climate_change_outcome
2
0.96
10
20
0
4
0
herbivore_species_or_other_comparator
0
1.00
7
30
0
9
0
sheep_ovine
35
0.22
6
21
0
4
0
cows_bovine
22
0.51
10
26
0
7
0
mowing
44
0.02
7
7
0
1
0
cutting
44
0.02
15
15
0
1
0
no_herbivore_or_simulation
35
0.22
27
27
0
1
0
not_applicable
44
0.02
15
15
0
1
0
simulated_grazing_41
43
0.04
8
8
0
1
0
livestock_not_stated_42
44
0.02
23
23
0
1
0
mixed_herd_or_single_species_comparator
0
1.00
6
15
0
3
0
herbivore_density_frequency_exposure
0
1.00
12
40
0
8
0
herbivore_density_frequency_comparator
0
1.00
5
30
0
8
0
continent_and_country
0
1.00
7
47
0
3
0
australia_and_oceania
44
0.02
12
12
0
1
0
europe
1
0.98
6
32
0
10
0
north_america
43
0.04
4
4
0
1
0
latitude_n_s
0
1.00
14
125
0
28
0
longitude_e_w
0
1.00
14
126
0
28
0
elevation
0
1.00
14
190
0
18
0
site_name
0
1.00
14
264
0
29
0
study_characteristics
43
0.04
206
418
0
2
0
soil_type
0
1.00
5
28
0
12
0
current_land_use
0
1.00
11
56
0
7
0
previous_land_use
0
1.00
11
56
0
7
0
vegetation_type
0
1.00
14
517
0
24
0
habitat_management_exposure
1
0.98
5
37
0
10
0
habitat_management_comparator
4
0.91
5
37
0
12
0
herbivory_season_exposure
1
0.98
7
25
0
10
0
herbivory_season_comparator
0
1.00
7
25
0
10
0
effect_on_plants_or_soil_exposure
0
1.00
5
20
0
7
0
effect_on_plants_or_soil_comparator
0
1.00
5
28
0
11
0
herbivore_management_exposure
0
1.00
14
46
0
7
0
herbivore_management_comparator
0
1.00
14
46
0
7
0
supplementary_feeding_or_treatment_exposure
0
1.00
5
43
0
4
0
supplementary_feeding_or_teatment_comparator
0
1.00
5
15
0
3
0
forage_species_exposure
0
1.00
11
15
0
2
0
forage_species_comparator
0
1.00
11
15
0
2
0
other_comparator_differences_please_add
0
1.00
15
124
0
11
0
introduced_status_exposure
0
1.00
15
16
0
2
0
introduced_status_comparator
0
1.00
15
16
0
2
0
conservation_status
1
0.98
11
97
0
7
0
modifiers
44
0.02
50
50
0
1
0
soil_temperature
3
0.93
11
143
0
8
0
air_temperature
1
0.98
11
325
0
26
0
precipitation
1
0.98
11
189
0
23
0
growing_season
1
0.98
11
95
0
10
0
permafrost_or_snow
1
0.98
11
68
0
4
0
disturbance_events
1
0.98
8
15
0
3
0
predators
1
0.98
15
15
0
2
0
other_herbivores
1
0.98
15
54
0
3
0
habitat_productivity
1
0.98
11
105
0
3
0
biome
1
0.98
10
10
0
1
0
lowland_or_upland_300m_exposure
0
1.00
7
15
0
4
0
habitat_exposure
0
1.00
10
44
0
9
0
acidity_exposure
1
0.98
7
23
0
5
0
lowland_or_upland_300m_comparator
0
1.00
7
15
0
4
0
habitat_comparator
0
1.00
10
44
0
8
0
acidity_comparator
1
0.98
7
23
0
5
0
comments
0
1.00
239
3591
0
45
0
Variable type: logical
goats_caprines
45
0
NaN
:
deer_cervine_10
45
0
NaN
:
simulated_browsing
45
0
NaN
:
albedo
45
0
NaN
:
dung_methane_emissions
45
0
NaN
:
wildfire
45
0
NaN
:
goats_caprine
45
0
NaN
:
horses_equine
45
0
NaN
:
deer_cervine_35
45
0
NaN
:
not_available
45
0
NaN
:
global
45
0
NaN
:
africa
45
0
NaN
:
antarctica
45
0
NaN
:
asia
45
0
NaN
:
south_america_and_central_america
45
0
NaN
:
habitat_type_exposure
45
0
NaN
:
habitat_type_comparator
45
0
NaN
:
Lets try and reshape the climate effects variables.
Code
climate_effect <- data |>
select (item, title, albedo: direction_of_climate_change_outcome)
climate_effect_long <- climate_effect |>
pivot_longer (names_to = "vars" , values_to = "vals" , cols = 3 : 16 )
pluck (climate_effect_long, "vars" ) |>
unique ()
[1] "albedo"
[2] "soil_carbon"
[3] "digestive_methane_emissions_production_or_live_weight"
[4] "digestive_methane_emissions_land_area"
[5] "digestive_methane_emissions_per_animal"
[6] "dung_methane_emissions"
[7] "methane_flux_emissions"
[8] "nitrous_oxide"
[9] "co2_flux_emissions"
[10] "above_ground_biomass"
[11] "total_ghg_emissions_co2_equiv"
[12] "wildfire"
[13] "root_biomass"
[14] "direction_of_climate_change_outcome"
Recoding variables to create groups
Code
cel <- climate_effect_long |>
mutate (var_cat = case_when (str_detect (vars, "methane" ) ~ "methane" ,
str_detect (vars, "biomass" ) ~ "biomass" ,
str_detect (vars, "emissions" ) ~ "emissions" ,
TRUE ~ vars))
gt <- cel |>
janitor:: tabyl (vars, vals) |>
gt:: gt ()
gt |>
tab_header (title = "Climate change outcomes" ) |>
tab_spanner (
label = "Outcome" ,
columns = 2 : 9
) |>
tab_row_group (
label = "Biomass" ,
rows = c (1 , 11 )
) |>
tab_row_group (
label = "Methane" ,
rows = c (5 : 9 )
)
vars
Outcome
-Cooling effect
-Decrease
-Heating effect
-Increase
-No change
-Unclear
-Unclear or multiple
NA_
Methane
digestive_methane_emissions_per_animal
0
2
0
1
1
1
0
40
digestive_methane_emissions_production_or_live_weight
0
0
0
1
6
1
0
37
direction_of_climate_change_outcome
7
0
15
0
12
0
9
2
dung_methane_emissions
0
0
0
0
0
0
0
45
methane_flux_emissions
0
1
0
4
5
1
0
34
Biomass
above_ground_biomass
0
2
0
0
1
0
0
42
root_biomass
0
0
0
2
1
0
0
42
albedo
0
0
0
0
0
0
0
45
co2_flux_emissions
0
5
0
5
2
0
0
33
digestive_methane_emissions_land_area
0
1
0
3
0
0
0
41
nitrous_oxide
0
0
0
10
10
2
0
23
soil_carbon
0
3
0
1
4
1
0
36
total_ghg_emissions_co2_equiv
0
2
0
3
0
1
0
39
wildfire
0
0
0
0
0
0
0
45
Lets do similar data on herbivores.
Code
#| label: download phylopic images
dir.create (paste0 (here (), "/images" ))
image_path <- here ("images" )
taxa <- "sheep"
ns <- name_search (taxa, options = "namebankID" )[1 ,]
id <- name_images (uuid = ns$ uid[1 ])
uid <- id$ same[[4 ]]$ uid
size <- 64
img <- paste0 ("http://phylopic.org/assets/images/submissions/" , uid, "." , size, ".png" ) %>%
image_read ()
img |>
image_write (paste0 (image_path, "/sheep.png" ))
Table decoration with images
Code
## Create a table of images
image_files <- list.files (here:: here ("/Users/julianflowers/Dropbox/My Mac (Julians-MBP-2)/Downloads" ), "png" , full.names = T)
taxa <- c ("Cows" , "Goat" , "Horses" , "Sheep" )
image_tab <- tibble (taxa, image_files = image_files[c (1 : 3 , 8 )])
data |>
select (item, contains ("ine" ), - continent_and_country) |>
pivot_longer (names_to = "Herbivores" , values_to = "Breed" , cols = 2 : 11 , values_drop_na = TRUE ) |>
mutate (Herbivore_Group = case_when (str_detect (Herbivores, "ines" ) ~ "Outcome" ,
TRUE ~ "Comparator" )) |>
group_by (Herbivore_Group, Herbivores) |>
count (Breed) |>
mutate (Herbivores = str_remove_all (Herbivores, "_.*ine?." ),
Herbivores = str_to_title (Herbivores)) |>
pivot_wider (names_from = "Herbivore_Group" , values_from = "n" , values_fill = 0 ) |>
mutate (Breed = str_remove (Breed, "-" ),
Breed = str_remove (Breed, " \\ r \\ n" )) |>
left_join (image_tab, by = c ("Herbivores" = "taxa" )) |>
select (Herbivory = image_files, Breed, Outcome, Comparator) |>
gt:: gt () |>
gt_img_rows (Herbivory, img_source = "local" ) |>
gt_theme_guardian ()
Adding missing grouping variables: `Herbivores`