The purpose of this notebook is to illustrate how data can be manipulated or visualised using strings as variables.
library(tidyverse)
library(leaflet)
library(sf)
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
iris <- as_tibble(iris)
iris
## # A tibble: 150 x 5
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <dbl> <dbl> <dbl> <dbl> <fct>
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## # ... with 140 more rows
storms
## # A tibble: 10,010 x 13
## name year month day hour lat long status category wind
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <chr> <ord> <int>
## 1 Amy 1975 6 27 0 27.5 -79 tropical depr… -1 25
## 2 Amy 1975 6 27 6 28.5 -79 tropical depr… -1 25
## 3 Amy 1975 6 27 12 29.5 -79 tropical depr… -1 25
## 4 Amy 1975 6 27 18 30.5 -79 tropical depr… -1 25
## 5 Amy 1975 6 28 0 31.5 -78.8 tropical depr… -1 25
## 6 Amy 1975 6 28 6 32.4 -78.7 tropical depr… -1 25
## 7 Amy 1975 6 28 12 33.3 -78 tropical depr… -1 25
## 8 Amy 1975 6 28 18 34 -77 tropical depr… -1 30
## 9 Amy 1975 6 29 0 34.4 -75.8 tropical storm 0 35
## 10 Amy 1975 6 29 6 34 -74.8 tropical storm 0 40
## # ... with 10,000 more rows, and 3 more variables: pressure <int>,
## # ts_diameter <dbl>, hu_diameter <dbl>
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
select_if(starwars, funs(is.numeric))
## # A tibble: 87 x 3
## height mass birth_year
## <int> <dbl> <dbl>
## 1 172 77 19
## 2 167 75 112
## 3 96 32 33
## 4 202 136 41.9
## 5 150 49 19
## 6 178 120 52
## 7 165 75 47
## 8 97 32 NA
## 9 183 84 24
## 10 182 77 57
## # ... with 77 more rows
select_at(starwars, vars("name", "height"))
## # A tibble: 87 x 2
## name height
## <chr> <int>
## 1 Luke Skywalker 172
## 2 C-3PO 167
## 3 R2-D2 96
## 4 Darth Vader 202
## 5 Leia Organa 150
## 6 Owen Lars 178
## 7 Beru Whitesun lars 165
## 8 R5-D4 97
## 9 Biggs Darklighter 183
## 10 Obi-Wan Kenobi 182
## # ... with 77 more rows
rename_all(starwars, funs(paste0("sw_", .)))
## # A tibble: 87 x 13
## sw_name sw_height sw_mass sw_hair_color sw_skin_color sw_eye_color
## <chr> <int> <dbl> <chr> <chr> <chr>
## 1 Luke Skywal… 172 77 blond fair blue
## 2 C-3PO 167 75 <NA> gold yellow
## 3 R2-D2 96 32 <NA> white, blue red
## 4 Darth Vader 202 136 none white yellow
## 5 Leia Organa 150 49 brown light brown
## 6 Owen Lars 178 120 brown, grey light blue
## 7 Beru Whites… 165 75 brown light blue
## 8 R5-D4 97 32 <NA> white, red red
## 9 Biggs Darkl… 183 84 black light brown
## 10 Obi-Wan Ken… 182 77 auburn, white fair blue-gray
## # ... with 77 more rows, and 7 more variables: sw_birth_year <dbl>,
## # sw_gender <chr>, sw_homeworld <chr>, sw_species <chr>,
## # sw_films <list>, sw_vehicles <list>, sw_starships <list>
rename_if(starwars, funs(is.numeric), funs(str_to_upper))
## # A tibble: 87 x 13
## name HEIGHT MASS hair_color skin_color eye_color BIRTH_YEAR gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
rename_at(starwars, vars("name", "height"), funs(str_to_upper))
## # A tibble: 87 x 13
## NAME HEIGHT mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
filter_at(starwars, vars(contains("color")), all_vars(. == "brown"))
## # A tibble: 1 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Wicket S… 88 20 brown brown brown 8 male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
filter_at(starwars, vars(contains("color")), any_vars(. == "brown"))
## # A tibble: 31 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Leia Or… 150 49 brown light brown 19 female
## 2 Beru Wh… 165 75 brown light blue 47 female
## 3 Biggs D… 183 84 black light brown 24 male
## 4 Chewbac… 228 112 brown unknown blue 200 male
## 5 Han Solo 180 80 brown fair brown 29 male
## 6 Wedge A… 170 77 brown fair hazel 21 male
## 7 Jek Ton… 180 110 brown fair blue NA male
## 8 Yoda 66 17 white green brown 896 male
## 9 Boba Fe… 183 78.2 black fair brown 31.5 male
## 10 Lando C… 177 79 black dark brown 31 male
## # ... with 21 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
filter_if(starwars, is.numeric, all_vars(. > 100))
## # A tibble: 2 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Chewbac… 228 112 brown unknown blue 200 male
## 2 Jabba D… 175 1358 <NA> green-tan,… orange 600 herma…
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
filter_if(starwars, is.character, any_vars(. > 100))
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
mutate_all(starwars, as.character)
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red <NA> <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <chr>, vehicles <chr>, starships <chr>
mutate_if(starwars, funs(is.character), funs(as.factor))
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <fct> <int> <dbl> <fct> <fct> <fct> <dbl> <fct>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <fct>,
## # species <fct>, films <list>, vehicles <list>, starships <list>
mutate_at(starwars, vars("height"), funs(. / 10))
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 17.2 77 blond fair blue 19 male
## 2 C-3PO 16.7 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 9.6 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 20.2 136 none white yellow 41.9 male
## 5 Leia Or… 15 49 brown light brown 19 female
## 6 Owen La… 17.8 120 brown, gr… light blue 52 male
## 7 Beru Wh… 16.5 75 brown light blue 47 female
## 8 R5-D4 9.7 32 <NA> white, red red NA <NA>
## 9 Biggs D… 18.3 84 black light brown 24 male
## 10 Obi-Wan… 18.2 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
starwars %>%
select(1:3) %>%
mutate(sum(height, mass, na.rm=T))
## # A tibble: 87 x 4
## name height mass `sum(height, mass, na.rm = T)`
## <chr> <int> <dbl> <dbl>
## 1 Luke Skywalker 172 77 19864.
## 2 C-3PO 167 75 19864.
## 3 R2-D2 96 32 19864.
## 4 Darth Vader 202 136 19864.
## 5 Leia Organa 150 49 19864.
## 6 Owen Lars 178 120 19864.
## 7 Beru Whitesun lars 165 75 19864.
## 8 R5-D4 97 32 19864.
## 9 Biggs Darklighter 183 84 19864.
## 10 Obi-Wan Kenobi 182 77 19864.
## # ... with 77 more rows
starwars %>%
select(1:3) %>%
group_by_all() %>%
mutate(sum(height, mass, na.rm=T))
## # A tibble: 87 x 4
## # Groups: name, height, mass [87]
## name height mass `sum(height, mass, na.rm = T)`
## <chr> <int> <dbl> <dbl>
## 1 Luke Skywalker 172 77 249
## 2 C-3PO 167 75 242
## 3 R2-D2 96 32 128
## 4 Darth Vader 202 136 338
## 5 Leia Organa 150 49 199
## 6 Owen Lars 178 120 298
## 7 Beru Whitesun lars 165 75 240
## 8 R5-D4 97 32 129
## 9 Biggs Darklighter 183 84 267
## 10 Obi-Wan Kenobi 182 77 259
## # ... with 77 more rows
starwars %>%
select(1:3) %>%
group_by_if(is.character) %>%
count()
## # A tibble: 87 x 2
## # Groups: name [87]
## name n
## <chr> <int>
## 1 Ackbar 1
## 2 Adi Gallia 1
## 3 Anakin Skywalker 1
## 4 Arvel Crynyd 1
## 5 Ayla Secura 1
## 6 Bail Prestor Organa 1
## 7 Barriss Offee 1
## 8 BB8 1
## 9 Ben Quadinaros 1
## 10 Beru Whitesun lars 1
## # ... with 77 more rows
group_by_at(starwars, vars("eye_color", "hair_color")) %>%
count()
## # A tibble: 35 x 3
## # Groups: eye_color, hair_color [35]
## eye_color hair_color n
## <chr> <chr> <int>
## 1 black none 9
## 2 black <NA> 1
## 3 blue auburn 1
## 4 blue auburn, grey 1
## 5 blue black 2
## 6 blue blond 3
## 7 blue brown 7
## 8 blue brown, grey 1
## 9 blue none 3
## 10 blue white 1
## # ... with 25 more rows
starwars %>%
select_if(funs(is.numeric)) %>%
summarise_all(funs(mean), na.rm = T)
## # A tibble: 1 x 3
## height mass birth_year
## <dbl> <dbl> <dbl>
## 1 174. 97.3 87.6
summarise_if(starwars, funs(is.numeric), funs(min, median, mean, sd, max), na.rm = T)
## # A tibble: 1 x 15
## height_min mass_min birth_year_min height_median mass_median
## <dbl> <dbl> <dbl> <int> <dbl>
## 1 66 15 8 180 79
## # ... with 10 more variables: birth_year_median <dbl>, height_mean <dbl>,
## # mass_mean <dbl>, birth_year_mean <dbl>, height_sd <dbl>,
## # mass_sd <dbl>, birth_year_sd <dbl>, height_max <dbl>, mass_max <dbl>,
## # birth_year_max <dbl>
starwars %>%
summarise_if(funs(is.numeric), funs(min, median, mean, sd, max), na.rm = T) %>%
gather() %>%
arrange(key)
## # A tibble: 15 x 2
## key value
## <chr> <dbl>
## 1 birth_year_max 896
## 2 birth_year_mean 87.6
## 3 birth_year_median 52
## 4 birth_year_min 8
## 5 birth_year_sd 155.
## 6 height_max 264
## 7 height_mean 174.
## 8 height_median 180
## 9 height_min 66
## 10 height_sd 34.8
## 11 mass_max 1358
## 12 mass_mean 97.3
## 13 mass_median 79
## 14 mass_min 15
## 15 mass_sd 169.
summarise_at(starwars, vars("height", "mass"), funs(sum, mean), na.rm = T)
## # A tibble: 1 x 4
## height_sum mass_sum height_mean mass_mean
## <int> <dbl> <dbl> <dbl>
## 1 14123 5741. 174. 97.3
starwars %>%
summarise_all(funs(sum(is.na(.)))) %>%
select_if(any_vars(. > 0))
## # A tibble: 1 x 7
## height mass hair_color birth_year gender homeworld species
## <int> <int> <int> <int> <int> <int> <int>
## 1 6 28 5 44 3 10 5
select(iris, !!sym("Sepal.Width")) %>% as_tibble()
## # A tibble: 150 x 1
## Sepal.Width
## <dbl>
## 1 3.5
## 2 3
## 3 3.2
## 4 3.1
## 5 3.6
## 6 3.9
## 7 3.4
## 8 3.4
## 9 2.9
## 10 3.1
## # ... with 140 more rows
select(iris, !!!syms(c("Sepal.Width", "Sepal.Length"))) %>% as_tibble()
## # A tibble: 150 x 2
## Sepal.Width Sepal.Length
## <dbl> <dbl>
## 1 3.5 5.1
## 2 3 4.9
## 3 3.2 4.7
## 4 3.1 4.6
## 5 3.6 5
## 6 3.9 5.4
## 7 3.4 4.6
## 8 3.4 5
## 9 2.9 4.4
## 10 3.1 4.9
## # ... with 140 more rows
df <- iris
x_var <- "Sepal.Width"
y_var <- "Sepal.Length"
colour_var <- "Species"
#facet_var <- "Species"
#title <- "Iris sepal length and width by species"
#subtitle <- "1936"
#x_title <- "Species"
#y_title <- "Sepal length"
#caption <- "Note: this is the famous Fisher's iris data set"
ggplot(df, aes(x = !!sym(x_var), y = !!sym(y_var), colour = !!sym(colour_var))) +
geom_point(show.legend = F) +
#facet_wrap(as.formula(paste0("~", facet_var))) +
#labs(title = title, subtitle = subtitle, x = x_title, y = y_title, caption = caption) +
scale_colour_brewer(palette = "Paired")
df <- iris
x_var <- "Species"
y_var <- "Sepal.Length"
colour_var <- "Species"
#facet_var <- "Species"
#title <- "Iris sepal length by species"
#subtitle <- "1936"
#x_title <- "Species"
#y_title <- "Sepal length"
#caption <- "Note: this is the famous Fisher's iris data set"
df %>%
group_by_at(vars(x_var)) %>% #if you are faceting or colouring by different variables, you need to add these
summarise_at(vars(y_var), funs("mean")) %>%
ggplot(aes(x = !!sym(x_var), y = !!sym(y_var), fill = !!sym(colour_var))) +
geom_bar(stat = "identity", colour = "black", show.legend = F) +
#facet_wrap(as.formula(paste0("~", facet_var))) +
#labs(title = title, subtitle = subtitle, x = x_title, y = y_title, caption = caption) +
scale_fill_brewer(palette = "Paired")
#select inputs
df <- storms #filter if necessary
z_var <- "wind"
colour_no <- 4
colour_reverse <- F
legend_digits <- 0
#legend_title <- paste("Wind speed", "km/hr", "1975\u20132015", sep = "<br/>")
#adjust generic code only if necessary
if (is.character(df[[z_var]]) | is.factor(df[[z_var]])) pal_col_name <- "Set1"
if (is.numeric(df[[z_var]])) pal_col_name <- "Spectral"
pal <- RColorBrewer::brewer.pal(colour_no, pal_col_name)
#pal <- c("#d73027","#A8A8A8","#4575b4") #manually add a colour vector in
if (is.character(df[[z_var]]) | is.factor(df[[z_var]]) | is.logical(df[[z_var]])) pal_fun <- colorFactor(palette = pal, domain = df[[z_var]], reverse = colour_reverse)
if (is.numeric(df[[z_var]])) pal_fun <- colorBin(palette = pal, domain = df[[z_var]], bins = colour_no, pretty = T, reverse = colour_reverse)
#if (is.numeric(df[[z_var]])) pal_fun <- colorBin(palette = pal, domain = df[[z_var]], bins=quantile(df[[z_var]], probs=seq(0, 1, 1/colour_no)), reverse = colour_reverse) #quantiles
#if (is.numeric(df[[z_var]])) pal_fun <- colorNumeric(palette = pal, domain = df[[z_var]], reverse = colour_reverse)
leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = df,
color = ~pal_fun(df[[z_var]]),
label = ~as.character(df[[z_var]]),
radius = 2,
opacity = 1,
fillOpacity = 1,
weight = 2
) %>%
addLegend(pal = pal_fun, values = df[[z_var]],
#title = legend_title,
position = "bottomright",
opacity = 1,
labFormat = labelFormat(between = "–", digits = legend_digits)
)