Import data

# Read the palm-trait workbook into a tibble, then print it for a quick look.
palm <- read_excel(path = "../00_data/palmtrees.xlsx")
## Warning: Coercing text to numeric in V1449 / R1449C22: '0.56675675700000006'
print(palm)
## # A tibble: 2,557 × 29
##    spec_name            acc_genus acc_species palm_tribe palm_subfamily climbing
##    <chr>                <chr>     <chr>       <chr>      <chr>          <chr>   
##  1 Acanthophoenix crin… Acanthop… crinita     Areceae    Arecoideae     climbing
##  2 Acanthophoenix rous… Acanthop… rousselii   Areceae    Arecoideae     climbing
##  3 Acanthophoenix rubra Acanthop… rubra       Areceae    Arecoideae     climbing
##  4 Acoelorrhaphe wrigh… Acoelorr… wrightii    Trachycar… Coryphoideae   climbing
##  5 Acrocomia aculeata   Acrocomia aculeata    Cocoseae   Arecoideae     climbing
##  6 Acrocomia crispa     Acrocomia crispa      Cocoseae   Arecoideae     climbing
##  7 Acrocomia emensis    Acrocomia emensis     Cocoseae   Arecoideae     climbing
##  8 Acrocomia glaucesce… Acrocomia glaucescens Cocoseae   Arecoideae     climbing
##  9 Acrocomia hassleri   Acrocomia hassleri    Cocoseae   Arecoideae     climbing
## 10 Acrocomia intumesce… Acrocomia intumescens Cocoseae   Arecoideae     climbing
## # ℹ 2,547 more rows
## # ℹ 23 more variables: acaulescent <chr>, erect <chr>, stem_solitary <chr>,
## #   stem_armed <chr>, leaves_armed <chr>, max_stem_height_m <dbl>,
## #   max_stem_dia_cm <dbl>, understorey_canopy <chr>, max_leaf_number <dbl>,
## #   max__blade__length_m <dbl>, max__rachis__length_m <dbl>,
## #   max__petiole_length_m <dbl>, average_fruit_length_cm <dbl>,
## #   min_fruit_length_cm <dbl>, max_fruit_length_cm <dbl>, …

Introduction

Questions

Variation

Visualizing distributions

# Bar chart: one bar per distinct value of max_leaf_number.
ggplot(palm, aes(x = max_leaf_number)) +
  geom_bar()
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_count()`).

# Histogram of max_fruit_length_cm using bins 0.5 wide.
ggplot(palm, aes(x = max_fruit_length_cm)) +
  geom_histogram(binwidth = 0.5)
## Warning: Removed 1641 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Overlaid frequency polygons of average_fruit_length_cm, one line per
# fruit_shape, using bins 0.1 wide.
ggplot(palm, aes(x = average_fruit_length_cm)) +
  geom_freqpoly(aes(colour = fruit_shape), binwidth = 0.1)
## Warning: Removed 505 rows containing non-finite outside the scale range
## (`stat_bin()`).

Typical values

# Fine-grained histogram (bins 0.1 wide) of max__rachis__length_m.
ggplot(palm) +
  geom_histogram(aes(x = max__rachis__length_m), binwidth = 0.1)
## Warning: Removed 1026 rows containing non-finite outside the scale range
## (`stat_bin()`).

Unusual values

# Histogram of average_fruit_length_cm with the y-axis zoomed to 0-200 so
# that sparsely populated bins remain visible. coord_cartesian() zooms
# without discarding any data.
ggplot(palm, aes(x = average_fruit_length_cm)) +
  geom_histogram(binwidth = 0.5) +
  coord_cartesian(ylim = c(0, 200))
## Warning: Removed 505 rows containing non-finite outside the scale range
## (`stat_bin()`).

Missing Values

# Two ways of handling unusual values. Both must test the column itself:
# the earlier between(0, 3, 20) and ifelse(0 < 3 | 0 > 20, NA, 0) tested the
# constant 0, so the filter dropped every row and the mutate only added an
# all-NA column `y` that nothing used.
# NOTE(review): max_stem_dia_cm is assumed to be the intended variable because
# it is the y aesthetic of the scatterplot that follows; the 3 and 20 cut-offs
# are kept from the original code -- confirm both.

# Option 1: drop rows whose max_stem_dia_cm lies outside [3, 20].
palm2 <- palm %>%
  filter(between(max_stem_dia_cm, 3, 20))

# Option 2 (overwrites palm2): keep every row but replace out-of-range
# values with NA, so the other columns of those rows are not lost.
palm2 <- palm %>%
  mutate(
    max_stem_dia_cm = ifelse(
      max_stem_dia_cm < 3 | max_stem_dia_cm > 20,
      NA,
      max_stem_dia_cm
    )
  )

# Scatterplot of stem diameter against stem height from palm2; na.rm = TRUE
# drops rows with missing values without emitting the usual warning.
ggplot(palm2) +
  geom_point(
    aes(x = max_stem_height_m, y = max_stem_dia_cm),
    na.rm = TRUE
  )

Covariation

A categorical and continuous variable

# Frequency polygons of average_fruit_length_cm, one line per fruit_shape.
# The bin width has to be on the scale of the variable: with the previous
# binwidth = 500 every observation fell into a single bin, so each line was
# drawn as one triangle. 0.5 is the width already used for the histogram of
# this same variable.
ggplot(data = palm, mapping = aes(x = average_fruit_length_cm)) +
  geom_freqpoly(mapping = aes(colour = fruit_shape), binwidth = 0.5)
## Warning: Removed 505 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Count of rows in each fruit_shape category.
ggplot(palm) +
  geom_bar(mapping = aes(x = fruit_shape))

# Box plots of max_stem_height_m for each acaulescent category, ordered by the
# group median. reorder() defaults to mean() without na.rm, and
# max_stem_height_m contains missing values, so the default gives NA scores
# and an arbitrary order; median with na.rm = TRUE gives a usable ordering
# that matches the statistic the box plot itself displays.
ggplot(data = palm) +
  geom_boxplot(
    mapping = aes(
      x = reorder(acaulescent, max_stem_height_m, FUN = median, na.rm = TRUE),
      y = max_stem_height_m
    )
  ) +
  coord_flip()
## Warning: Removed 446 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables

# Point size shows how many rows share each combination of fruit_shape and
# fruit_size_categorical.
ggplot(palm, aes(x = fruit_shape, y = fruit_size_categorical)) +
  geom_count()

# Heat map of fruit_shape by fruit_size_categorical: count() supplies the
# column n, which drives the tile fill.
palm %>%
  count(fruit_shape, fruit_size_categorical) %>%
  ggplot(aes(x = fruit_shape, y = fruit_size_categorical, fill = n)) +
  geom_tile()

Two continuous variables

# Scatterplot of rachis length against stem height; the low alpha makes
# overplotted regions appear darker.
ggplot(palm, aes(x = max_stem_height_m, y = max__rachis__length_m)) +
  geom_point(alpha = 1 / 10)
## Warning: Removed 1066 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Same two variables binned into rectangles; fill encodes the count per bin.
ggplot(palm, aes(x = max_stem_height_m, y = max__rachis__length_m)) +
  geom_bin2d()
## `stat_bin2d()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1066 rows containing non-finite outside the scale range
## (`stat_bin2d()`).

# Hexagonal binning of stem height against rachis length. geom_hex() relies
# on the hexbin package; install it once if it is missing:
# install.packages("hexbin")
ggplot(palm, aes(x = max_stem_height_m, y = max__rachis__length_m)) +
  geom_hex()
## Warning: Removed 1066 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# Bin max__rachis__length_m into intervals of width 2 and draw one box plot
# of max_stem_height_m per interval.
ggplot(palm, aes(x = max__rachis__length_m, y = max_stem_height_m)) +
  geom_boxplot(aes(group = cut_width(max__rachis__length_m, 2)))
## Warning: Removed 1026 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 40 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Stem diameter against stem height, zoomed to the 4-11 window on both axes.
# coord_cartesian() only changes the visible region; no points are dropped
# because of the limits.
ggplot(palm, aes(x = max_stem_height_m, y = max_stem_dia_cm)) +
  geom_point() +
  coord_cartesian(xlim = c(4, 11), ylim = c(4, 11))
## Warning: Removed 647 rows containing missing values or values outside the scale range
## (`geom_point()`).

Patterns and models

# Scatterplot of petiole length against rachis length to look for a pattern
# between the two leaf measurements.
ggplot(palm, aes(x = max__rachis__length_m, y = max__petiole_length_m)) +
  geom_point()
## Warning: Removed 1594 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Box plots of max_leaf_number for each fruit_size_categorical level; the
# discrete variable is mapped to y, so the boxes are drawn horizontally.
ggplot(palm, aes(x = max_leaf_number, y = fruit_size_categorical)) +
  geom_boxplot()
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_boxplot()`).