Import data
# Read the palm-tree spreadsheet into `palm`, then print a preview of it.
palm <- read_excel(path = "../00_data/palmtrees.xlsx")
## Warning: Coercing text to numeric in V1449 / R1449C22: '0.56675675700000006'
print(palm)
## # A tibble: 2,557 × 29
## spec_name acc_genus acc_species palm_tribe palm_subfamily climbing
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Acanthophoenix crin… Acanthop… crinita Areceae Arecoideae climbing
## 2 Acanthophoenix rous… Acanthop… rousselii Areceae Arecoideae climbing
## 3 Acanthophoenix rubra Acanthop… rubra Areceae Arecoideae climbing
## 4 Acoelorrhaphe wrigh… Acoelorr… wrightii Trachycar… Coryphoideae climbing
## 5 Acrocomia aculeata Acrocomia aculeata Cocoseae Arecoideae climbing
## 6 Acrocomia crispa Acrocomia crispa Cocoseae Arecoideae climbing
## 7 Acrocomia emensis Acrocomia emensis Cocoseae Arecoideae climbing
## 8 Acrocomia glaucesce… Acrocomia glaucescens Cocoseae Arecoideae climbing
## 9 Acrocomia hassleri Acrocomia hassleri Cocoseae Arecoideae climbing
## 10 Acrocomia intumesce… Acrocomia intumescens Cocoseae Arecoideae climbing
## # ℹ 2,547 more rows
## # ℹ 23 more variables: acaulescent <chr>, erect <chr>, stem_solitary <chr>,
## # stem_armed <chr>, leaves_armed <chr>, max_stem_height_m <dbl>,
## # max_stem_dia_cm <dbl>, understorey_canopy <chr>, max_leaf_number <dbl>,
## # max__blade__length_m <dbl>, max__rachis__length_m <dbl>,
## # max__petiole_length_m <dbl>, average_fruit_length_cm <dbl>,
## # min_fruit_length_cm <dbl>, max_fruit_length_cm <dbl>, …
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: number of rows at each value of max_leaf_number.
ggplot(palm, aes(x = max_leaf_number)) +
  geom_bar()
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_count()`).

# Histogram of max_fruit_length_cm using bins 0.5 wide.
ggplot(palm, aes(x = max_fruit_length_cm)) +
  geom_histogram(binwidth = 0.5)
## Warning: Removed 1641 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Overlaid frequency polygons of average_fruit_length_cm, one coloured line
# per fruit_shape, using bins 0.1 wide.
ggplot(palm) +
  geom_freqpoly(
    aes(x = average_fruit_length_cm, colour = fruit_shape),
    binwidth = 0.1
  )
## Warning: Removed 505 rows containing non-finite outside the scale range
## (`stat_bin()`).

Typical values
# Histogram of max__rachis__length_m with narrow (0.1) bins to show where
# values cluster.
ggplot(palm) +
  geom_histogram(aes(x = max__rachis__length_m), binwidth = 0.1)
## Warning: Removed 1026 rows containing non-finite outside the scale range
## (`stat_bin()`).

Unusual values
# Histogram of average_fruit_length_cm, zoomed to counts of 0-200 so that
# sparsely populated bins stay visible. coord_cartesian() only zooms the view;
# it does not discard data before binning.
ggplot(data = palm, mapping = aes(x = average_fruit_length_cm)) +
  geom_histogram(binwidth = 0.5) +
  coord_cartesian(ylim = c(0, 200))
## Warning: Removed 505 rows containing non-finite outside the scale range
## (`stat_bin()`).

Missing values
# Two ways of handling unusual stem diameters before plotting.
# The original code compared the constant 0 against the limits, so the filter
# dropped every row and the mutate only added an all-NA column; both now act on
# max_stem_dia_cm, the y variable of the plot below.
# NOTE(review): the 3 and 20 limits are carried over unchanged from the
# original code - confirm they are sensible cut-offs for max_stem_dia_cm.

# Option 1: drop rows whose diameter is outside the limits (or missing).
palm2 <- palm %>%
  filter(between(max_stem_dia_cm, 3, 20))

# Option 2 (overwrites option 1 and is the version plotted): keep every row
# but replace out-of-range diameters with NA.
palm2 <- palm %>%
  mutate(
    max_stem_dia_cm = ifelse(
      max_stem_dia_cm < 3 | max_stem_dia_cm > 20,
      NA,
      max_stem_dia_cm
    )
  )

# na.rm = TRUE drops the rows with missing values without a warning.
ggplot(data = palm2, mapping = aes(x = max_stem_height_m, y = max_stem_dia_cm)) +
  geom_point(na.rm = TRUE)

Covariation
A categorical and continuous variable
# Frequency polygons of average_fruit_length_cm, one coloured line per
# fruit_shape.
# The bin width used to be 500, far wider than the 0.1-0.5 bins used for this
# variable elsewhere in this file. With a bin that wide essentially every
# observation lands in a single bin, so each line collapsed into one triangle.
# A bin width of 0.5 matches the histogram of the same variable above.
ggplot(data = palm, mapping = aes(x = average_fruit_length_cm)) +
  geom_freqpoly(mapping = aes(colour = fruit_shape), binwidth = 0.5)
## Warning: Removed 505 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Bar chart: number of rows in each fruit_shape category.
ggplot(data = palm, mapping = aes(x = fruit_shape)) +
  geom_bar()

# Horizontal box plots of max_stem_height_m for each acaulescent category,
# with the categories ordered by their mean stem height.
# max_stem_height_m contains missing values, and reorder() summarises each
# group with mean() by default, so any group holding an NA scored NA and the
# reordering had no reliable effect. na.rm = TRUE is forwarded to mean() so
# that groups are ordered by the mean of their observed heights.
ggplot(data = palm) +
  geom_boxplot(
    mapping = aes(
      x = reorder(acaulescent, max_stem_height_m, na.rm = TRUE),
      y = max_stem_height_m
    )
  ) +
  coord_flip()
## Warning: Removed 446 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Two categorical variables
# Count plot: point size shows how many rows share each combination of
# fruit_shape and fruit_size_categorical.
ggplot(palm, aes(x = fruit_shape, y = fruit_size_categorical)) +
  geom_count()

# Heat map of the same two categorical variables: tally each
# fruit_shape / fruit_size_categorical combination, then fill each tile by
# its count `n`.
ggplot(
  data = count(palm, fruit_shape, fruit_size_categorical),
  mapping = aes(x = fruit_shape, y = fruit_size_categorical, fill = n)
) +
  geom_tile()

Two continuous variables
# Scatter plot of stem height against rachis length; points are drawn at 10%
# opacity so overlapping points show up as darker regions.
ggplot(palm, aes(x = max_stem_height_m, y = max__rachis__length_m)) +
  geom_point(alpha = 0.1)
## Warning: Removed 1066 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Same pair of variables, binned into rectangles and filled by count
# (default number of bins).
ggplot(palm, aes(x = max_stem_height_m, y = max__rachis__length_m)) +
  geom_bin2d()
## `stat_bin2d()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1066 rows containing non-finite outside the scale range
## (`stat_bin2d()`).

# Hexagonal binning of the same pair of variables.
# If the hexbin package is missing, install it first:
# install.packages("hexbin")
ggplot(palm, aes(x = max_stem_height_m, y = max__rachis__length_m)) +
  geom_hex()
## Warning: Removed 1066 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# Treat rachis length as categorical: cut it into intervals of width 2 and
# draw one box plot of stem height per interval.
ggplot(palm) +
  geom_boxplot(
    aes(
      x = max__rachis__length_m,
      y = max_stem_height_m,
      group = cut_width(max__rachis__length_m, 2)
    )
  )
## Warning: Removed 1026 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 40 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Scatter plot of stem height against stem diameter, with the view zoomed to
# the 4-11 range on both axes.
ggplot(palm, aes(x = max_stem_height_m, y = max_stem_dia_cm)) +
  geom_point() +
  coord_cartesian(xlim = c(4, 11), ylim = c(4, 11))
## Warning: Removed 647 rows containing missing values or values outside the scale range
## (`geom_point()`).

Patterns and models
# Scatter plot of rachis length against petiole length to look for a pattern
# between the two measurements.
ggplot(palm, aes(x = max__rachis__length_m, y = max__petiole_length_m)) +
  geom_point()
## Warning: Removed 1594 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Box plots of max_leaf_number (x axis) for each fruit_size_categorical
# level (y axis).
ggplot(palm, aes(x = max_leaf_number, y = fruit_size_categorical)) +
  geom_boxplot()
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
