# Read the palm-tree workbook into a tibble and display it for a first look.
data <- read_excel(path = "./data/palmtrees.xlsx")
data
## # A tibble: 2,557 × 29
## spec_name acc_genus acc_species palm_tribe palm_subfamily climbing
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Acanthophoenix crin… Acanthop… crinita Areceae Arecoideae climbing
## 2 Acanthophoenix rous… Acanthop… rousselii Areceae Arecoideae climbing
## 3 Acanthophoenix rubra Acanthop… rubra Areceae Arecoideae climbing
## 4 Acoelorrhaphe wrigh… Acoelorr… wrightii Trachycar… Coryphoideae climbing
## 5 Acrocomia aculeata Acrocomia aculeata Cocoseae Arecoideae climbing
## 6 Acrocomia crispa Acrocomia crispa Cocoseae Arecoideae climbing
## 7 Acrocomia emensis Acrocomia emensis Cocoseae Arecoideae climbing
## 8 Acrocomia glaucesce… Acrocomia glaucescens Cocoseae Arecoideae climbing
## 9 Acrocomia hassleri Acrocomia hassleri Cocoseae Arecoideae climbing
## 10 Acrocomia intumesce… Acrocomia intumescens Cocoseae Arecoideae climbing
## # ℹ 2,547 more rows
## # ℹ 23 more variables: acaulescent <chr>, erect <chr>, stem_solitary <chr>,
## # stem_armed <chr>, leaves_armed <chr>, max_stem_height_m <chr>,
## # max_stem_dia_cm <chr>, understorey_canopy <chr>, max_leaf_number <chr>,
## # max__blade__length_m <chr>, max__rachis__length_m <chr>,
## # max__petiole_length_m <chr>, average_fruit_length_cm <chr>,
## # min_fruit_length_cm <chr>, max_fruit_length_cm <chr>, …
# Clean the data
# These measurement columns are read in as character; convert them to numeric
# in one step instead of repeating the same assignment per column.
# Entries that cannot be parsed become NA, and as.numeric() warns
# "NAs introduced by coercion" for each affected column.
numeric_cols <- c(
  "max_leaf_number",
  "average_fruit_length_cm",
  "max_stem_height_m",
  "max_stem_dia_cm"
)
data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)
What is the distribution of fruit size? What about that of max leaf number? How does the distribution of leaf number vary by fruit size?
# Bar chart of the categorical fruit-size variable.
ggplot(data, aes(x = fruit_size_categorical)) +
  geom_bar()

# Histogram of maximum leaf number with unit-wide bins.
ggplot(data, aes(x = max_leaf_number)) +
  geom_histogram(binwidth = 1)
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Same variable restricted to palms with fewer than 40 leaves, narrower bins.
data %>%
  filter(max_leaf_number < 40) %>%
  ggplot(aes(x = max_leaf_number)) +
  geom_histogram(binwidth = 0.75)

# Full range again, with the count axis zoomed to 0-20
# (coord_cartesian() zooms without discarding any data).
ggplot(data, aes(x = max_leaf_number)) +
  geom_histogram(binwidth = 1.25) +
  coord_cartesian(ylim = c(0, 20))
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Recode non-positive leaf counts as missing before plotting.
data2 <- mutate(
  data,
  max_leaf_number = ifelse(max_leaf_number <= 0, NA, max_leaf_number)
)

# Scatterplot of average fruit length against maximum leaf number.
ggplot(data2) +
  geom_point(aes(x = max_leaf_number, y = average_fruit_length_cm))
## Warning: Removed 1330 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Frequency polygons of leaf number, one line per fruit-size category
# (counts, default binning).
ggplot(data) +
  geom_freqpoly(aes(x = max_leaf_number, colour = fruit_size_categorical))
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Same comparison with density on the y-axis instead of raw counts.
ggplot(
  data,
  aes(
    x = max_leaf_number,
    y = after_stat(density),
    colour = fruit_size_categorical
  )
) +
  geom_freqpoly(binwidth = 6)
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Boxplot of leaf number for each fruit-size category.
ggplot(data) +
  geom_boxplot(aes(x = fruit_size_categorical, y = max_leaf_number))
## Warning: Removed 1251 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Covariation of two categorical variables: point size encodes the number of
# species in each fruit-size / fruit-shape combination.
ggplot(data, aes(x = fruit_size_categorical, y = fruit_shape)) +
  geom_count()

# The same counts computed explicitly and drawn as a heat map.
ggplot(
  count(data, fruit_shape, fruit_size_categorical),
  aes(x = fruit_shape, y = fruit_size_categorical, fill = n)
) +
  geom_tile()
# Scatterplot with heavy transparency to reduce overplotting.
ggplot(data, aes(x = max_leaf_number, y = average_fruit_length_cm)) +
  geom_point(alpha = 1 / 10)
## Warning: Removed 1330 rows containing missing values or values outside the scale range
## (`geom_point()`).

# NOTE(review): hexbin is only required by geom_hex(); geom_bin2d() below does
# not need it -- confirm whether geom_hex() was intended.
library(hexbin)

# Rectangular 2-d bins coloured by count.
ggplot(data, aes(x = max_leaf_number, y = average_fruit_length_cm)) +
  geom_bin2d()
## `stat_bin2d()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 1330 rows containing non-finite outside the scale range
## (`stat_bin2d()`).

# Boxplots of fruit length (below 20 cm) for 0.1-wide slices of leaf number.
data %>%
  filter(average_fruit_length_cm < 20) %>%
  ggplot(aes(x = max_leaf_number, y = average_fruit_length_cm)) +
  geom_boxplot(aes(group = cut_width(max_leaf_number, 0.1)))
## Warning: Removed 825 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).
# Boxplots of fruit length (below 20 cm) across 12 leaf-number bins that each
# hold roughly the same number of observations.
# Both x and y are continuous, so the boxplot orientation is ambiguous;
# state it explicitly rather than relying on ggplot2's fallback (and warning).
data %>%
  filter(average_fruit_length_cm < 20) %>%
  ggplot(aes(x = max_leaf_number, y = average_fruit_length_cm)) +
  geom_boxplot(
    mapping = aes(group = cut_number(max_leaf_number, 12)),
    orientation = "x"
  )
## Warning: Removed 825 rows containing missing values or values outside the scale range
## (`stat_boxplot()`).
# Per-column summary: type information for character columns, and the
# five-number summary, mean and NA count for the converted numeric columns.
data %>%
  summary()
## spec_name acc_genus acc_species palm_tribe
## Length:2557 Length:2557 Length:2557 Length:2557
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## palm_subfamily climbing acaulescent erect
## Length:2557 Length:2557 Length:2557 Length:2557
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## stem_solitary stem_armed leaves_armed max_stem_height_m
## Length:2557 Length:2557 Length:2557 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 2.50
## Mode :character Mode :character Mode :character Median : 6.00
## Mean : 10.86
## 3rd Qu.: 15.00
## Max. :170.00
## NA's :446
## max_stem_dia_cm understorey_canopy max_leaf_number max__blade__length_m
## Min. : 0.00 Length:2557 Min. : 4.00 Length:2557
## 1st Qu.: 2.00 Class :character 1st Qu.: 8.00 Class :character
## Median : 5.00 Mode :character Median :11.00 Mode :character
## Mean : 12.38 Mean :14.37
## 3rd Qu.: 17.00 3rd Qu.:18.00
## Max. :175.00 Max. :75.00
## NA's :602 NA's :1251
## max__rachis__length_m max__petiole_length_m average_fruit_length_cm
## Length:2557 Length:2557 Min. : 0.300
## Class :character Class :character 1st Qu.: 1.050
## Mode :character Mode :character Median : 1.500
## Mean : 2.196
## 3rd Qu.: 2.500
## Max. :45.000
## NA's :505
## min_fruit_length_cm max_fruit_length_cm average_fruit_width_cm
## Length:2557 Length:2557 Length:2557
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## min_fruit_width_cm max_fruit_width_cm fruit_size_categorical
## Length:2557 Length:2557 Length:2557
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## fruit_shape fruit_color_description main_fruit_colors
## Length:2557 Length:2557 Length:2557
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## conspicuousness
## Length:2557
## Class :character
## Mode :character
##
##
##
##
# Stem diameter against stem height on the log-log scale.
# Both columns contain zeros and NAs; log(0) is -Inf, which ggplot2 would
# silently drop with a warning, so keep only strictly positive measurements
# up front (rows with NA in either column are removed by filter() as well).
data %>%
  filter(max_stem_height_m > 0, max_stem_dia_cm > 0) %>%
  ggplot(aes(x = log(max_stem_height_m), y = log(max_stem_dia_cm))) +
  geom_point()
There seems to be a subtle linear relationship between the two variables on the log–log scale here.
# Keep only palms with strictly positive stem diameter and height so that both
# logarithms are finite (rows with NA in either column are dropped as well).
data_clean <- filter(data, max_stem_dia_cm > 0, max_stem_height_m > 0)

library(modelr)

# Linear model of stem diameter on stem height, both log-transformed.
mod <- lm(log(max_stem_dia_cm) ~ log(max_stem_height_m), data = data_clean)

# Attach the model residuals (column `resid`) and back-transform them from the
# log scale with exp().
data2 <- mutate(add_residuals(data_clean, mod), resid = exp(resid))

# Residual stem diameter against average fruit length.
ggplot(data2, aes(x = average_fruit_length_cm, y = resid)) +
  geom_point()
## Warning: Removed 149 rows containing missing values or values outside the scale range
## (`geom_point()`).
I don’t see anything here really.
# Fruit length against residual stem diameter, one line per palm tribe.
ggplot(
  data2,
  aes(
    x = resid,
    y = average_fruit_length_cm,
    colour = palm_tribe,
    group = palm_tribe
  )
) +
  geom_line()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
Wow, that graph is not helpful.
# Mean fruit length within five bins of the back-transformed residual, drawn
# as one line per tribe. Only five tribes are kept, and rows with residuals of
# 15 or more or fruit lengths of 20 cm or more are excluded.
data2 %>%
  filter(
    resid < 15,
    average_fruit_length_cm < 20,
    palm_tribe %in% c(
      "Areceae", "Sclerospermeae", "Trachycarpeae", "Phytelepheae", "Caryoteae"
    )
  ) %>%
  ggplot(aes(x = resid, y = average_fruit_length_cm)) +
  stat_summary_bin(
    mapping = aes(group = palm_tribe, colour = palm_tribe),
    geom = "line",
    fun = mean,
    bins = 5
  )
Based on the above chart, which plots the residual of stem diameter after removing the effect of stem height, we see that stem diameter has a positive effect on fruit length within each respective tribe.