library(tidyverse)
library(knitr)
library(gmodels)
#library(ggplot2)
library(ggpubr)
library(openxlsx) ## for writing to xlsx files
library(xlsx)## For importing xlsx files
#library(corrplot) ## For creating correlation plot visualisations
#library(factoextra) ## For creating PCA visualisations
#library(GGally)
#library(lubridate) ## Allows selection of dates from tibbles (Included in tidyverse)
#library(ggrepel) ## for labeling graphs at end of lines instead of legend
library(rtry) ## for analysing try plant trait database files
library(data.table)
library(jsonlite)
library(curl)
Analysing_TRY_Data
Install and load packages:
Plant Trait data for 196 plants and 21 traits downloaded to file “41691.txt”
Import TRY data “41691.txt” and explore
# Import the TRY data release file. rtry_import() reports the input file,
# the dimensions and the column names of the imported table as it loads.
TRYdataWhole <- rtry_import("41691.txt")
input: 41691.txt
dim: 1208347 29
col: LastName FirstName DatasetID Dataset SpeciesName AccSpeciesID AccSpeciesName ObservationID ObsDataID TraitID TraitName DataID DataName OriglName OrigValueStr OrigUnitStr ValueKindName OrigUncertaintyStr UncertaintyName Replicates StdValue UnitName RelUncertaintyPercent OrigObsDataID ErrorRisk Reference Comment StdValueStr V29
# Count the records for every TraitID / TraitName combination and print the
# complete table (all rows, untruncated columns).
print(
  rtry_explore(TRYdataWhole, TraitID, TraitName),
  n = Inf,
  width = Inf
)
dim: 19 3
# A tibble: 19 × 3
TraitID
<int>
1 4
2 14
3 18
4 26
5 926
6 3086
7 3106
8 3107
9 3108
10 3109
11 3110
12 3111
13 3112
14 3113
15 3114
16 3115
17 3116
18 3117
19 NA
TraitName
<chr>
1 "Stem specific density (SSD, stem dry mass per stem fresh volume) or wood de…
2 "Leaf nitrogen (N) content per leaf dry mass"
3 "Plant height"
4 "Seed dry mass"
5 "Leaf area index (LAI) of a single plant"
6 "Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA) petiole, rha…
7 "Plant height vegetative"
8 "Plant height generative"
9 "Leaf area (in case of compound leaves: leaf, petiole excluded)"
10 "Leaf area (in case of compound leaves: leaflet, petiole excluded)"
11 "Leaf area (in case of compound leaves: leaf, petiole included)"
12 "Leaf area (in case of compound leaves: leaflet, petiole included)"
13 "Leaf area (in case of compound leaves: leaf, undefined if petiole in- or ex…
14 "Leaf area (in case of compound leaves: leaflet, undefined if petiole is in-…
15 "Leaf area (in case of compound leaves undefined if leaf or leaflet, undefin…
16 "Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole exc…
17 "Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole inc…
18 "Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined i…
19 ""
Count
<int>
1 2746
2 9344
3 185
4 8509
5 250
6 303
7 27438
8 3371
9 1024
10 8671
11 3610
12 542
13 665
14 440
15 3949
16 5560
17 5403
18 13970
19 1112367
Cross-referencing with the list of requested traits shows that Leaf Area, Leaf Area per Dry Mass and Leaf lamina area per leaf lamina dry mass are all missing.
This isn’t an issue, since there are other measures that will work.
Plant Height also has few records.
# Preview the first six rows of the raw TRY table.
head(TRYdataWhole, n = 6L)
LastName FirstName DatasetID Dataset SpeciesName
<char> <char> <int> <char> <char>
1: Cornelissen Johannes 37 Sheffield Database Acer pseudoplatanus
2: Cornelissen Johannes 37 Sheffield Database Acer pseudoplatanus
3: Cornelissen Johannes 37 Sheffield Database Aesculus hippocastanum
4: Cornelissen Johannes 37 Sheffield Database Aesculus hippocastanum
5: Cornelissen Johannes 37 Sheffield Database Alnus glutinosa
6: Cornelissen Johannes 37 Sheffield Database Alnus glutinosa
AccSpeciesID AccSpeciesName ObservationID ObsDataID TraitID
<int> <char> <int> <int> <int>
1: 855 Acer pseudoplatanus 15474 440586 NA
2: 855 Acer pseudoplatanus 15474 440587 26
3: 1571 Aesculus hippocastanum 15475 440588 NA
4: 1571 Aesculus hippocastanum 15475 440589 26
5: 2710 Alnus glutinosa 15476 440590 NA
6: 2710 Alnus glutinosa 15476 440591 26
TraitName DataID DataName OriglName OrigValueStr
<char> <int> <char> <char> <char>
1: 213 Number of replicates n 25
2: Seed dry mass 30 Seed dry mass mean dry wght (mg) 44.81
3: 213 Number of replicates n 30
4: Seed dry mass 30 Seed dry mass mean dry wght (mg) 6729
5: 213 Number of replicates n 50
6: Seed dry mass 30 Seed dry mass mean dry wght (mg) 0.847
OrigUnitStr ValueKindName OrigUncertaintyStr UncertaintyName Replicates
<char> <char> <char> <char> <char>
1:
2: mg Single
3:
4: mg Single
5:
6: mg Single
StdValue UnitName RelUncertaintyPercent OrigObsDataID ErrorRisk
<num> <char> <int> <int> <num>
1: NA NA NA NA
2: 44.810 mg NA NA 1.41037
3: NA NA NA NA
4: 6729.000 mg NA NA 3.80818
5: NA NA NA NA
6: 0.847 mg NA NA 3.30301
Reference
<char>
1: Cornelissen, J. H. C., P. C. Diez, and R. Hunt. 1996. Seedling growth, allocation and leaf attributes in a wide range of woody plant species and types. Journal of Ecology 84:755-765.
2: Cornelissen, J. H. C., P. C. Diez, and R. Hunt. 1996. Seedling growth, allocation and leaf attributes in a wide range of woody plant species and types. Journal of Ecology 84:755-765.
3: Cornelissen, J. H. C., P. C. Diez, and R. Hunt. 1996. Seedling growth, allocation and leaf attributes in a wide range of woody plant species and types. Journal of Ecology 84:755-765.
4: Cornelissen, J. H. C., P. C. Diez, and R. Hunt. 1996. Seedling growth, allocation and leaf attributes in a wide range of woody plant species and types. Journal of Ecology 84:755-765.
5: Cornelissen, J. H. C., P. C. Diez, and R. Hunt. 1996. Seedling growth, allocation and leaf attributes in a wide range of woody plant species and types. Journal of Ecology 84:755-765.
6: Cornelissen, J. H. C., P. C. Diez, and R. Hunt. 1996. Seedling growth, allocation and leaf attributes in a wide range of woody plant species and types. Journal of Ecology 84:755-765.
Comment StdValueStr V29
<char> <char> <lgcl>
1: number of measured seeds NA
2: seedlings in strict sense NA
3: number of measured seeds NA
4: seedlings in strict sense NA
5: number of measured seeds NA
6: seedlings in strict sense NA
# Count the records for every accepted species (ID and name) and print the
# complete table (all rows, untruncated columns).
print(
  rtry_explore(TRYdataWhole, AccSpeciesID, AccSpeciesName),
  n = Inf,
  width = Inf
)
dim: 194 3
# A tibble: 194 × 3
AccSpeciesID AccSpeciesName Count
<int> <chr> <int>
1 855 Acer pseudoplatanus 12043
2 1571 Aesculus hippocastanum 3964
3 2065 Agrostis stolonifera 2719
4 2172 Ajuga reptans 8078
5 2496 Alliaria petiolata 1203
6 2710 Alnus glutinosa 9362
7 3691 Anemone nemorosa 1686
8 3738 Angelica sylvestris 1565
9 4036 Anthoxanthum odoratum 20556
10 4045 Anthriscus sylvestris 5163
11 4491 Arctium minus 1018
12 5393 Arum maculatum 388
13 5693 Asplenium scolopendrium 365
14 6006 Athyrium filix-femina 813
15 6903 Barbarea vulgaris 655
16 7297 Berula erecta 563
17 7356 Betula pendula 118776
18 7359 Betula pubescens 11660
19 7387 Bidens cernua 1838
20 8156 Brachypodium sylvaticum 1643
21 9139 Calamagrostis canescens 669
22 9601 Callitriche stagnalis 308
23 9768 Caltha palustris 2111
24 9842 Calystegia sepium 1165
25 10281 Cardamine amara 434
26 10293 Cardamine flexuosa 408
27 10304 Cardamine pratensis 1551
28 10355 Carex acutiformis 436
29 10363 Carex appropinquata 461
30 10443 Carex elata 1089
31 10604 Carex paniculata 467
32 10620 Carex pilulifera 1555
33 10632 Carex pseudocyperus 472
34 10641 Carex remota 307
35 10645 Carex riparia 514
36 10773 Carpinus betulus 3645
37 11068 Castanea sativa 8123
38 11598 Centaurea nigra 2271
39 11781 Cerastium fontanum 3035
40 12748 Chrysosplenium oppositifolium 301
41 13007 Circaea lutetiana 925
42 13016 Cirsium arvense 9885
43 13051 Cirsium palustre 1111
44 13072 Cirsium vulgare 1392
45 13230 Cladium mariscus 465
46 14240 Convallaria majalis 1248
47 14688 Corylus avellana 8414
48 15090 Crataegus monogyna 12600
49 16700 Dactylis glomerata 25595
50 16724 Dactylorhiza fuchsii 859
51 16731 Dactylorhiza praetermissa 142
52 17365 Deschampsia cespitosa 8135
53 17868 Digitalis purpurea 1618
54 19400 Dryopteris affinis 1180
55 19406 Dryopteris carthusiana 1522
56 19410 Dryopteris dilatata 486
57 19413 Dryopteris filix-mas 853
58 20105 Eleocharis palustris 1306
59 20359 Elymus repens 6688
60 20801 Epilobium angustifolium 9768
61 20816 Epilobium ciliatum 1008
62 20845 Epilobium hirsutum 2138
63 20877 Epilobium parviflorum 765
64 20927 Equisetum arvense 4811
65 20929 Equisetum fluviatile 1246
66 20934 Equisetum palustre 553
67 23260 Euonymus europaeus 4610
68 23289 Eupatorium cannabinum 1396
69 23331 Euphorbia amygdaloides 2358
70 23906 Fagus sylvatica 110151
71 24150 Festuca arundinacea 1568
72 24995 Filipendula ulmaria 3207
73 25213 Fragaria vesca 11769
74 25247 Fraxinus excelsior 15440
75 25473 Galeopsis tetrahit 1735
76 25487 Galium aparine 5781
77 25535 Galium palustre 924
78 25569 Galium uliginosum 1058
79 26089 Geranium robertianum 2140
80 26152 Geum urbanum 1682
81 26336 Glechoma hederacea 4727
82 26483 Glyceria maxima 401
83 27988 Hedera helix 24043
84 28473 Heracleum sphondylium 5339
85 29305 Holcus lanatus 14937
86 29306 Holcus mollis 837
87 29681 Humulus lupulus 667
88 29696 HYACINTHOIDES NON-SCRIPTA 444
89 30021 Hypericum tetrapterum 355
90 30228 Ilex aquifolium 14454
91 30380 Impatiens capensis 53037
92 30870 Iris pseudacorus 479
93 31465 Juncus bulbosus 1019
94 31479 Juncus effusus 3690
95 31490 Juncus inflexus 920
96 31526 Juncus subnodulosus 728
97 31529 Juncus tenuis 850
98 32120 Lactuca muralis 4608
99 32357 Lapsana communis 1088
100 32364 Larix decidua 8829
101 32496 Lathyrus palustris 1079
102 32703 Lemna minor 265
103 33431 Ligustrum vulgare 4897
104 34153 Lonicera periclymenum 3435
105 34281 Lotus pedunculatus 918
106 34605 Lycopus europaeus 1677
107 34660 Lysimachia nummularia 492
108 34666 Lysimachia vulgaris 1173
109 34676 Lythrum salicaria 2166
110 35230 Malus domestica 593
111 36265 Mentha aquatica 1629
112 37011 Moehringia trinervia 499
113 37506 Myosotis scorpioides 718
114 37586 Myrica gale 1010
115 38665 Oenanthe crocata 109
116 38666 Oenanthe fistulosa 326
117 38670 Oenanthe lachenalii 373
118 39048 Ophioglossum vulgatum 282
119 41190 Persicaria hydropiper 806
120 41193 Persicaria maculosa 12307
121 41345 Peucedanum palustre 4065
122 41425 Phalaris arundinacea 5068
123 41658 Phragmites australis 5886
124 42065 Pimpinella major 3634
125 42185 Pinus sylvestris 105199
126 42544 Plantago major 3636
127 42805 Poa annua 3062
128 42893 Poa trivialis 7118
129 43323 Polypodium vulgare 4008
130 43532 Populus tremula 6023
131 44289 Prunella vulgaris 6429
132 44364 Prunus laurocerasus 7019
133 44805 Pteridium aquilinum 8240
134 45470 Quercus robur 123265
135 45587 Ranunculus acris 11348
136 45634 Ranunculus flammula 574
137 45666 Ranunculus lingua 469
138 45737 Ranunculus repens 6009
139 45748 Ranunculus sceleratus 872
140 45983 Rhamnus cathartica 5875
141 46226 Rhododendron ponticum 6346
142 46517 Ribes nigrum 2705
143 46527 Ribes rubrum 1076
144 46535 RIBES UVA-CRISPA 4512
145 46710 Rorippa palustris 925
146 46721 Rosa arvensis 8887
147 46728 Rosa canina 3438
148 46801 Rosa tomentosa 139
149 47059 Rubus fruticosus 6033
150 47466 Rumex conglomeratus 405
151 47481 Rumex hydrolapathum 407
152 47498 Rumex obtusifolius 1955
153 47512 Rumex sanguineus 287
154 47706 Sagina procumbens 565
155 47776 Salix alba 3343
156 47815 Salix cinerea 1994
157 48206 Sambucus nigra 8507
158 48244 Sanicula europaea 13389
159 49223 Scrophularia auriculata 378
160 49248 Scrophularia umbrosa 553
161 49261 Scutellaria galericulata 394
162 49614 Senecio sarracenicus 80
163 49638 Senecio vulgaris 2657
164 50679 Solanum dulcamara 37565
165 50913 Sonchus arvensis 842
166 50914 Sonchus asper 28989
167 50930 Sonchus palustris 576
168 50997 Sorbus aucuparia 12332
169 51204 Sparganium erectum 567
170 51554 Stachys palustris 804
171 51561 Stachys sylvatica 820
172 51617 Stellaria alsine 427
173 51634 Stellaria media 5309
174 53415 Taraxacum sp 2267
175 53581 Telekia speciosa 91
176 53988 Thalictrum flavum 439
177 54079 Thelypteris palustris 451
178 55394 Typha angustifolia 1096
179 55399 Typha latifolia 2044
180 55598 Urtica dioica 9571
181 55768 Valeriana dioica 521
182 55773 Valeriana officinalis 2023
183 56241 Veronica catenata 537
184 56242 Veronica chamaedrys 8997
185 56260 Veronica montana 2374
186 56277 Veronica serpyllifolia 760
187 56323 Viburnum opulus 7844
188 56356 Vicia cracca 10201
189 56478 Vinca minor 469
190 56568 Viola riviniana 1876
191 81266 Dioscorea communis 393
192 83492 Ficaria verna 1060
193 436062 Lolium giganteum 366
194 448214 Populus canadensis_x 1844
Not all species are present, so let’s see which 2 are missing:
# Read the list of requested species IDs; the first row of the CSV holds the
# column name (AccSpeciesID).
AccSpeciesIDList <- read_csv("AccSpeciesIDList.csv", col_names = TRUE)
Rows: 196 Columns: 1
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (1): AccSpeciesID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Species IDs that actually occur in the TRY export, with their record counts.
TRYAccSpeciesIDList <- rtry_explore(TRYdataWhole, AccSpeciesID)
dim: 194 2
# List the requested species IDs that do not occur in the TRY export.
AccSpeciesIDList %>%
  filter(!(AccSpeciesID %in% TRYAccSpeciesIDList$AccSpeciesID)) %>%
  print(n = Inf, width = Inf)
# A tibble: 2 × 1
AccSpeciesID
<dbl>
1 440114
2 457379
So 440114 and 457379 are missing
- These are Narcissus sp (Daffodil) and Symphytum peregrinum (Russian Comfrey)
Could this be because of incorrect names/IDs?
Let’s move on and come back to this later.
Limit the columns to those relevant to the data analysis, while keeping ObservationID and OrigObsDataID so that duplicates can be removed later.
# Keep only the columns needed for the analysis. ObservationID and
# OrigObsDataID are retained so duplicate records can be identified later.
TRYdata <- rtry_select_col(
  TRYdataWhole,
  AccSpeciesID, AccSpeciesName, DataID, DataName, TraitID, TraitName,
  OrigValueStr, OrigUnitStr, ValueKindName, OrigUncertaintyStr,
  UncertaintyName, Replicates, StdValue, UnitName,
  ObservationID, OrigObsDataID
)
dim: 1208347 16
col: AccSpeciesID AccSpeciesName DataID DataName TraitID TraitName OrigValueStr OrigUnitStr ValueKindName OrigUncertaintyStr UncertaintyName Replicates StdValue UnitName ObservationID OrigObsDataID
I need a matrix with: Species as rows, Traits as columns and trait values as data points.
First I need to tidy up the data, so let’s have a look at what it looks like:
# Inspect the selected data: printed preview, structure, then per-column
# summary. These are called separately on purpose: str() returns NULL, so
# piping its result into summary() would summarise NULL instead of TRYdata.
print(TRYdata)
str(TRYdata)
summary(TRYdata)
AccSpeciesID AccSpeciesName DataID DataName
<int> <char> <int> <char>
1: 855 Acer pseudoplatanus 213 Number of replicates
2: 855 Acer pseudoplatanus 30 Seed dry mass
3: 1571 Aesculus hippocastanum 213 Number of replicates
4: 1571 Aesculus hippocastanum 30 Seed dry mass
5: 2710 Alnus glutinosa 213 Number of replicates
---
1208343: 50914 Sonchus asper 61 Altitude
1208344: 50914 Sonchus asper 8080 Treatment: competition
1208345: 50914 Sonchus asper 114 Location / Site Name
1208346: 50914 Sonchus asper 449 Location Region
1208347: 50914 Sonchus asper 1412 Location Country
TraitID TraitName OrigValueStr OrigUnitStr
<int> <char> <char> <char>
1: NA 25
2: 26 Seed dry mass 44.81 mg
3: NA 30
4: 26 Seed dry mass 6729 mg
5: NA 50
---
1208343: NA 400
1208344: NA grassland community
1208345: NA Heidfeldhof, University Hohenheim
1208346: NA Stuttgart
1208347: NA Germany
ValueKindName OrigUncertaintyStr UncertaintyName Replicates StdValue
<char> <char> <char> <char> <num>
1: NA
2: Single 44.81
3: NA
4: Single 6729.00
5: NA
---
1208343: NA
1208344: NA
1208345: NA
1208346: NA
1208347: NA
UnitName ObservationID OrigObsDataID
<char> <int> <int>
1: 15474 NA
2: mg 15474 NA
3: 15475 NA
4: mg 15475 NA
5: 15476 NA
---
1208343: 6510620 NA
1208344: 6510620 NA
1208345: 6510620 NA
1208346: 6510620 NA
1208347: 6510620 NA
Classes 'data.table' and 'data.frame': 1208347 obs. of 16 variables:
$ AccSpeciesID : int 855 855 1571 1571 2710 2710 7356 7356 10773 10773 ...
$ AccSpeciesName : chr "Acer pseudoplatanus" "Acer pseudoplatanus" "Aesculus hippocastanum" "Aesculus hippocastanum" ...
$ DataID : int 213 30 213 30 213 30 213 30 213 30 ...
$ DataName : chr "Number of replicates" "Seed dry mass" "Number of replicates" "Seed dry mass" ...
$ TraitID : int NA 26 NA 26 NA 26 NA 26 NA 26 ...
$ TraitName : chr "" "Seed dry mass" "" "Seed dry mass" ...
$ OrigValueStr : chr "25" "44.81" "30" "6729" ...
$ OrigUnitStr : chr "" "mg" "" "mg" ...
$ ValueKindName : chr "" "Single" "" "Single" ...
$ OrigUncertaintyStr: chr "" "" "" "" ...
$ UncertaintyName : chr "" "" "" "" ...
$ Replicates : chr "" "" "" "" ...
$ StdValue : num NA 44.8 NA 6729 NA ...
$ UnitName : chr "" "mg" "" "mg" ...
$ ObservationID : int 15474 15474 15475 15475 15476 15476 15483 15483 15487 15487 ...
$ OrigObsDataID : int NA NA NA NA NA NA NA NA NA NA ...
- attr(*, ".internal.selfref")=<externalptr>
Length Class Mode
0 NULL NULL
Here we can see that the data rows without any trait data have NA under TraitID. Therefore use filter() to remove these rows. I’ll also filter out any traits that are not needed.
# Keep only rows that carry trait data (non-missing TraitID) and that belong
# to one of the six traits needed for the analysis.
# %in% tests each row for membership in the set of names. Using == here would
# recycle the six names down the column and compare row i only against name
# ((i - 1) %% 6) + 1, silently dropping most matching rows.
TRYdatafiltered <- TRYdata %>%
  filter(!is.na(TraitID)) %>%
  filter(TraitName %in% c(
    "Leaf area (in case of compound leaves: leaf, petiole excluded)",
    "Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded",
    "Leaf nitrogen (N) content per leaf dry mass",
    "Plant height generative",
    "Seed dry mass",
    "Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density"
  ))
Warning: There was 1 warning in `filter()`.
ℹ In argument: `==...`.
Caused by warning in `TraitName == c("Leaf area (in case of compound leaves: leaf, petiole excluded)",
"Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded",
"Leaf nitrogen (N) content per leaf dry mass", "Plant height generative",
"Seed dry mass",
"Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density")`:
! longer object length is not a multiple of shorter object length
Good, now we need to average the scores for each trait for each species
# Average the standardised trait value per species and trait (ignoring
# missing values), then reshape to one row per species and one column per
# TraitID. print() returns its input invisibly, so the matrix is both shown
# and assigned.
TRYdataavg <- TRYdatafiltered %>%
  group_by(AccSpeciesID, TraitID) %>%
  summarise(AvgTraitValue = mean(StdValue, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = TraitID, values_from = AvgTraitValue) %>%
  print()
# A tibble: 192 × 7
AccSpeciesID `4` `14` `26` `3107` `3108` `3115`
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 855 0.536 22.1 84.4 25 8365. 12.9
2 1571 0.76 23.5 10302. NA NA 15.1
3 2065 NA 29.7 0.0639 0.4 NA 27.5
4 2172 0.0862 19.2 1.42 0.154 NA NA
5 2496 0.376 37.2 2.51 0.7 3475 43.5
6 2710 0.512 34.5 3.17 15 NA 14.1
7 3691 NA 26.3 3.20 0.204 3285. 24.9
8 3738 NA 28.9 2.92 0.838 NA 29.2
9 4036 0.0975 21.3 0.623 0.473 416. 29.6
10 4045 NA 25.1 4.08 0.941 NA 35.7
# ℹ 182 more rows
And do the same but with actual names instead of IDs
# Same species-by-trait matrix as the ID-based version, but keyed by species
# and trait names. na.rm = TRUE matches the ID-based averaging, so a single
# missing StdValue no longer turns a whole species/trait mean into NA.
# The matrix is printed and then written to disk; write_csv() returns its
# input invisibly, so the matrix is also assigned.
TRYdataavgNames <- TRYdatafiltered %>%
  group_by(AccSpeciesName, TraitName) %>%
  summarise(AvgTraitValue = mean(StdValue, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = TraitName, values_from = AvgTraitValue) %>%
  print() %>%
  write_csv("K:/NTU/Research Project/Functional Trait info/NameMatrix")
# A tibble: 192 × 7
AccSpeciesName Leaf area (in case of compoun…¹ Leaf area per leaf d…²
<chr> <dbl> <dbl>
1 Acer pseudoplatanus 8365. 12.9
2 Aesculus hippocastanum NA 15.1
3 Agrostis stolonifera NA 27.5
4 Ajuga reptans NA NA
5 Alliaria petiolata 3475 43.5
6 Alnus glutinosa NA 14.1
7 Anemone nemorosa 3285. 24.9
8 Angelica sylvestris NA 29.2
9 Anthoxanthum odoratum 416. 29.6
10 Anthriscus sylvestris NA 35.7
# ℹ 182 more rows
# ℹ abbreviated names:
# ¹`Leaf area (in case of compound leaves: leaf, petiole excluded)`,
# ²`Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded`
# ℹ 4 more variables: `Leaf nitrogen (N) content per leaf dry mass` <dbl>,
# `Plant height generative` <dbl>, `Seed dry mass` <dbl>,
# `Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density` <dbl>