library(ggplot2)
library(ggrepel)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ purrr 1.0.1
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dbplyr::ident() masks dplyr::ident()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dbplyr::sql() masks dplyr::sql()
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(skimr)
Load the data
# Read the 2015 street tree census CSV into `df` and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; the script will not
# run on another machine without editing it.
url <- "C:/Users/Ivant/Desktop/2015_Street_Tree_Census_-_Tree_Data.csv"
df <- read.csv(file = url, header = TRUE, stringsAsFactors = FALSE)
head(df)
## tree_id block_id created_at tree_dbh stump_diam curb_loc status health
## 1 180683 348711 08/27/2015 3 0 OnCurb Alive Fair
## 2 200540 315986 09/03/2015 21 0 OnCurb Alive Fair
## 3 204026 218365 09/05/2015 3 0 OnCurb Alive Good
## 4 204337 217969 09/05/2015 10 0 OnCurb Alive Good
## 5 189565 223043 08/30/2015 21 0 OnCurb Alive Good
## 6 190422 106099 08/30/2015 11 0 OnCurb Alive Good
## spc_latin spc_common steward guards sidewalk
## 1 Acer rubrum red maple None None NoDamage
## 2 Quercus palustris pin oak None None Damage
## 3 Gleditsia triacanthos var. inermis honeylocust 1or2 None Damage
## 4 Gleditsia triacanthos var. inermis honeylocust None None Damage
## 5 Tilia americana American linden None None Damage
## 6 Gleditsia triacanthos var. inermis honeylocust 1or2 Helpful NoDamage
## user_type problems root_stone root_grate root_other trunk_wire
## 1 TreesCount Staff None No No No No
## 2 TreesCount Staff Stones Yes No No No
## 3 Volunteer None No No No No
## 4 Volunteer Stones Yes No No No
## 5 Volunteer Stones Yes No No No
## 6 Volunteer None No No No No
## trnk_light trnk_other brch_light brch_shoe brch_other address
## 1 No No No No No 108-005 70 AVENUE
## 2 No No No No No 147-074 7 AVENUE
## 3 No No No No No 390 MORGAN AVENUE
## 4 No No No No No 1027 GRAND STREET
## 5 No No No No No 603 6 STREET
## 6 No No No No No 8 COLUMBUS AVENUE
## postcode zip_city community.board borocode borough cncldist st_assem
## 1 11375 Forest Hills 406 4 Queens 29 28
## 2 11357 Whitestone 407 4 Queens 19 27
## 3 11211 Brooklyn 301 3 Brooklyn 34 50
## 4 11211 Brooklyn 301 3 Brooklyn 34 53
## 5 11215 Brooklyn 306 3 Brooklyn 39 44
## 6 10023 New York 107 1 Manhattan 3 67
## st_senate nta nta_name boro_ct state latitude longitude
## 1 16 QN17 Forest Hills 4073900 New York 40.72309 -73.84422
## 2 11 QN49 Whitestone 4097300 New York 40.79411 -73.81868
## 3 18 BK90 East Williamsburg 3044900 New York 40.71758 -73.93661
## 4 18 BK90 East Williamsburg 3044900 New York 40.71354 -73.93446
## 5 21 BK37 Park Slope-Gowanus 3016500 New York 40.66678 -73.97598
## 6 27 MN14 Lincoln Square 1014500 New York 40.77005 -73.98495
## x_sp y_sp council.district census.tract bin bbl
## 1 1027431.1 202756.8 29 739 4052307 4022210001
## 2 1034455.7 228644.8 19 973 4101931 4044750045
## 3 1001822.8 200716.9 34 449 3338310 3028870001
## 4 1002420.4 199244.3 34 449 3338342 3029250001
## 5 990913.8 182202.4 39 165 3025654 3010850052
## 6 988418.7 219825.5 3 145 1076229 1011310031
# Print skimr's per-column summary of `df` (missing counts, completeness and
# basic distribution statistics for character and numeric columns).
skim(df)
| Name | df |
| Number of rows | 683788 |
| Number of columns | 45 |
| _______________________ | |
| Column type frequency: | |
| character | 26 |
| numeric | 19 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| created_at | 0 | 1 | 10 | 10 | 0 | 483 | 0 |
| curb_loc | 0 | 1 | 6 | 14 | 0 | 2 | 0 |
| status | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| health | 0 | 1 | 0 | 4 | 31616 | 4 | 0 |
| spc_latin | 0 | 1 | 0 | 34 | 31619 | 133 | 0 |
| spc_common | 0 | 1 | 0 | 22 | 31619 | 133 | 0 |
| steward | 0 | 1 | 0 | 7 | 31615 | 5 | 0 |
| guards | 0 | 1 | 0 | 7 | 31616 | 5 | 0 |
| sidewalk | 0 | 1 | 0 | 8 | 31616 | 3 | 0 |
| user_type | 0 | 1 | 9 | 16 | 0 | 3 | 0 |
| problems | 0 | 1 | 0 | 95 | 31664 | 233 | 0 |
| root_stone | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| root_grate | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| root_other | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| trunk_wire | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| trnk_light | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| trnk_other | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| brch_light | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| brch_shoe | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| brch_other | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| address | 0 | 1 | 1 | 40 | 0 | 408701 | 0 |
| zip_city | 0 | 1 | 5 | 19 | 0 | 48 | 0 |
| borough | 0 | 1 | 5 | 13 | 0 | 5 | 0 |
| nta | 0 | 1 | 4 | 4 | 0 | 188 | 0 |
| nta_name | 0 | 1 | 6 | 56 | 0 | 188 | 0 |
| state | 0 | 1 | 8 | 8 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| tree_id | 0 | 1.00 | 365205.01 | 2.081221e+05 | 3.00 | 186582.75 | 366214.50 | 546170.25 | 722694.00 | ▇▇▇▇▇ |
| block_id | 0 | 1.00 | 313793.10 | 1.148390e+05 | 100002.00 | 221556.00 | 319967.00 | 404624.00 | 999999.00 | ▆▇▂▁▁ |
| tree_dbh | 0 | 1.00 | 11.28 | 8.720000e+00 | 0.00 | 4.00 | 9.00 | 16.00 | 450.00 | ▇▁▁▁▁ |
| stump_diam | 0 | 1.00 | 0.43 | 3.290000e+00 | 0.00 | 0.00 | 0.00 | 0.00 | 140.00 | ▇▁▁▁▁ |
| postcode | 0 | 1.00 | 10916.25 | 6.515500e+02 | 83.00 | 10451.00 | 11214.00 | 11365.00 | 11697.00 | ▁▁▁▁▇ |
| community.board | 0 | 1.00 | 343.51 | 1.157400e+02 | 101.00 | 302.00 | 402.00 | 412.00 | 503.00 | ▂▃▆▇▃ |
| borocode | 0 | 1.00 | 3.36 | 1.170000e+00 | 1.00 | 3.00 | 4.00 | 4.00 | 5.00 | ▂▃▆▇▃ |
| cncldist | 0 | 1.00 | 29.94 | 1.433000e+01 | 1.00 | 19.00 | 30.00 | 43.00 | 51.00 | ▃▅▇▅▇ |
| st_assem | 0 | 1.00 | 50.79 | 1.897000e+01 | 23.00 | 33.00 | 52.00 | 64.00 | 87.00 | ▇▅▅▆▃ |
| st_senate | 0 | 1.00 | 20.62 | 7.390000e+00 | 10.00 | 14.00 | 21.00 | 25.00 | 36.00 | ▇▃▆▂▃ |
| boro_ct | 0 | 1.00 | 3404914.12 | 1.175863e+06 | 1000201.00 | 3011700.00 | 4008100.00 | 4103202.00 | 5032300.00 | ▂▃▆▇▃ |
| latitude | 0 | 1.00 | 40.70 | 9.000000e-02 | 40.50 | 40.63 | 40.70 | 40.76 | 40.91 | ▂▆▇▅▂ |
| longitude | 0 | 1.00 | -73.92 | 1.200000e-01 | -74.25 | -73.98 | -73.91 | -73.83 | -73.70 | ▂▂▇▇▅ |
| x_sp | 0 | 1.00 | 1005279.86 | 3.428505e+04 | 913349.27 | 989657.84 | 1008386.23 | 1029991.28 | 1067247.62 | ▂▂▇▇▅ |
| y_sp | 0 | 1.00 | 194798.42 | 3.290206e+04 | 120973.79 | 169515.15 | 194560.25 | 217019.57 | 271894.09 | ▂▆▇▅▂ |
| council.district | 6519 | 0.99 | 30.03 | 1.430000e+01 | 1.00 | 19.00 | 30.00 | 43.00 | 51.00 | ▃▅▇▅▇ |
| census.tract | 6519 | 0.99 | 11957.37 | 3.074574e+04 | 1.00 | 202.00 | 516.00 | 1417.00 | 157903.00 | ▇▁▁▁▁ |
| bin | 9559 | 0.99 | 3495439.01 | 1.193275e+06 | 1000000.00 | 3031991.00 | 4020352.00 | 4263123.00 | 5515124.00 | ▂▃▆▇▃ |
| bbl | 9559 | 0.99 | 3413413626.23 | 1.174892e+09 | 0.00 | 3011240055.00 | 4008560127.00 | 4105700010.00 | 5080500094.00 | ▁▂▅▆▇ |
# Tree condition summary per (borough, nta_name).
# Each tree is flagged as good / fair / poor (status 'Alive' with the matching
# `health` value) or dead (any status other than 'Alive'). The flags are then
# summed per group and divided by the group size. `residual` is the share of
# trees in a group that fall into none of the four categories.
# The three ratio columns keep the names dplyr derives from their expressions
# ("n_fair/count", ...), so positional column references downstream still work.
status_by_nta <- df %>%
  group_by(borough, nta_name) %>%
  mutate(
    is_good = as.numeric(status == "Alive" & health == "Good"),
    is_fair = as.numeric(status == "Alive" & health == "Fair"),
    is_poor = as.numeric(status == "Alive" & health == "Poor"),
    is_dead = as.numeric(status != "Alive")
  ) %>%
  summarize(
    count = n(),
    n_good = sum(is_good),
    n_fair = sum(is_fair),
    n_poor = sum(is_poor),
    n_dead = sum(is_dead),
    fr_good = n_good / count,
    `n_fair/count` = n_fair / count,
    `n_poor/count` = n_poor / count,
    `n_dead/count` = n_dead / count,
    residual = 1 - (n_good + n_fair + n_poor + n_dead) / count
  )
## `summarise()` has grouped output by 'borough'. You can override using the
## `.groups` argument.
# Render the first six rows of `status_by_nta` as a striped, full-width kable
# table, sorted by borough and then by descending share of good trees.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): head() runs before arrange(), so only the first six rows are
# sorted, not the whole table. Swap the two calls if a "best neighbourhoods"
# view was intended.
head(status_by_nta) %>%
  arrange(borough, desc(fr_good)) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped")) %>%
  column_spec(8, color = "blue") %>% # column 8 is fr_good
  column_spec(11, color = "green") # column 11 is n_dead/count
| borough | nta_name | count | n_good | n_fair | n_poor | n_dead | fr_good | n_fair/count | n_poor/count | n_dead/count | residual |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Bronx | Co-op City | 2103 | 1734 | 250 | 46 | 73 | 0.82 | 0.12 | 0.02 | 0.03 | 0 |
| Bronx | Belmont | 1637 | 1291 | 207 | 70 | 69 | 0.79 | 0.13 | 0.04 | 0.04 | 0 |
| Bronx | Allerton-Pelham Gardens | 3843 | 2975 | 486 | 139 | 243 | 0.77 | 0.13 | 0.04 | 0.06 | 0 |
| Bronx | Bedford Park-Fordham North | 1821 | 1372 | 272 | 77 | 100 | 0.75 | 0.15 | 0.04 | 0.05 | 0 |
| Bronx | Bronxdale | 1550 | 1146 | 194 | 65 | 145 | 0.74 | 0.13 | 0.04 | 0.09 | 0 |
| Bronx | Claremont-Bathgate | 1706 | 1097 | 386 | 116 | 107 | 0.64 | 0.23 | 0.07 | 0.06 | 0 |
# Share of living trees in each health category within every borough, shown
# with one row per health value and one column per borough.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): rows with a blank `health` value are not filtered out here, so
# a blank-health row can appear in the table.
df %>%
  filter(status == "Alive") %>%
  group_by(borough, health) %>%
  summarize(xx = n()) %>%
  # Regroup by borough so prop.table() normalises within each borough.
  group_by(borough) %>%
  mutate(ptg = prop.table(xx)) %>%
  dplyr::select(borough, health, ptg) %>%
  pivot_wider(names_from = "borough", values_from = "ptg") %>%
  kable(digits = 2) %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped"))
## `summarise()` has grouped output by 'borough'. You can override using the
## `.groups` argument.
| health | Bronx | Brooklyn | Manhattan | Queens | Staten Island |
|---|---|---|---|---|---|
| Fair | 0.14 | 0.15 | 0.18 | 0.15 | 0.14 |
| Good | 0.83 | 0.81 | 0.76 | 0.82 | 0.81 |
| Poor | 0.04 | 0.04 | 0.06 | 0.04 | 0.04 |
|  | NA | NA | NA | NA | 0.00 |
# Count trees by `steward` (rows) and `health` (columns), skipping records
# whose `health` is blank. The result is stored in `yy`.
yy <- df %>%
  filter(health != "") %>%
  dplyr::select(status, health, steward, borough, spc_common, tree_id) %>%
  group_by(health, steward) %>%
  summarise(ct = n()) %>%
  pivot_wider(names_from = "health", values_from = "ct")
## `summarise()` has grouped output by 'health'. You can override using the
## `.groups` argument.
# Display the steward-by-health count table.
yy
## # A tibble: 4 × 4
## steward Fair Good Poor
## <chr> <int> <int> <int>
## 1 1or2 21902 115372 6283
## 2 3or4 2830 15606 747
## 3 4orMore 200 1361 49
## 4 None 71572 396511 19739
This result shows the marginal percentage of trees in each state of health, grouped by borough. Queens, Brooklyn and Staten Island have very similar proportions: roughly 81% of trees in good health and about 14.5% in fair health. The defining criteria may in fact be percentile-driven rather than based on observable criteria.
# For each health category, the percentage of its trees at every `steward`
# level. Percentages are normalised within `health`, so each column of the
# rendered table sums to 100.
df %>%
  filter(health != "") %>%
  dplyr::select(status, health, steward, borough, spc_common, tree_id) %>%
  group_by(health, steward) %>%
  summarise(ct = n()) %>%
  group_by(health) %>%
  mutate(ptg = 100 * prop.table(ct)) %>%
  dplyr::select(health, steward, ptg) %>%
  pivot_wider(names_from = "health", values_from = "ptg") %>%
  kable(digits = 1) %>%
  kable_styling(bootstrap_options = c("striped"))
## `summarise()` has grouped output by 'health'. You can override using the
## `.groups` argument.
| steward | Fair | Good | Poor |
|---|---|---|---|
| 1or2 | 22.7 | 21.8 | 23.4 |
| 3or4 | 2.9 | 3.0 | 2.8 |
| 4orMore | 0.2 | 0.3 | 0.2 |
| None | 74.2 | 75.0 | 73.6 |
# For each `steward` level, the percentage of its trees in every health
# category. Percentages are normalised within `steward`, so each row of the
# rendered table sums to 100.
df %>%
  filter(health != "") %>%
  dplyr::select(status, health, steward, borough, spc_common, tree_id) %>%
  group_by(health, steward) %>%
  summarise(ct = n()) %>%
  group_by(steward) %>%
  mutate(ptg = 100 * prop.table(ct)) %>%
  dplyr::select(health, steward, ptg) %>%
  pivot_wider(names_from = "health", values_from = "ptg") %>%
  kable(digits = 1) %>%
  kable_styling(bootstrap_options = c("striped"))
## `summarise()` has grouped output by 'health'. You can override using the
## `.groups` argument.
| steward | Fair | Good | Poor |
|---|---|---|---|
| 1or2 | 15.3 | 80.4 | 4.4 |
| 3or4 | 14.8 | 81.4 | 3.9 |
| 4orMore | 12.4 | 84.5 | 3.0 |
| None | 14.7 | 81.3 | 4.0 |
# Tree counts for every (species, health, steward) combination, ignoring
# records with a blank `health` or blank `spc_common`.
by_spc_health_steward <- df %>%
  filter(health != "" & spc_common != "") %>%
  dplyr::select(status, health, steward, spc_common, tree_id) %>%
  group_by(spc_common, health, steward) %>%
  summarise(ct = n())
## `summarise()` has grouped output by 'spc_common', 'health'. You can override
## using the `.groups` argument.
# Preview the first 100 species/health/steward count rows.
by_spc_health_steward %>%
  head(n = 100)
## # A tibble: 100 × 4
## # Groups: spc_common, health [30]
## spc_common health steward ct
## <chr> <chr> <chr> <int>
## 1 'Schubert' chokecherry Fair 1or2 220
## 2 'Schubert' chokecherry Fair 3or4 9
## 3 'Schubert' chokecherry Fair None 488
## 4 'Schubert' chokecherry Good 1or2 1271
## 5 'Schubert' chokecherry Good 3or4 127
## 6 'Schubert' chokecherry Good 4orMore 11
## 7 'Schubert' chokecherry Good None 2520
## 8 'Schubert' chokecherry Poor 1or2 66
## 9 'Schubert' chokecherry Poor 3or4 5
## 10 'Schubert' chokecherry Poor 4orMore 1
## # … with 90 more rows
# Percentage table by species and `steward` level, with one column per health
# category. `ptg` is the share of all trees at that `steward` level that belong
# to the given (species, health) cell.
#
# `spc_common` is kept as an identifying column through the reshape. Without
# it, many species rows map onto the same (steward, health) cell and
# pivot_wider() falls back to list-columns, which makes the table unreadable.
# Records with a blank species name are excluded so the table has no unlabeled
# species row. Combinations with no trees are shown as 0.
df %>%
  filter(health != "" & spc_common != "") %>%
  group_by(spc_common, health, steward) %>%
  summarise(ct = n(), .groups = "drop") %>%
  # Normalise within each steward level.
  group_by(steward) %>%
  mutate(ptg = 100 * prop.table(ct)) %>%
  ungroup() %>%
  dplyr::select(spc_common, steward, health, ptg) %>%
  pivot_wider(names_from = "health", values_from = "ptg", values_fill = 0) %>%
  arrange(spc_common, steward) %>%
  kable(digits = 1) %>%
  kable_styling(bootstrap_options = c("striped"))
## `summarise()` has grouped output by 'spc_common', 'health'. You can override
## using the `.groups` argument.
## Warning: Values from `ptg` are not uniquely identified; output will contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} %>%
## dplyr::group_by(steward, health) %>%
## dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
## dplyr::filter(n > 1L)
| steward | Good | Poor | Fair |
|---|---|---|---|
| 1or2 | 0.002089762, 0.885362609, 0.052940644, 1.086676372, 0.183202491, 0.247288533, 0.006965874, 1.923277862, 0.018807860, 0.346203947, 0.279331555, 0.076624616, 0.156732169, 0.048761119, 0.020201035, 0.264006632, 0.016021511, 0.091252952, 0.197134239, 0.008359049, 0.179022966, 0.002786350, 0.043188420, 0.047367944, 0.025077147, 0.006965874, 0.075231441, 8.586833105, 0.053637231, 5.060011006, 0.028560084, 0.896508007, 0.051547469, 0.101701763, 0.036222546, 0.323216562, 0.173450267, 0.498060004, 0.108667637, 0.479252144, 0.019504448, 0.576077795, 0.020201035, 0.021594210, 0.013931748, 0.646433124, 0.201313764, 0.021594210, 0.253557820, 0.006269287, 0.023683972, 0.436760311, 0.027166909, 0.250771471, 3.372876279, 0.593492480, 1.584039789, 0.196437652, 0.371977681, 0.266096394, 0.012538574, 0.029953259, 8.719881302, 0.116330099, 0.101701763, 0.204100114, 0.087770015, 0.825456091, 4.437261854, 0.137924309, 0.676386383, 0.069658742, 0.066872392, 3.719776813, 6.762470656, 0.149766295, 0.401234353, 0.020897623, 0.092646127, 1.051150414, 1.688527902, 0.059209931, 0.012538574, 0.031346434, 0.007662462, 0.043885007, 0.099612001, 0.029256672, 0.052940644, 0.009752224, 5.288491679, 0.039008895, 0.002089762, 0.038312308, 1.124988680, 0.004876112, 0.012538574, 1.620262335, 0.006269287, 0.124689148, 0.014628336, 0.429097850, 0.171360505, 0.218728449, 0.006965874, 0.370584507, 0.009055636, 0.216638687, 0.245895359, 0.047367944, 1.115236457, 0.636680900, 0.010448811, 3.064288053, 0.027863497, 0.012538574, 0.044581595, 0.263310044, 1.193950835, 1.306101409, 0.157428757, 0.020201035, 0.054333819, 0.017414685, 0.122599386, 0.055030406, 0.027166909, 0.003482937, 0.033436196, 0.091949539, 0.249378296, 0.033436196, 0.523137151 | 0.0459747696, 0.0027863497, 0.0585133431, 0.0153249232, 0.0278634967, 0.1636980433, 0.0006965874, 0.0222907974, 0.0167180980, 0.0013931748, 0.0076624616, 0.0097522239, 0.0139317484, 0.0027863497, 0.0055726993, 0.0076624616, 0.0006965874, 0.0090556364, 
0.0020897623, 0.0048761119, 0.0048761119, 0.0020897623, 0.0013931748, 0.0055726993, 0.3657083946, 0.0076624616, 0.2479851209, 0.0410986577, 0.0020897623, 0.0041795245, 0.0062692868, 0.0236839722, 0.0097522239, 0.0313464338, 0.0048761119, 0.0654792173, 0.0034829371, 0.0522440564, 0.0027863497, 0.0027863497, 0.0619962802, 0.0062692868, 0.0041795245, 0.0125385735, 0.0006965874, 0.0390088954, 0.0006965874, 0.0146283358, 0.2229079738, 0.0236839722, 0.0522440564, 0.0174146855, 0.0313464338, 0.0111453987, 0.0006965874, 0.0020897623, 0.2382328970, 0.0146283358, 0.0069658742, 0.0090556364, 0.0069658742, 0.0522440564, 0.1448901830, 0.0195044477, 0.0271669093, 0.0111453987, 0.0020897623, 0.3364517230, 0.3190370376, 0.0132351609, 0.0626928676, 0.0013931748, 0.0090556364, 0.0487611193, 0.3622254575, 0.0027863497, 0.0020897623, 0.0048761119, 0.0041795245, 0.0083590490, 0.0048761119, 0.0041795245, 0.1379243088, 0.0013931748, 0.0069658742, 0.0682655670, 0.0006965874, 0.0006965874, 0.1037915253, 0.0006965874, 0.0125385735, 0.0013931748, 0.0118419861, 0.0118419861, 0.0083590490, 0.0404020703, 0.0013931748, 0.0146283358, 0.0104488113, 0.0013931748, 0.0424918325, 0.0341327835, 0.0006965874, 0.1142403366, 0.0006965874, 0.0006965874, 0.0013931748, 0.0264703219, 0.0571201683, 0.0557269935, 0.0174146855, 0.0020897623, 0.0020897623, 0.0006965874, 0.0076624616, 0.0048761119, 0.0027863497, 0.0006965874, 0.0006965874, 0.0006965874, 0.0222907974, 0.0020897623, 0.0208976225 | 0.1532492320, 0.0132351609, 0.2333567851, 0.0334361961, 0.0557269935, 0.0013931748, 0.4290978496, 0.0020897623, 0.0640860425, 0.0515474689, 0.0034829371, 0.0271669093, 0.0118419861, 0.0013931748, 0.0438850073, 0.0027863497, 0.0118419861, 0.0508508815, 0.0034829371, 0.0383123080, 0.0013931748, 0.0076624616, 0.0160215106, 0.0020897623, 0.0013931748, 0.0222907974, 1.5387616069, 0.0139317484, 0.7000703553, 0.0027863497, 0.1504628823, 0.0062692868, 0.0195044477, 0.0118419861, 0.0571201683, 0.0397054828, 0.0759280286, 
0.0174146855, 0.1260823227, 0.0013931748, 0.0891631895, 0.0034829371, 0.0034829371, 0.0076624616, 0.1246891479, 0.0208976225, 0.0062692868, 0.0522440564, 0.0076624616, 0.0738382663, 0.0020897623, 0.0626928676, 0.5607528717, 0.1058812876, 0.2814213170, 0.0341327835, 0.0821973154, 0.0536372312, 0.0048761119, 0.0048761119, 1.6216555097, 0.0299532590, 0.0195044477, 0.0313464338, 0.0153249232, 0.1650912181, 0.5844368439, 0.0501542941, 0.1093642247, 0.0215942100, 0.0111453987, 0.8059516429, 1.4461154803, 0.0181112729, 0.1198130359, 0.0034829371, 0.0243805596, 0.2006171765, 0.7746052091, 0.0132351609, 0.0055726993, 0.0048761119, 0.0006965874, 0.0097522239, 0.0271669093, 0.0104488113, 0.0153249232, 0.0027863497, 0.8108277548, 0.0034829371, 0.0146283358, 0.2556475825, 0.0006965874, 0.3852128423, 0.0013931748, 0.0285600841, 0.0041795245, 0.0480645319, 0.0487611193, 0.0362225457, 0.0006965874, 0.0633894551, 0.0020897623, 0.0299532590, 0.0397054828, 0.0195044477, 0.1797195539, 0.1414072459, 0.0013931748, 0.5732914452, 0.0020897623, 0.0048761119, 0.0870734273, 0.2131557500, 0.2020103513, 0.0466713570, 0.0083590490, 0.0097522239, 0.0041795245, 0.0285600841, 0.0146283358, 0.0132351609, 0.0055726993, 0.0139317484, 0.0529406438, 0.0055726993, 0.0863768399 |
| None | 2.049928e-04, 5.165819e-01, 2.623908e-02, 9.571114e-01, 1.166409e-01, 1.656342e-01, 5.124820e-03, 1.579265e+00, 1.783437e-02, 2.564460e-01, 2.480413e-01, 3.669371e-02, 3.835415e-01, 3.587374e-02, 9.224676e-03, 1.203308e-01, 8.609698e-03, 6.231781e-02, 2.158574e-01, 1.496447e-02, 1.615343e-01, 2.459914e-03, 3.320883e-02, 2.521411e-02, 1.168459e-02, 7.174748e-03, 5.678301e-02, 6.866234e+00, 5.883293e-02, 3.391401e+00, 2.131925e-02, 6.198982e-01, 2.500912e-02, 4.550840e-02, 4.120355e-02, 2.767403e-01, 1.205358e-01, 4.331498e-01, 3.710370e-02, 7.359242e-01, 2.234422e-02, 3.109741e-01, 8.199712e-03, 3.546376e-02, 6.969755e-03, 3.901013e-01, 1.297604e-01, 2.849400e-02, 1.937182e-01, 4.304849e-03, 1.270955e-02, 2.041728e-01, 1.065963e-02, 1.559995e-01, 2.243851e+00, 4.417595e-01, 2.190758e+00, 8.609698e-02, 3.521776e-01, 3.406980e-01, 7.994719e-03, 1.393951e-02, 8.148054e+00, 1.242256e-01, 3.628373e-02, 1.113111e-01, 3.177388e-02, 4.829630e-01, 3.681261e+00, 7.584734e-02, 3.534076e-01, 5.329813e-02, 2.685406e-02, 3.592704e+00, 1.280980e+01, 7.133750e-02, 8.392405e-01, 1.803937e-02, 1.502597e-01, 1.038904e+00, 3.797287e+00, 3.587374e-02, 6.764763e-03, 2.562410e-02, 2.459914e-03, 3.074892e-02, 4.878829e-02, 2.254921e-02, 2.767403e-02, 1.250456e-02, 7.581864e+00, 3.792367e-02, 5.534806e-03, 1.188958e-02, 7.445339e-01, 1.147960e-02, 1.537446e-02, 2.231757e+00, 1.721940e-02, 5.493807e-02, 2.111426e-02, 2.570610e-01, 1.637893e-01, 1.699390e-01, 2.254921e-03, 2.074527e-01, 6.764763e-03, 1.070062e-01, 1.803937e-01, 3.054393e-02, 1.028449e+00, 1.732804e+00, 4.919827e-03, 2.152219e+00, 1.311954e-02, 9.224676e-03, 1.660442e-02, 3.216337e-01, 7.008704e-01, 1.430030e+00, 3.560725e-01, 1.639942e-02, 9.224676e-02, 1.270955e-02, 1.252506e-01, 2.828901e-02, 2.336918e-02, 6.149784e-04, 3.300384e-02, 1.543596e-01, 1.900283e-01, 2.029429e-02, 3.661171e-01 | 0.0002049928, 0.0348487768, 0.0034848777, 0.0358737408, 0.0067647626, 0.0100446474, 0.0965516110, 0.0047148345, 0.0145544891, 
0.0153744604, 0.0008199712, 0.0184493524, 0.0036898705, 0.0002049928, 0.0112746043, 0.0008199712, 0.0065597698, 0.0084047050, 0.0012299568, 0.0059447913, 0.0010249640, 0.0016399424, 0.0004099856, 0.0006149784, 0.0038948633, 0.3050292935, 0.0071747482, 0.1883883876, 0.0004099856, 0.0190643308, 0.0006149784, 0.0020499280, 0.0026649065, 0.0192693236, 0.0069697554, 0.0198843021, 0.0022549209, 0.0723624601, 0.0014349496, 0.0231641869, 0.0004099856, 0.0030748921, 0.0010249640, 0.0377186761, 0.0061497841, 0.0022549209, 0.0051248201, 0.0002049928, 0.0006149784, 0.0149644747, 0.0002049928, 0.0147594819, 0.1424699993, 0.0174243884, 0.0703125320, 0.0073797410, 0.0295189639, 0.0149644747, 0.0004099856, 0.0006149784, 0.1670691359, 0.0145544891, 0.0030748921, 0.0088146906, 0.0038948633, 0.0289039855, 0.1109011074, 0.0163994244, 0.0194743165, 0.0036898705, 0.0026649065, 0.2357417255, 0.3392630919, 0.0047148345, 0.1111061002, 0.0006149784, 0.0100446474, 0.0405885753, 0.6594618529, 0.0032798849, 0.0010249640, 0.0006149784, 0.0014349496, 0.0032798849, 0.0020499280, 0.0014349496, 0.0008199712, 0.2082726896, 0.0018449352, 0.0034848777, 0.0491982731, 0.0008199712, 0.0006149784, 0.1494397547, 0.0014349496, 0.0020499280, 0.0061497841, 0.0112746043, 0.0096346618, 0.0002049928, 0.0137345179, 0.0006149784, 0.0049198273, 0.0073797410, 0.0022549209, 0.0381286617, 0.1033163736, 0.0865069636, 0.0010249640, 0.0002049928, 0.0010249640, 0.0393586185, 0.0336188200, 0.0453034099, 0.0315688919, 0.0016399424, 0.0108646187, 0.0004099856, 0.0118895827, 0.0043048489, 0.0004099856, 0.0024599137, 0.0116845899, 0.0118895827, 0.0014349496, 0.0133245323 | 0.1000364887, 0.0036898705, 0.1890033660, 0.0213192517, 0.0229591941, 0.0010249640, 0.2589059124, 0.0067647626, 0.0368987049, 0.0465333667, 0.0022549209, 0.0719524745, 0.0084047050, 0.0014349496, 0.0198843021, 0.0032798849, 0.0172193956, 0.0522731652, 0.0024599137, 0.0262390790, 0.0028698993, 0.0071747482, 0.0049198273, 0.0024599137, 0.0018449352, 
0.0108646187, 1.2541459795, 0.0227542013, 0.4829630480, 0.0032798849, 0.0889668773, 0.0032798849, 0.0086096978, 0.0071747482, 0.0614978414, 0.0135295251, 0.0742073953, 0.0057397985, 0.1924882437, 0.0024599137, 0.0520681724, 0.0010249640, 0.0088146906, 0.0016399424, 0.0817921291, 0.0172193956, 0.0077897266, 0.0317738847, 0.0026649065, 0.0022549209, 0.0360787336, 0.0010249640, 0.0336188200, 0.3609923292, 0.0686725896, 0.4401195518, 0.0118895827, 0.0705175248, 0.0586279422, 0.0008199712, 0.0014349496, 1.2164273034, 0.0344387912, 0.0067647626, 0.0264440718, 0.0077897266, 0.0920417693, 0.4561089906, 0.0293139711, 0.0453034099, 0.0086096978, 0.0061497841, 0.6350677091, 1.8744542067, 0.0104546330, 0.3179438402, 0.0043048489, 0.0371036977, 0.1984330350, 1.6223130568, 0.0086096978, 0.0014349496, 0.0053298129, 0.0004099856, 0.0057397985, 0.0084047050, 0.0051248201, 0.0047148345, 0.0030748921, 1.0427983978, 0.0057397985, 0.0028698993, 0.1443149345, 0.0018449352, 0.0024599137, 0.4751733214, 0.0016399424, 0.0073797410, 0.0049198273, 0.0303389351, 0.0483783019, 0.0381286617, 0.0004099856, 0.0344387912, 0.0008199712, 0.0153744604, 0.0440734530, 0.0063547769, 0.1236106613, 0.4181853217, 0.0016399424, 0.3942011635, 0.0018449352, 0.0026649065, 0.0071747482, 0.0981915535, 0.1100811362, 0.1941281861, 0.0996265031, 0.0028698993, 0.0297239567, 0.0028698993, 0.0330038416, 0.0084047050, 0.0073797410, 0.0086096978, 0.0291089783, 0.0350537696, 0.0051248201, 0.0438684602 |
| 3or4 | 0.662044519, 0.046916541, 0.813220039, 0.166814367, 0.229369754, 0.010425898, 1.730699056, 0.031277694, 0.312776938, 0.145962571, 0.067768337, 0.078194234, 0.088620132, 0.015638847, 0.260647448, 0.099046030, 0.260647448, 0.015638847, 0.239795652, 0.010425898, 0.046916541, 0.151175520, 0.041703592, 0.005212949, 0.062555388, 10.837720899, 0.026064745, 3.414481572, 0.026064745, 0.860136579, 0.031277694, 0.031277694, 0.062555388, 0.213730908, 0.161601418, 0.333628734, 0.130323724, 0.265860397, 0.020851796, 0.364906428, 0.015638847, 0.015638847, 0.026064745, 0.469165407, 0.119897826, 0.046916541, 0.265860397, 0.005212949, 0.015638847, 0.250221550, 0.031277694, 0.302351040, 6.495334411, 0.443100662, 1.230255956, 0.182453214, 0.375332325, 0.192879112, 0.031277694, 10.884637439, 0.072981286, 0.078194234, 0.260647448, 0.067768337, 0.516081948, 4.623885732, 0.062555388, 0.609915029, 0.046916541, 0.041703592, 3.430120419, 5.411041026, 0.062555388, 0.172027316, 0.057342439, 0.057342439, 1.162487619, 1.412709170, 0.052129490, 0.005212949, 0.010425898, 0.015638847, 0.031277694, 0.198092061, 0.026064745, 0.031277694, 0.005212949, 4.603033936, 0.010425898, 0.005212949, 0.020851796, 0.933117865, 0.026064745, 0.020851796, 1.125996976, 0.010425898, 0.057342439, 0.015638847, 0.479591305, 0.208517959, 0.213730908, 0.401397070, 0.187666163, 0.192879112, 0.031277694, 1.042589793, 0.453526560, 0.031277694, 4.488349059, 0.052129490, 0.015638847, 0.036490643, 0.218943857, 1.423135068, 1.136422874, 0.125110775, 0.020851796, 0.046916541, 0.072981286, 0.031277694, 0.026064745, 0.046916541, 0.052129490, 0.234582703, 0.020851796, 0.938330814 | 0.026064745, 0.041703592, 0.010425898, 0.020851796, 0.005212949, 0.172027316, 0.010425898, 0.005212949, 0.005212949, 0.015638847, 0.005212949, 0.005212949, 0.005212949, 0.005212949, 0.005212949, 0.390971172, 0.005212949, 0.156388469, 0.036490643, 0.005212949, 0.020851796, 0.010425898, 0.015638847, 0.057342439, 0.031277694, 0.005212949, 
0.005212949, 0.015638847, 0.526507845, 0.005212949, 0.052129490, 0.005212949, 0.015638847, 0.010425898, 0.140749622, 0.005212949, 0.005212949, 0.010425898, 0.020851796, 0.172027316, 0.010425898, 0.010425898, 0.417035917, 0.422248866, 0.031277694, 0.005212949, 0.031277694, 0.198092061, 0.005212949, 0.010425898, 0.005212949, 0.099046030, 0.005212949, 0.020851796, 0.005212949, 0.093833081, 0.005212949, 0.005212949, 0.005212949, 0.005212949, 0.036490643, 0.005212949, 0.015638847, 0.072981286, 0.026064745, 0.140749622, 0.005212949, 0.020851796, 0.046916541, 0.010425898, 0.005212949, 0.005212949, 0.005212949, 0.005212949, 0.005212949, 0.005212949, 0.010425898, 0.015638847 | 0.046916541, 0.005212949, 0.177240265, 0.026064745, 0.057342439, 0.005212949, 0.333628734, 0.010425898, 0.046916541, 0.036490643, 0.005212949, 0.010425898, 0.010425898, 0.026064745, 0.005212949, 0.015638847, 0.072981286, 0.005212949, 0.041703592, 0.015638847, 0.015638847, 0.005212949, 0.005212949, 0.015638847, 2.189438565, 0.005212949, 0.406610019, 0.187666163, 0.005212949, 0.005212949, 0.052129490, 0.036490643, 0.072981286, 0.083407183, 0.005212949, 0.057342439, 0.078194234, 0.010425898, 0.005212949, 0.036490643, 0.005212949, 0.072981286, 0.005212949, 0.088620132, 1.032163895, 0.062555388, 0.234582703, 0.031277694, 0.036490643, 0.046916541, 0.005212949, 1.496116353, 0.020851796, 0.005212949, 0.036490643, 0.026064745, 0.083407183, 0.625553876, 0.026064745, 0.062555388, 0.005212949, 0.026064745, 0.901840171, 1.402283272, 0.015638847, 0.052129490, 0.005212949, 0.010425898, 0.276286295, 0.651618621, 0.015638847, 0.005212949, 0.020851796, 0.031277694, 0.020851796, 0.698535161, 0.005212949, 0.099046030, 0.005212949, 0.005212949, 0.182453214, 0.020851796, 0.057342439, 0.005212949, 0.010425898, 0.072981286, 0.046916541, 0.010425898, 0.192879112, 0.067768337, 0.703748110, 0.005212949, 0.005212949, 0.083407183, 0.328415785, 0.166814367, 0.031277694, 0.005212949, 0.010425898, 0.010425898, 0.015638847, 
0.005212949, 0.005212949, 0.020851796, 0.083407183, 0.114684877 |
| 4orMore | 0.6832298, 0.0621118, 1.6770186, 0.1242236, 0.8695652, 2.9192547, 0.0621118, 0.1863354, 0.0621118, 0.1863354, 0.2484472, 0.0621118, 0.3105590, 0.1863354, 0.0621118, 0.0621118, 0.1242236, 0.1863354, 0.1863354, 10.6832298, 0.1863354, 3.9130435, 1.0559006, 0.0621118, 0.0621118, 0.8074534, 0.3105590, 0.5590062, 0.4347826, 0.0621118, 0.7453416, 0.0621118, 0.1242236, 0.3726708, 0.0621118, 0.0621118, 0.1863354, 0.0621118, 0.1242236, 0.2484472, 5.7142857, 0.4347826, 1.6149068, 0.1863354, 0.0621118, 0.3726708, 0.0621118, 9.1304348, 0.0621118, 0.3105590, 0.1242236, 0.3105590, 4.3478261, 0.0621118, 1.4285714, 0.1242236, 3.6024845, 4.7204969, 0.1242236, 0.3105590, 0.0621118, 1.0559006, 1.8012422, 0.0621118, 0.0621118, 0.0621118, 0.0621118, 0.1242236, 5.9006211, 0.1242236, 0.1242236, 1.1801242, 0.6211180, 0.0621118, 0.0621118, 0.3105590, 0.1863354, 0.3105590, 0.0621118, 0.4347826, 0.1242236, 0.9316770, 0.4968944, 0.1242236, 4.5341615, 0.0621118, 1.7391304, 0.8695652, 0.2484472, 0.0621118, 0.1242236, 0.0621118, 0.1242236, 0.0621118, 0.2484472, 0.4968944 | 0.0621118, 0.0621118, 0.1863354, 0.1242236, 0.0621118, 0.1242236, 0.0621118, 0.1242236, 0.0621118, 0.3726708, 0.0621118, 0.4347826, 0.2484472, 0.0621118, 0.2484472, 0.0621118, 0.1242236, 0.0621118, 0.1242236, 0.0621118, 0.1863354, 0.0621118, 0.0621118 | 0.2484472, 0.0621118, 0.0621118, 0.0621118, 0.1863354, 0.0621118, 0.0621118, 0.0621118, 1.3664596, 0.5590062, 0.0621118, 0.3105590, 0.0621118, 0.1242236, 0.4347826, 0.0621118, 0.0621118, 0.1242236, 0.0621118, 0.0621118, 0.1242236, 0.9937888, 0.0621118, 0.1863354, 0.0621118, 1.4285714, 0.0621118, 0.0621118, 0.4347826, 0.1863354, 0.7453416, 0.9937888, 0.6832298, 0.0621118, 0.5590062, 0.1242236, 0.3105590, 0.0621118, 0.0621118, 0.5590062, 0.1863354, 0.1242236, 0.0621118, 0.1863354 |
# Total tree count per (species, steward), summed over health categories.
# Used as the denominator when converting counts to within-group proportions.
spc_steward_marginals <- by_spc_health_steward %>%
  group_by(spc_common, steward) %>%
  summarize(val = sum(ct))
## `summarise()` has grouped output by 'spc_common'. You can override using the
## `.groups` argument.
# Wide table with one row per (species, health). For each steward level it
# holds the tree count (ct_*) and the proportion of that species/steward
# group's trees in the given health category (pct_*).
# `health_score` encodes health numerically: Good = 2, Fair = 1, anything
# else = 0. Steward levels with no trees for a row are filled with 0.
spc_steward_health_probs_full <- by_spc_health_steward %>%
  left_join(spc_steward_marginals, by = c("spc_common", "steward")) %>%
  mutate(
    health_score = ifelse(health == "Good", 2, ifelse(health == "Fair", 1, 0)),
    pct = ct / val
  ) %>%
  # Drop `health` from the grouping; the remaining grouping is kept as is.
  ungroup("health") %>%
  dplyr::select(spc_common, health, health_score, steward, ct, pct) %>%
  pivot_wider(
    names_from = "steward",
    values_from = c("ct", "pct"),
    values_fill = 0
  ) %>%
  arrange(spc_common, health_score) %>%
  dplyr::select(
    spc_common, health, health_score,
    ct_None, pct_None,
    ct_1or2, pct_1or2,
    ct_3or4, pct_3or4,
    ct_4orMore, pct_4orMore
  )
# spc_steward_health_probs_full keeps the text `health` label. The working
# copy below drops it and relies on the numeric `health_score` instead.
spc_steward_health_probs <- spc_steward_health_probs_full %>%
  dplyr::select(-health)
spc_steward_health_probs
## # A tibble: 392 × 10
## # Groups: spc_common [132]
## spc_common healt…¹ ct_None pct_N…² ct_1or2 pct_1…³ ct_3or4 pct_3…⁴ ct_4o…⁵
## <chr> <dbl> <int> <dbl> <int> <dbl> <int> <dbl> <int>
## 1 'Schubert' c… 0 170 0.0535 66 0.0424 5 0.0355 1
## 2 'Schubert' c… 1 488 0.154 220 0.141 9 0.0638 0
## 3 'Schubert' c… 2 2520 0.793 1271 0.816 127 0.901 11
## 4 American bee… 0 17 0.104 4 0.0404 0 0 0
## 5 American bee… 1 18 0.110 19 0.192 1 0.1 0
## 6 American bee… 2 128 0.785 76 0.768 9 0.9 1
## 7 American elm 0 175 0.0304 84 0.0424 8 0.0404 1
## 8 American elm 1 922 0.160 335 0.169 34 0.172 4
## 9 American elm 2 4669 0.810 1560 0.788 156 0.788 27
## 10 American hop… 0 33 0.0467 22 0.0661 2 0.0513 0
## # … with 382 more rows, 1 more variable: pct_4orMore <dbl>, and abbreviated
## # variable names ¹health_score, ²pct_None, ³pct_1or2, ⁴pct_3or4, ⁵ct_4orMore
# Long format: drop the ct_* columns and stack the pct_* columns so each row
# is one (species, health_score, steward column name) with its `pct`.
spc_steward_health_long <- spc_steward_health_probs %>%
  dplyr::select(-starts_with("ct_")) %>%
  pivot_longer(
    cols = starts_with("pct_"),
    names_to = "val",
    values_to = "pct"
  ) %>%
  arrange(spc_common, val, health_score)
spc_steward_health_long
## # A tibble: 1,568 × 4
## # Groups: spc_common [132]
## spc_common health_score val pct
## <chr> <dbl> <chr> <dbl>
## 1 'Schubert' chokecherry 0 pct_1or2 0.0424
## 2 'Schubert' chokecherry 1 pct_1or2 0.141
## 3 'Schubert' chokecherry 2 pct_1or2 0.816
## 4 'Schubert' chokecherry 0 pct_3or4 0.0355
## 5 'Schubert' chokecherry 1 pct_3or4 0.0638
## 6 'Schubert' chokecherry 2 pct_3or4 0.901
## 7 'Schubert' chokecherry 0 pct_4orMore 0.0833
## 8 'Schubert' chokecherry 1 pct_4orMore 0
## 9 'Schubert' chokecherry 2 pct_4orMore 0.917
## 10 'Schubert' chokecherry 0 pct_None 0.0535
## # … with 1,558 more rows
# Strip the "pct_" prefix so `val` holds the bare steward level.
# The outer parentheses print the reassigned table.
(spc_steward_health_long <- spc_steward_health_long %>%
  mutate(val = str_remove(val, "pct_")))
## # A tibble: 1,568 × 4
## # Groups: spc_common [132]
## spc_common health_score val pct
## <chr> <dbl> <chr> <dbl>
## 1 'Schubert' chokecherry 0 1or2 0.0424
## 2 'Schubert' chokecherry 1 1or2 0.141
## 3 'Schubert' chokecherry 2 1or2 0.816
## 4 'Schubert' chokecherry 0 3or4 0.0355
## 5 'Schubert' chokecherry 1 3or4 0.0638
## 6 'Schubert' chokecherry 2 3or4 0.901
## 7 'Schubert' chokecherry 0 4orMore 0.0833
## 8 'Schubert' chokecherry 1 4orMore 0
## 9 'Schubert' chokecherry 2 4orMore 0.917
## 10 'Schubert' chokecherry 0 None 0.0535
## # … with 1,558 more rows
# Dodged bars of `pct` per steward level, coloured by health_score, with one
# facet per species.
# `val` is a character column, so ggplot would sort the x axis alphabetically
# ("1or2", "3or4", "4orMore", "None"). An explicit factor puts "None" first,
# matching the column order used when the wide table was built.
spc_steward_health_long %>%
  mutate(val = factor(val, levels = c("None", "1or2", "3or4", "4orMore"))) %>%
  ggplot(aes(x = val, y = pct, fill = as.factor(health_score))) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_wrap(~spc_common, ncol = 5) +
  labs(x = "steward", y = "pct", fill = "health_score")
Maybe the following grouping of bars is more informative.
# Show the first six rows of the full table, which still carries `health`.
head(spc_steward_health_probs_full, n = 6L)
## # A tibble: 6 × 11
## # Groups: spc_common [2]
## spc_c…¹ health healt…² ct_None pct_N…³ ct_1or2 pct_1…⁴ ct_3or4 pct_3…⁵ ct_4o…⁶
## <chr> <chr> <dbl> <int> <dbl> <int> <dbl> <int> <dbl> <int>
## 1 'Schub… Poor 0 170 0.0535 66 0.0424 5 0.0355 1
## 2 'Schub… Fair 1 488 0.154 220 0.141 9 0.0638 0
## 3 'Schub… Good 2 2520 0.793 1271 0.816 127 0.901 11
## 4 Americ… Poor 0 17 0.104 4 0.0404 0 0 0
## 5 Americ… Fair 1 18 0.110 19 0.192 1 0.1 0
## 6 Americ… Good 2 128 0.785 76 0.768 9 0.9 1
## # … with 1 more variable: pct_4orMore <dbl>, and abbreviated variable names
## # ¹spc_common, ²health_score, ³pct_None, ⁴pct_1or2, ⁵pct_3or4, ⁶ct_4orMore
# Dodged bars of `pct` per health label, coloured by steward level, with one
# facet per species.
# Both `health` and the steward level are character columns, so ggplot would
# sort them alphabetically (Fair, Good, Poor / 1or2, ..., None). health_score
# is kept so `health` can be ordered by it, and the steward level gets explicit
# factor levels with "None" first.
spc_steward_health_probs_full %>%
  # Ungroup so the factor levels are computed once over the whole table
  # rather than separately per species.
  ungroup() %>%
  dplyr::select(!starts_with("ct_")) %>%
  pivot_longer(
    cols = starts_with("pct_"),
    names_to = "val",
    values_to = "pct"
  ) %>%
  mutate(
    val = factor(
      str_replace(val, "pct_", ""),
      levels = c("None", "1or2", "3or4", "4orMore")
    ),
    # Order the health labels by their numeric score (lowest score first).
    health = reorder(health, health_score)
  ) %>%
  ggplot(aes(x = health, y = pct, fill = val)) +
  geom_bar(position = "dodge", stat = "identity") +
  facet_wrap(~spc_common, ncol = 5) +
  labs(x = "health", y = "pct", fill = "steward")
# Number of rows in `df` with status "Alive" and non-empty health and species,
# counted per (species, borough, health, steward) combination.
# The outer parentheses print the result.
(x1 <- df %>%
  dplyr::filter(health != "", spc_common != "", status == "Alive") %>%
  dplyr::select(tree_id, borough, status, health, spc_common, steward) %>%
  group_by(spc_common, borough, health, steward) %>%
  summarise(ct = n()))
## `summarise()` has grouped output by 'spc_common', 'borough', 'health'. You can
## override using the `.groups` argument.
## # A tibble: 4,554 × 5
## # Groups: spc_common, borough, health [1,808]
## spc_common borough health steward ct
## <chr> <chr> <chr> <chr> <int>
## 1 'Schubert' chokecherry Bronx Fair 1or2 22
## 2 'Schubert' chokecherry Bronx Fair 3or4 1
## 3 'Schubert' chokecherry Bronx Fair None 67
## 4 'Schubert' chokecherry Bronx Good 1or2 144
## 5 'Schubert' chokecherry Bronx Good 3or4 10
## 6 'Schubert' chokecherry Bronx Good 4orMore 1
## 7 'Schubert' chokecherry Bronx Good None 301
## 8 'Schubert' chokecherry Bronx Poor 1or2 4
## 9 'Schubert' chokecherry Bronx Poor None 25
## 10 'Schubert' chokecherry Brooklyn Fair 1or2 76
## # … with 4,544 more rows
# Show the first six rows of the per-combination counts.
head(x1, n = 6L)
## # A tibble: 6 × 5
## # Groups: spc_common, borough, health [2]
## spc_common borough health steward ct
## <chr> <chr> <chr> <chr> <int>
## 1 'Schubert' chokecherry Bronx Fair 1or2 22
## 2 'Schubert' chokecherry Bronx Fair 3or4 1
## 3 'Schubert' chokecherry Bronx Fair None 67
## 4 'Schubert' chokecherry Bronx Good 1or2 144
## 5 'Schubert' chokecherry Bronx Good 3or4 10
## 6 'Schubert' chokecherry Bronx Good 4orMore 1
# Borough view: total count per species with one column per borough.
# Species/borough pairs that do not occur are filled with 0.
(bv <- x1 %>%
  group_by(spc_common, borough) %>%
  summarise(ct = sum(ct)) %>%
  pivot_wider(names_from = borough, values_from = ct, values_fill = 0))
## `summarise()` has grouped output by 'spc_common'. You can override using the
## `.groups` argument.
## # A tibble: 132 × 6
## # Groups: spc_common [132]
## spc_common Bronx Brooklyn Manhattan Queens `Staten Island`
## <chr> <int> <int> <int> <int> <int>
## 1 'Schubert' chokecherry 575 1308 163 2013 829
## 2 American beech 31 83 22 88 49
## 3 American elm 1471 2587 1698 1709 510
## 4 American hophornbeam 185 366 84 295 151
## 5 American hornbeam 295 343 85 525 269
## 6 American larch 12 8 7 10 9
## 7 American linden 2132 4023 1583 4769 1023
## 8 Amur cork tree 32 46 8 71 26
## 9 Amur maackia 466 729 59 675 268
## 10 Amur maple 431 263 30 671 654
## # … with 122 more rows
# Heat map of counts by species (rows) and borough (columns); each tile is
# labelled with its raw count.
# `bv` was widened with values_fill = 0, so some counts are 0 and log(0) is
# -Inf, which is not a finite fill value. log1p(ct) = log(1 + ct) maps 0 to 0
# and keeps every tile on the colour scale.
bv %>%
  pivot_longer(!spc_common, names_to = "borough", values_to = "ct") %>%
  ggplot(aes(x = borough, y = spc_common, fill = log1p(ct))) +
  geom_tile() +
  scale_fill_viridis_c() +
  geom_text(aes(label = ct))
What proportion of trees are in good, fair, or poor health according to the health variable?
# Per (borough, species): count in each health category (ct2), total across
# health categories (sct2), and the share pct = ct2 / sct2.
# The outer parentheses print the result.
(x2 <- x1 %>%
  group_by(borough, spc_common, health) %>%
  summarise(ct2 = sum(ct)) %>%
  group_by(borough, spc_common) %>%
  mutate(
    sct2 = sum(ct2),
    pct = ct2 / sct2
  ))
## `summarise()` has grouped output by 'borough', 'spc_common'. You can override
## using the `.groups` argument.
## # A tibble: 1,808 × 6
## # Groups: borough, spc_common [655]
## borough spc_common health ct2 sct2 pct
## <chr> <chr> <chr> <int> <int> <dbl>
## 1 Bronx 'Schubert' chokecherry Fair 90 575 0.157
## 2 Bronx 'Schubert' chokecherry Good 456 575 0.793
## 3 Bronx 'Schubert' chokecherry Poor 29 575 0.0504
## 4 Bronx American beech Fair 7 31 0.226
## 5 Bronx American beech Good 21 31 0.677
## 6 Bronx American beech Poor 3 31 0.0968
## 7 Bronx American elm Fair 248 1471 0.169
## 8 Bronx American elm Good 1176 1471 0.799
## 9 Bronx American elm Poor 47 1471 0.0320
## 10 Bronx American hophornbeam Fair 28 185 0.151
## # … with 1,798 more rows
Test case: health proportions by borough for a single species
# Spot check: health shares for one species, one row per borough and one
# column per health category.
my_spc <- "American hornbeam"
x2 %>%
  ungroup() %>%
  dplyr::filter(spc_common == my_spc) %>%
  dplyr::select(borough, health, pct) %>%
  pivot_wider(names_from = health, values_from = pct, values_fill = 0)
## # A tibble: 5 × 4
## borough Fair Good Poor
## <chr> <dbl> <dbl> <dbl>
## 1 Bronx 0.207 0.715 0.0780
## 2 Brooklyn 0.131 0.802 0.0671
## 3 Manhattan 0.153 0.788 0.0588
## 4 Queens 0.109 0.836 0.0552
## 5 Staten Island 0.100 0.851 0.0483