wqi_df <- read_csv('WQI_LandCover_ManuscriptData.csv')
Reformat data to sum LC and bin WQI
Made decision to include ‘natural vegetation’ to include shrub and herbaceous due to LC category definitions (https://www.mrlc.gov/data/legends/national-land-cover-database-2011-nlcd2011-legend)
wqi_df_bin <-
wqi_df %>%
mutate(
Agriculture = Hay + Cult_crop,
Developed = Dev_Open + Dev_Low + Dev_Med + Dev_High,
Natural_Vegetation = Decid_Forest + Evergreen_Forest + Mixed_Forest + Shrub + Herb,
LC_check = Agriculture + Developed + Natural_Vegetation,
bin = as.numeric(cut(WQI, breaks=seq(from=0, to = 100, by= 5), labels = F))
)
Substantial variability in WQI and land cover categories is present. The LC_check column indicates the sum of the Agriculture, Developed, and Natural_Vegetation categories. The bin column corresponds to 5 point bins of WQI score.
| WQI | Agriculture | Developed | Natural_Vegetation | LC_check | bin | |
|---|---|---|---|---|---|---|
| Min. :20.00 | Min. : 0.00 | Min. : 0.60 | Min. : 0.30 | Min. : 85.90 | Min. : 4.00 | |
| 1st Qu.:53.00 | 1st Qu.:12.90 | 1st Qu.: 6.40 | 1st Qu.:30.20 | 1st Qu.: 96.62 | 1st Qu.:11.00 | |
| Median :61.00 | Median :27.40 | Median :11.20 | Median :56.60 | Median : 98.20 | Median :13.00 | |
| Mean :59.75 | Mean :28.08 | Mean :16.98 | Mean :52.52 | Mean : 97.58 | Mean :12.35 | |
| 3rd Qu.:67.00 | 3rd Qu.:41.10 | 3rd Qu.:20.07 | 3rd Qu.:71.70 | 3rd Qu.: 99.20 | 3rd Qu.:14.00 | |
| Max. :86.00 | Max. :88.40 | Max. :85.70 | Max. :98.90 | Max. :100.20 | Max. :18.00 |
A linear model containing % Agriculture and % Developed explained 55.8% of variation in WQI; % Natural Vegetation explained 52.5%.
##
## Linear Model Results of Land Use vs. WQI
## ========================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------
## WQI
## (1) (2) (3) (4)
## ------------------------------------------------------------------------------------------------------------------------
## Intercept 38.916*** (0.876) 69.214*** (0.821) 67.135*** (0.636) 77.583*** (0.740)
## % Natural Vegetation 0.397*** (0.015)
## % Agriculture -0.337*** (0.024) -0.358*** (0.019)
## % Developed -0.435*** (0.027) -0.458*** (0.022)
## ------------------------------------------------------------------------------------------------------------------------
## Observations 610 610 610 610
## R2 0.526 0.238 0.292 0.560
## Adjusted R2 0.525 0.236 0.291 0.558
## Residual Std. Error 8.738 (df = 608) 11.085 (df = 608) 10.683 (df = 608) 8.432 (df = 607)
## F Statistic 675.271*** (df = 1; 608) 189.381*** (df = 1; 608) 250.541*** (df = 1; 608) 385.530*** (df = 2; 607)
## ========================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Now, summarize WQI and LU by WQI bin:
wqi_df_bin_summ <- ddply(wqi_df_bin, 'bin', summarise,
## wqi summ
wqi_N = sum(!is.na(WQI)),
wqi_min = min(WQI, na.rm = TRUE),
wqi_max = max(WQI, na.rm = TRUE),
wqi_mean = mean(WQI, na.rm = TRUE),
wqi_sd = sd(WQI, na.rm = TRUE),
## lc summ
Ag_mean = mean(Agriculture, na.rm = TRUE),
Ag_sd = sd(Agriculture, na.rm = TRUE),
Dev_mean = mean(Developed, na.rm = TRUE),
Dev_sd = sd(Developed, na.rm = TRUE),
Veg_mean = mean(Natural_Vegetation, na.rm = TRUE),
Veg_sd = sd(Natural_Vegetation, na.rm = TRUE)
)
| bin | wqi_N | wqi_min | wqi_max | wqi_mean | wqi_sd | Ag_mean | Ag_sd | Dev_mean | Dev_sd | Veg_mean | Veg_sd |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | 1 | 20 | 20 | 20.00000 | NA | 81.600000 | NA | 17.900000 | NA | 0.30000 | NA |
| 5 | 1 | 24 | 24 | 24.00000 | NA | 50.200000 | NA | 45.500000 | NA | 3.90000 | NA |
| 6 | 4 | 27 | 30 | 29.00000 | 1.414214 | 44.500000 | 29.558078 | 27.275000 | 29.205978 | 26.87500 | 19.687623 |
| 7 | 13 | 31 | 35 | 33.07692 | 1.552500 | 50.123077 | 31.347173 | 27.307692 | 23.539133 | 21.56154 | 18.632478 |
| 8 | 47 | 36 | 40 | 38.02128 | 1.242182 | 36.623404 | 24.299179 | 36.197872 | 22.451401 | 25.75319 | 11.168820 |
| 9 | 29 | 41 | 45 | 42.75862 | 1.430664 | 34.134483 | 24.425298 | 32.396552 | 26.625665 | 31.98621 | 17.768546 |
| 10 | 39 | 46 | 50 | 48.89744 | 1.165172 | 33.679487 | 17.381330 | 21.223077 | 13.696897 | 43.19744 | 19.463900 |
| 11 | 61 | 51 | 55 | 53.26230 | 1.340418 | 40.398361 | 14.062604 | 22.714754 | 10.088737 | 34.73115 | 12.332255 |
| 12 | 91 | 56 | 60 | 57.90110 | 1.382871 | 34.741758 | 15.897687 | 18.046154 | 11.167953 | 45.01429 | 18.920621 |
| 13 | 125 | 61 | 65 | 63.08000 | 1.400461 | 23.992800 | 10.358082 | 14.404000 | 12.997695 | 59.39760 | 16.371146 |
| 14 | 103 | 66 | 70 | 67.63107 | 1.357552 | 21.726214 | 11.522858 | 8.975728 | 5.532710 | 65.88155 | 14.202884 |
| 15 | 11 | 71 | 75 | 73.27273 | 2.004540 | 21.572727 | 9.922710 | 9.563636 | 4.317933 | 65.12727 | 7.453334 |
| 16 | 56 | 76 | 80 | 77.94643 | 1.444987 | 16.166071 | 14.531837 | 7.633929 | 5.142171 | 72.52143 | 15.125532 |
| 17 | 28 | 81 | 85 | 82.03571 | 1.231745 | 4.896429 | 5.244078 | 2.639286 | 1.880374 | 90.02857 | 6.772759 |
| 18 | 1 | 86 | 86 | 86.00000 | NA | 0.000000 | NA | 2.200000 | NA | 97.80000 | NA |
Melt to get into long format for ggplot
wqi_df_bin_summ_melt <-
reshape2::melt(wqi_df_bin_summ, id = c('bin', 'wqi_N', 'wqi_min', 'wqi_max', 'wqi_sd', 'Ag_sd', 'Dev_sd', 'Veg_sd'))
Now, time to plot with a ton of ggplot code
ggplot(subset(wqi_df_bin_summ_melt, variable != 'wqi_mean'),
aes(x = bin, y= value, fill = variable)) +
geom_bar(stat = 'identity', alpha = 0.7) +
scale_fill_manual(values = c('#fdbb84', '#e34a33', '#2ca25f'), name = 'Land Cover Type',
breaks=c("Ag_mean", "Dev_mean", "Veg_mean"),
labels=c("Agriculture", "Developed", "Natural Vegetation")) +
# now add wqi binned scores
geom_errorbar(data=subset(wqi_df_bin_summ_melt, variable == 'wqi_mean'),
aes(x = bin,
ymax = value + wqi_sd,
ymin = value - wqi_sd),
size = 1, width = 0, inherit.aes = F) +
geom_point(data=subset(wqi_df_bin_summ_melt, variable == 'wqi_mean'),
aes(x = bin, y = value),
size = 2, pch = 21,
color = 'black', fill = 'gray',
inherit.aes = F) +
labs(x = 'WQI Score', y = 'Land Cover Percentage') +
scale_y_continuous(expand = c(0,0)) +
scale_x_continuous(expand = c(0,0), limits = c(0,20),
minor_breaks = NULL,
breaks = seq(from = 1.5, to = 20.5, by = 1),
labels = seq(from=5, to = 100, by= 5)) +
theme_minimal() + theme(legend.position = 'bottom')
Landcover figure caption: Mean percent land cover within 5 point WQI intervals. Bars indicate percentage of land cover by type, points represent mean WQI score within the same intervals. +/- 1 standard deviation is indicated by error bars around points.
## Warning: Removed 3 rows containing missing values (geom_errorbar).