Read PJ Plot Metrics Data
metrics = read.csv("plot_level_structure_vs_lidar_metrics_20220923_nodups_by_date.csv")
Outlier Removal
outliers = read.csv("outliers.csv")
outliers = as.data.frame(outliers)
outliers = subset(outliers, select = c(plot_id, Rating))
colnames(outliers)[colnames(outliers) == "plot_id"] = "plot.id"
outliers = merge(metrics, outliers, by="plot.id")
outliers = outliers %>% filter(!(Rating == '3'))
metrics = subset(outliers, select = -c(Rating))
#set.seed(4077)
#x.vars = as.data.frame(metrics[,13:109]) #define independent (predictor) variables in metrics dataframe
#y.var = metrics[,10] #define dependent variable (AGB) in metrics dataframe
#metrics.vsurf = VSURF(x.vars, y.var, RFimplem = "ranger", parallel = TRUE, ncores = 20, importance = TRUE) #run VSURF
Variables that will be used in RF model as determined by the VSURF
#xvarsnew = x.vars[metrics.vsurf$varselect.pred] #get names of VSURF chosen variables
#colnames(xvarsnew)
new_metrics = metrics[c("plot.id","bio","cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300","p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")] #put into dataframe the metrics to be used in RF as chosen by the VSURF
Read and Clean Landfire (PJ Types) Data
pj_types = read.csv("PJ_types_updated.csv") #read pj types csv to get groupings of woodlands
pj_types$EVT_NAME[pj_types$EVT_NAME=="Great Basin Pinyon-Juniper Woodland "]="Great Basin Pinyon-Juniper Woodland" #fix the pj woodland great basin syntax error
#unique(pj_types$EVT_NAME) #check that there are 5 unique woodland names
colnames(pj_types)[colnames(pj_types) == "plot_id"] = "plot.id" #fix plot id syntax error
pj_types$EVT_NAME[pj_types$EVT_NAME=="Colorado Plateau Pinyon-Juniper Shrubland"]="Colorado Plateau Pinyon-Juniper Woodland" #combine Colorado Plateau shrubland and woodland because it is the same woodland, only different cover type
Combine PJ_types Landfire data and new_metrics (variables chosen by vsurf) data so you have a PJ type for each of the plots and values for the VSURF chosen metrics
mets.types = merge(new_metrics, pj_types, by="plot.id") #merge predictor variables and pj groups on plot.id
colnames(mets.types)[colnames(mets.types) == "EVT_NAME"] = "group" #rename EVT_NAME to "group". This is the PJ Woodland Type as defined by Landfire data
Create function so I can specify training group, and testing group
fun1 = function(x, y) {
df.train = x
df.test = y
mod = ranger(bio ~ cc + cd + mh.ag + vrd.0.50 + vnrd.50.100 + p.45 + p.25
+ p.55 + vrd.250.300 + p.20 + cr.qh.20 + p.85 + plot.y + plot.x
+ skew.ag + vnrd.0.50 + twi, data = df.train)
pred = predict(mod, df.test)
r2.test = summary(lm(pred$predictions ~ df.test$bio))$adj.r.squared
return(r2.test)
}
Landfire modeling
set.seed(4379)
df = mets.types[c("bio","group","cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300", "p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")] #Landfire Data frame
#subset data to their stratas
cpsubset = subset(df, group == "Colorado Plateau Pinyon-Juniper Woodland")
masubset = subset(df, group == "Madrean Pinyon-Juniper Woodland")
srsubset = subset(df, group == "Southern Rocky Mountain Pinyon-Juniper Woodland")
gbsubset = subset(df, group == "Great Basin Pinyon-Juniper Woodland")
typeslist = list(cpsubset,masubset,srsubset,gbsubset)
cp = sapply(typeslist, y = cpsubset, fun1)
ma = sapply(typeslist, y = masubset, fun1)
sr = sapply(typeslist, y = srsubset, fun1)
gb = sapply(typeslist, y = gbsubset, fun1)
types_r2test = cbind(cp,ma,sr,gb) #make sure its cbind!
types_r2test = as.data.frame(types_r2test)
colnames = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
types_r2test$name = colnames
names(types_r2test) = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin', 'name')
col_order = c('name','Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
types_r2test = types_r2test[, col_order]
types_r2test = as.data.frame(types_r2test) #Final Dataframe R^2 Test
kable(types_r2test)
name | Colorado Plateau | Madrean | Southern Rocky Mountain | Great Basin |
---|---|---|---|---|
Colorado Plateau | 0.9368563 | 0.6031054 | 0.2292131 | 0.4635338 |
Madrean | 0.4150234 | 0.9390612 | 0.1724020 | 0.5972720 |
Southern Rocky Mountain | 0.2666712 | 0.3064240 | 0.9154455 | 0.3519319 |
Great Basin | 0.4128860 | 0.4466395 | 0.1790556 | 0.9374206 |
Quantile-based bias correction for Landfire models ###### Create function so I can specify training, testing
fun2 = function(x, y) {
dfTraining = x
dfTest = y
x.train = dfTraining #all independent variables + biomass
y.train = dfTraining[,1] #biomass
x.test = dfTest[,-1] #remove biomass
y.test = dfTest[,1] #keep only biomass
mod = ranger(bio ~ ., data = x.train) #define model
pred = predict(mod, data = x.train) #predict using model on train data
qs = seq(0,1,0.001) #get quantiles
q.obs = quantile(y.train, probs = qs)
q.pred = quantile(pred$predictions, probs = qs)
q.diff = q.obs - q.pred # get quantile differences
pred.test = predict(mod, data = x.test)
pred.test.corr = pred.test$predictions + approx(q.pred, q.diff, pred.test$predictions)$y # apply
r2.testnew = summary(lm(pred.test.corr ~ y.test))$adj.r.squared
return(r2.testnew)
}
Apply quantile bias correction function to Landfire data
set.seed(302)
cp = sapply(typeslist, y = cpsubset, fun2)
ma = sapply(typeslist, y = masubset, fun2)
sr = sapply(typeslist, y = srsubset, fun2)
gb = sapply(typeslist, y = gbsubset, fun2)
types_r2test_new = cbind(cp,ma,sr,gb) #make sure its cbind!
types_r2test_new = as.data.frame(types_r2test_new)
colnames = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
types_r2test_new$name = colnames
names(types_r2test_new) = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin', 'name')
col_order = c('name','Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
types_r2test_new = types_r2test_new[, col_order]
types_r2test_new = as.data.frame(types_r2test_new) #Final Dataframe R^2 Test
kable(types_r2test_new)
name | Colorado Plateau | Madrean | Southern Rocky Mountain | Great Basin |
---|---|---|---|---|
Colorado Plateau | 0.9486089 | 0.6170014 | 0.2238475 | 0.4527957 |
Madrean | 0.3925832 | 0.9315172 | 0.1736019 | 0.5619276 |
Southern Rocky Mountain | 0.2577034 | 0.3232358 | 0.9269643 | 0.3099159 |
Great Basin | 0.4035137 | 0.4063400 | 0.1852104 | 0.9569060 |
Landfire Heatmap
par(mar=c(5.1, 4.1, 4.1, 4.1)) # adapt margins
rownames(types_r2test_new) = types_r2test_new$name
g = types_r2test_new[, -1] #remove name variable
data = g[sapply(g,is.numeric)]
data = as.matrix(g)
data1 = melt(data)
#Var1 is training strata V2 is test strata
landfire_heatmap = ggplot(data1, aes(x = Var2,y = Var1, fill = value))+
geom_tile()+
scale_fill_continuous(high = "#132B43", low = "#56B1F7") +
labs(title="Landfire", x ='Test', y = 'Train') +
geom_text(aes(label = round(value, 2)), color = "white", size = 4)
landfire_heatmap
Group number of plots for each dataset strata
table(mets.types$group)
##
## Colorado Plateau Pinyon-Juniper Woodland
## 345
## Great Basin Pinyon-Juniper Woodland
## 20
## Madrean Pinyon-Juniper Woodland
## 106
## Southern Rocky Mountain Pinyon-Juniper Woodland
## 106
Read and clean data
climate = read.csv("plot_NAclimate.csv")
climate = climate %>% rename(plot.id = plot_id) #rename column plot_id to plot.id
climate = climate[(climate$plot.id %in% metrics$plot.id),] #remove data plots not in metrics
climate = data.frame(climate)
#Rename zone numbers to climate zone identifiers as outlined in the Koppen-Geiger Climate Classification Map and visualize zone frequencies
climate$zone_num[climate$zone_num=="7"]="Bsk" #Bsk: Cold, semi-arid climate
climate$zone_num[climate$zone_num=="26"]="Dfb" #Dfb: Humid Continental Mild Summer, Wet all Year
climate$zone_num[climate$zone_num=="18"]="Dsb" #Dsb: Humid Continental Climate - Dry Cool Summer
climate$zone_num[climate$zone_num=="5"]="Bwk" #Bwk: Cold Desert Climate
climate$zone_num[climate$zone_num=="8"]="Csa" #Csa: Hot-summer Mediterranean Climate
climate$zone_num[climate$zone_num=="9"]="Csb" #Csb: Warm-Summer Mediterranean Climate
#Merge new_metrics (VSURF chosen variables) and the climate zone for each plot based on the plot id
mets.types.climate = merge(new_metrics, climate, by="plot.id") #merge predictor variables and climate groups on plot.id
colnames(mets.types.climate)[colnames(mets.types.climate) == "zone_num"] = "group" #rename zone_num to "group"
Climate modeling
df1 = mets.types.climate[c("bio","group","cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300", "p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")] #define new dataframe with climate data and vsurf metrics
#subset data to their stratas
bsksubset = subset(df1, group == "Bsk")
dfbsubset = subset(df1, group == "Dfb")
dsbsubset = subset(df1, group == "Dsb")
bwksubset = subset(df1, group == "Bwk")
csasubset = subset(df1, group == "Csa")
csbsubset = subset(df1, group == "Csb")
climatelist = list(bsksubset,dfbsubset,dsbsubset,bwksubset,csasubset,csbsubset)
bsk = sapply(climatelist, y = bsksubset, fun1)
dfb = sapply(climatelist, y = dfbsubset, fun1)
dsb = sapply(climatelist, y = dsbsubset, fun1)
bwk = sapply(climatelist, y = bwksubset, fun1)
csa = sapply(climatelist, y = csasubset, fun1)
csb = sapply(climatelist, y = csbsubset, fun1)
climate_r2test = cbind(bsk,dfb,dsb,bwk,csa,csb) #make sure its cbind!
climate_r2test = as.data.frame(climate_r2test)
colnames = c('bsk', 'dfb', 'dsb','bwk', 'csa', 'csb')
climate_r2test$name = colnames
names(climate_r2test) = c('bsk', 'dfb', 'dsb','bwk', 'csa', 'csb', 'name')
col_order = c('name','bsk', 'dfb', 'dsb','bwk', 'csa', 'csb')
climate_r2test = climate_r2test[, col_order]
climate_r2test = as.data.frame(climate_r2test) #Final Dataframe R^2 Test
kable(climate_r2test)
name | bsk | dfb | dsb | bwk | csa | csb |
---|---|---|---|---|---|---|
bsk | 0.9350923 | 0.8588505 | 0.2855222 | 0.6358336 | 0.4367911 | 0.5898570 |
dfb | 0.3273430 | 0.9428021 | 0.3067462 | -0.1765821 | 0.4866249 | 0.4718233 |
dsb | 0.2959589 | 0.5487451 | 0.8816401 | 0.3206864 | 0.5107904 | 0.5639845 |
bwk | 0.2447129 | 0.1294043 | 0.3072177 | 0.8823109 | 0.3877524 | 0.4341143 |
csa | 0.3016368 | 0.4095260 | 0.2893984 | 0.2559004 | 0.9300191 | 0.5165714 |
csb | 0.3852891 | 0.4697509 | 0.3150601 | 0.1957733 | 0.5132352 | 0.9386295 |
Apply quantile bias correction function to Climate data
set.seed(4382)
climatelistnew = list(bsksubset, dfbsubset, dsbsubset, bwksubset, csasubset,csbsubset)
bsk = sapply(climatelistnew, y = bsksubset, fun2)
dfb = sapply(climatelist, y = dfbsubset, fun2)
dsb = sapply(climatelistnew, y = dsbsubset, fun2)
bwk = sapply(climatelist, y = bwksubset, fun2)
csa = sapply(climatelistnew, y = csasubset, fun2)
csb = sapply(climatelistnew, y = csbsubset, fun2)
#dfb and bwk missing. Need to do dfb individually and bind
climate_r2test_new = cbind(bsk,dfb,dsb,bwk,csa,csb) #make sure its cbind!
climate_r2test_new = as.data.frame(climate_r2test_new)
colnames = c('bsk', 'dfb', 'dsb', 'bwk','csa', 'csb')
climate_r2test_new$name = colnames
col_order = c('name', 'bsk','dfb','dsb', 'bwk', 'csa', 'csb')
climate_r2test_new = climate_r2test_new[, col_order]
climate_r2test_new = as.data.frame(climate_r2test_new) #Final Dataframe R^2 Test
kable(climate_r2test_new)
name | bsk | dfb | dsb | bwk | csa | csb |
---|---|---|---|---|---|---|
bsk | 0.9426097 | 0.8481140 | 0.2686701 | 0.7534487 | 0.4433002 | 0.5603104 |
dfb | 0.3494935 | 0.9132276 | 0.2248574 | -0.0028074 | 0.3883737 | 0.4205612 |
dsb | 0.3147162 | 0.5232728 | 0.9157291 | 0.4217184 | 0.5240855 | 0.5251109 |
bwk | 0.2342202 | 0.1846932 | 0.2813620 | 0.9220809 | 0.4055287 | 0.4343965 |
csa | 0.3086427 | 0.3822028 | 0.2595292 | 0.3456775 | 0.9540287 | 0.5385872 |
csb | 0.3488965 | 0.5651654 | 0.3186887 | 0.1842543 | 0.5616905 | 0.9239666 |
Climate Heatmap
par(mar=c(5.1, 4.1, 4.1, 4.1)) # adapt margins
rownames(climate_r2test_new) = climate_r2test_new$name
h = climate_r2test_new[, -1] #remove name variable
data2 = h[sapply(h,is.numeric)]
data2 = as.matrix(h)
data3 = melt(data2)
#Var1 is training strata V2 is test strata
climate_heatmap = ggplot(data3, aes(x = Var2,y = Var1, fill = value))+
geom_tile()+
scale_fill_continuous(high = "#132B43", low = "#56B1F7") +
labs(title="Climate", x ='Test', y = 'Train') +
geom_text(aes(label = round(value, 2)), color = "white", size = 4)
climate_heatmap
Group number of plots for each dataset strata
table(mets.types.climate$group)
##
## Bsk Bwk Csa Csb Dfb Dsb
## 407 6 79 32 11 42
Read and clean ecoregions data
ecoregions = read.csv("eco_plots_table_cleaned.csv")
ecoregions = ecoregions %>% rename(plot.id = plot_id) #rename column plot_id to plot.id
ecoregions = subset(ecoregions, select = -c(L2, L1))
colnames(ecoregions)[colnames(ecoregions) == "L3"] = "group" #rename L3 to "group"
mets.types.eco = merge(new_metrics, ecoregions, by="plot.id")
ecoregions = subset(mets.types.eco, select = c(plot.id, group))
ecoregions = subset(mets.types.eco, plot.id != 'NMFWRI_NMSF_P25' | mets.types.eco$group != "Southern Rockies") #delete duplicate row
ecoregions = subset(ecoregions, select = c(plot.id, group))
mets.types.eco = merge(new_metrics, ecoregions, by="plot.id")
Ecoregions modeling
df2 = mets.types.eco[c("bio","group","cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300", "p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")] #Define new dataframe with ecoregions and VSURF metrics
#subset data to their stratas
cplsubset = subset(df2, group == "Colorado Plateaus")
wumsubset = subset(df2, group == "Wasatch and Uinta Mountains")
anmmsubset = subset(df2, group == "Arizona/New Mexico Mountains")
cbrsubset = subset(df2, group == "Central Basin and Range")
srosubset = subset(df2, group == "Southern Rockies")
anmpsubset = subset(df2, group == "Arizona/New Mexico Plateau")
nbrsubset = subset(df2, group == "Northern Basin and Range")
ecolist = list(cplsubset,wumsubset,anmmsubset,cbrsubset,srosubset,anmpsubset,nbrsubset)
cpl = sapply(ecolist, y = cplsubset, fun1)
wum = sapply(ecolist, y = wumsubset, fun1)
anmm = sapply(ecolist, y = anmmsubset, fun1)
cbr = sapply(ecolist, y = cbrsubset, fun1)
sro = sapply(ecolist, y = srosubset, fun1)
anmp = sapply(ecolist, y = anmpsubset, fun1)
nbr = sapply(ecolist, y = nbrsubset, fun1)
eco_r2test = cbind(cpl,wum,anmm,cbr,sro,anmp, nbr) #make sure its cbind!
eco_r2test = as.data.frame(eco_r2test)
colnames = c('Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'Northern Basin and Range')
eco_r2test$name = colnames
names(eco_r2test) = c('Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'Northern Basin and Range','name')
col_order = c('name','Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'Northern Basin and Range')
eco_r2test$name = as.character(eco_r2test$name)
eco_r2test = eco_r2test[, col_order]
eco_r2test = as.data.frame(eco_r2test)
kable(eco_r2test)
name | Colorado Plateaus | Wasatch and Uinta Mountains | Arizona/New Mexico Mountains | Central Basin and Range | Southern Rockies | Arizona/New Mexico Plateau | Northern Basin and Range |
---|---|---|---|---|---|---|---|
Colorado Plateaus | 0.9504143 | 0.7146891 | 0.4442224 | 0.7170951 | 0.1933612 | 0.3845464 | 0.1576704 |
Wasatch and Uinta Mountains | 0.5323348 | 0.9394347 | 0.4211500 | 0.7369050 | 0.0654008 | 0.1901524 | 0.1912149 |
Arizona/New Mexico Mountains | 0.5316391 | 0.6121587 | 0.9380635 | 0.7965292 | 0.1571317 | 0.3744238 | -0.1926878 |
Central Basin and Range | 0.5213767 | 0.7165116 | 0.4039291 | 0.9453706 | 0.1167281 | 0.1859021 | 0.3891940 |
Southern Rockies | 0.2984375 | 0.3163917 | 0.0652367 | 0.4018121 | 0.9101661 | 0.0267937 | 0.3548266 |
Arizona/New Mexico Plateau | 0.2850045 | 0.3690173 | 0.2383918 | 0.2402829 | 0.0135441 | 0.9316545 | -0.3569439 |
Northern Basin and Range | 0.5001275 | 0.4899798 | 0.4992736 | 0.3998993 | 0.5005482 | 0.4861693 | 0.0515958 |
Quantile bias correction Ecoregions
#Nbr excluded due to not enough points? Errors abound
set.seed(27183)
ecolist = list(cplsubset,wumsubset,anmmsubset,cbrsubset,srosubset,anmpsubset)
cpl = sapply(ecolist, y = cplsubset, fun2)
wum = sapply(ecolist, y = wumsubset, fun2)
anmm = sapply(ecolist, y = anmmsubset, fun2)
cbr = sapply(ecolist, y = cbrsubset, fun2)
sro = sapply(ecolist, y = srosubset, fun2)
anmp = sapply(ecolist, y = anmpsubset, fun2)
#nbr = sapply(ecolist, y = nbrsubset, fun2) #nbr being difficult, need to apply function separately and add to table --> "need at least two non-NA values to interpolate"
eco_r2test_new = cbind(cpl,wum,anmm,cbr,sro,anmp) #make sure its cbind!
eco_r2test_new = as.data.frame(eco_r2test_new)
colnames = c('Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau')
eco_r2test_new$name = colnames
names(eco_r2test_new) = c('Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'name')
col_order = c('name','Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau')
eco_r2test_new$name = as.character(eco_r2test_new$name)
eco_r2test_new = eco_r2test_new[, col_order]
eco_r2test_new = as.data.frame(eco_r2test_new) #double check column names all correspond correctly
kable(eco_r2test_new)
name | Colorado Plateaus | Wasatch and Uinta Mountains | Arizona/New Mexico Mountains | Central Basin and Range | Southern Rockies | Arizona/New Mexico Plateau |
---|---|---|---|---|---|---|
Colorado Plateaus | 0.9480693 | 0.7036532 | 0.4403688 | 0.7050489 | 0.1851302 | 0.3780664 |
Wasatch and Uinta Mountains | 0.5049594 | 0.9714311 | 0.3896303 | 0.7027209 | 0.0747271 | 0.1966509 |
Arizona/New Mexico Mountains | 0.5391734 | 0.6327346 | 0.9387260 | 0.8019751 | 0.1597679 | 0.3629285 |
Central Basin and Range | 0.4995647 | 0.7036838 | 0.3986290 | 0.9605335 | 0.1084056 | 0.2007989 |
Southern Rockies | 0.3183344 | 0.3014855 | 0.0676894 | 0.2653763 | 0.9272947 | 0.0866184 |
Arizona/New Mexico Plateau | 0.2893662 | 0.3016727 | 0.2210203 | 0.2520933 | 0.0112961 | 0.9512367 |
Ecoregion Heatmap
par(mar=c(5.1, 4.1, 4.1, 4.1)) # adapt margins
rownames(eco_r2test_new) = eco_r2test_new$name
j = eco_r2test_new[, -1] #remove name variable
data4 = j[sapply(j,is.numeric)]
data4 = as.matrix(j)
data5 = melt(data4)
#Var1 is training strata V2 is test strata
ecoregion_heatmap = ggplot(data5, aes(x = Var2,y = Var1, fill = value))+
geom_tile()+
scale_fill_continuous(high = "#132B43", low = "#56B1F7") +
labs(title="Ecoregions", x ='Test', y = 'Train') +
scale_x_discrete(labels = label_wrap(5))+
geom_text(aes(label = round(value, 2)), color = "white", size = 4)
#+theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
ecoregion_heatmap
Group number of plots for each dataset strata
table(mets.types.eco$group)
##
## Arizona/New Mexico Mountains Arizona/New Mexico Plateau
## 316 41
## Central Basin and Range Colorado Plateaus
## 17 125
## Northern Basin and Range Southern Rockies
## 3 43
## Wasatch and Uinta Mountains
## 32
Add landfire, climate, ecoregion strata to one data frame
#Merge new_metrics with landfire, climate, and ecoregions variables by plot id
#pj_types #Landfire
colnames(pj_types)[colnames(pj_types) == "EVT_NAME"] = "landfire" #rename EVT_NAME to "landfire"
#climate #Koppen-Geiger
colnames(climate)[colnames(climate) == "zone_num"] = "climate" #rename zone_num to climate
#ecoregions #L3 ecoregion
colnames(ecoregions)[colnames(ecoregions) == "group"] = "ecoregion" #rename L3 to "group"
#ecoregions = ecoregions[c('plot.id','ecoregion')] #subset ecoregions data to plot.id and L3
#Merge all dataframes to get all strata and metrics into one dataframe (onemodel)
data3 = merge(x = new_metrics, y = pj_types, by = "plot.id", all.x = TRUE)
data4 = merge(x = data3, y = climate, by = "plot.id", all.x = TRUE)
onemodel1 = merge(x = data4, y = ecoregions, by = "plot.id", all.x = TRUE)
onemodel = onemodel1[c("plot.id", "landfire", "climate", "ecoregion", "bio", "cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300", "p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")] #define new dataframe for final model
Ranger and Mixed Effects Models
#Should basic ranger model include all strata as factors?
set.seed(17695)
#do not want strata in x cols dataframe for mixed effects. Also excludes other strata
#Incorporate strata as model predictors in RF
#split into training and test data
sample = sample(c(TRUE, FALSE), nrow(onemodel), replace=TRUE, prob=c(0.70,0.30))
df.train = onemodel[sample, ]
df.test = onemodel[!sample, ]
#RF with bias correction is a ranger model with all strata as factors
# make factors
df.train$ecoregion = as.factor(df.train$ecoregion)
df.test$ecoregion = as.factor(df.test$ecoregion)
df.train$landfire = as.factor(df.train$landfire)
df.test$landfire = as.factor(df.test$landfire)
df.train$climate = as.factor(df.train$climate)
df.test$climate = as.factor(df.test$climate)
#define independent and dependent variables
y = df.train$bio
X = df.train[,c(6:22)] #regular ranger model --> does not include strata as factors in predictor variables
# build basic ranger model (does not include strata as factors in df)
mod = ranger(x = X,
y = y,
data = df.train)
# get quantile-based bias correction -- basic ranger model
pred.mod = predict(mod, df.train)$predictions
obs = df.train$bio
qs = seq(0,1,0.001)
q.obs.mod = quantile(obs, probs = qs)
q.pred.mod = quantile(pred.mod, probs = qs)
q.diff.mod = q.obs.mod - q.pred.mod
# make predictions on test data
pred.mod = predict(mod, df.test)$predictions
obs = df.test$bio
# perform bias correction
pred.mod.bc = pred.mod + approx(q.pred.mod, q.diff.mod, pred.mod, rule = 2)$y
#Mixed Effects Landfire Model
y = df.train$bio
X.me = df.train[,c(6:22)] #random effects model --> exclude 'landfire' from predictor variables and use as random effect. Excludes climate and ecoregion factors from model as well.
mod.me = MERFranger(Y = y,
X = X.me,
random = "(1|landfire)", #set lf to be the random effect
data = df.train)
# get quantile-based bias correction -- mixed effects ranger model
pred.mod.me = predict(mod.me, df.train) #cannot use $predictions like in regular model because it is a MERF not a ranger object. Get vector of numeric predictions if you exclude the $predictions in code
obs = df.train$bio
qs = seq(0,1,0.001)
q.obs.mod.me = quantile(obs, probs = qs)
q.pred.mod.me = quantile(pred.mod.me, probs = qs)
q.diff.mod.me = q.obs.mod.me - q.pred.mod.me
# make predictions on test data
pred.mod.me = predict(mod.me, df.test)
pred.mod.me = unname(pred.mod.me)
obs = df.test$bio
# perform bias correction
pred.mod.me.bc = pred.mod.me + approx(q.pred.mod.me, q.diff.mod.me, pred.mod.me, rule = 2)$y
#Mixed Effects Climate Model
y = df.train$bio
X.me.1 = df.train[,c(6:22)] #exclude 'climate' from predictor variables and use as random effect. Excludes landfire and ecoregion factors from model as well.
mod.me.1 = MERFranger(Y = y,
X = X.me.1,
random = "(1|climate)", #set cli to be the random effect
data = df.train)
# get quantile-based bias correction -- mixed effects ranger model
pred.mod.me.1 = predict(mod.me.1, df.train) #cannot use $predictions like in regular model because it is a MERF not a ranger object. Get vector of numeric predictions if you exclude the $predictions in code
obs = df.train$bio
qs = seq(0,1,0.001)
q.obs.mod.me.1 = quantile(obs, probs = qs)
q.pred.mod.me.1 = quantile(pred.mod.me.1, probs = qs)
q.diff.mod.me.1 = q.obs.mod.me.1 - q.pred.mod.me.1
# make predictions on test data
pred.mod.me.1 = predict(mod.me.1, df.test)
pred.mod.me.1= unname(pred.mod.me.1)
obs = df.test$bio
# perform bias correction
pred.mod.me.1.bc = pred.mod.me.1 + approx(q.pred.mod.me.1, q.diff.mod.me.1, pred.mod.me.1, rule = 2)$y
#Mixed Effects Ecoregion Model
y = df.train$bio
X.me.2 = df.train[,c(6:22)] #exclude 'ecoregion' from predictor variables and use as random effect. Excludes climate and landfire factors from model as well (only lidar predictor variables).
mod.me.2 = MERFranger(Y = y,
X = X.me.2,
random = "(1|ecoregion)", #set eco to be the random effect
data = df.train)
# get quantile-based bias correction -- mixed effects ranger model
pred.mod.me.2 = predict(mod.me.2, df.train) #cannot use $predictions like in regular model because it is a MERF not a ranger object. Get vector of numeric predictions if you exclude the $predictions in code
obs = df.train$bio
qs = seq(0,1,0.001)
q.obs.mod.me.2 = quantile(obs, probs = qs)
q.pred.mod.me.2 = quantile(pred.mod.me.2, probs = qs)
q.diff.mod.me.2 = q.obs.mod.me.2 - q.pred.mod.me.2
# make predictions on test data
pred.mod.me.2 = predict(mod.me.2, df.test)
pred.mod.me.2= unname(pred.mod.me.2)
obs = df.test$bio
# perform bias correction
pred.mod.me.2.bc = pred.mod.me.2 + approx(q.pred.mod.me.2, q.diff.mod.me.2, pred.mod.me.2, rule = 2)$y
# define plotting function
plot.fun = function(x,y,main,xlab="Observed AGB",ylab="Predicted AGB"){
plot(y ~ x, xlim = c(ax.min, ax.max), ylim = c(ax.min, ax.max), pch = 16,
main = main, xlab = xlab, ylab = ylab)
grid()
lines(x = c(-1000,1000), y = c(-1000,1000), lty = 2)
mod = lm(y ~ x)
abline(mod, lwd = 2, col = "red")
a = coef(mod)[2]
b = coef(mod)[1]
if (a < 0) eq = paste0("y=", round(a,2), "x-", abs(round(b,2)))
if (a >= 0) eq = paste0("y=", round(a,2), "x+", abs(round(b,2)))
r2 = paste0("R2=", round(summary(mod)$adj.r.squared,2))
rmse = paste0("RMSE=", round(sqrt(mean((y-x)^2)),2))
legend("topleft", legend = c(eq,r2,rmse), text.col = "red", bty = "n", x.intersp = 0)
}
# plot out the results
par(mfrow = c(2,4),
las = 1,
mar = c(5,5,2,1))
ax.min = min(c(pred.mod, pred.mod.me, obs))
ax.max = max(c(pred.mod, pred.mod.me, obs))
plot.fun(obs, pred.mod, "RF")
plot.fun(obs, pred.mod.bc, "RF Bias Corr")
plot.fun(obs, pred.mod.me, "LF MERF")
plot.fun(obs, pred.mod.me.bc, "LF MERF Bias Corr")
plot.fun(obs, pred.mod.me.1, "Cli MERF")
plot.fun(obs, pred.mod.me.1.bc, "Cli MERF Bias Corr")
plot.fun(obs, pred.mod.me.2, "Eco MERF")
plot.fun(obs, pred.mod.me.2.bc, "Eco MERF Bias Corr")
Plot the random effects for landfire model - intercepts = 0, landfire and climate mixed effects models are straight lines
# get data frame of random effects
df.re = as.data.frame(mod.me$RandomEffects$landfire) #plot ecoregion effects
colnames(df.re) = "Intercept"
# get bar colors
cols = colorRampPalette(brewer.pal(11, "Spectral"))(nrow(df.re))
# plot the random effects
par(mar = c(8,5,1,1),
las = 1)
b = barplot(df.re$Intercept, space = 0, col = cols, ylim = c(min(df.re - 1), max(df.re + 1)), ylab = "Intercept")
box()
mtext(text = row.names(df.re), side = 1, line = 1, at = b, las = 2, cex = 0.45)
abline(h = 0)
df.re
## Intercept
## Colorado Plateau Pinyon-Juniper Woodland 0
## Great Basin Pinyon-Juniper Woodland 0
## Madrean Pinyon-Juniper Woodland 0
## Southern Rocky Mountain Pinyon-Juniper Woodland 0
Plot the random effects for climate model - intercepts = 0
# get data frame of random effects
df.re = as.data.frame(mod.me.1$RandomEffects$climate) #plot ecoregion effects
colnames(df.re) = "Intercept"
# get bar colors
cols = colorRampPalette(brewer.pal(11, "Spectral"))(nrow(df.re))
# plot the random effects
par(mar = c(8,5,1,1),
las = 1)
b = barplot(df.re$Intercept, space = 0, col = cols, ylim = c(min(df.re - 1), max(df.re + 1)), ylab = "Intercept")
box()
mtext(text = row.names(df.re), side = 1, line = 1, at = b, las = 2, cex = 0.55)
abline(h = 0)
Plot the random effects for ecoregion model
# get data frame of random effects
df.re = as.data.frame(mod.me.2$RandomEffects$ecoregion) #plot ecoregion effects
colnames(df.re) = "Intercept"
# get bar colors
cols = colorRampPalette(brewer.pal(11, "Spectral"))(nrow(df.re))
# plot the random effects
par(mar = c(8,5,1,1),
las = 1)
b = barplot(df.re$Intercept, space = 0, col = cols, ylim = c(min(df.re - 1), max(df.re + 1)), ylab = "Intercept")
box()
mtext(text = row.names(df.re), side = 1, line = 1, at = b, las = 2, cex = 0.55)
abline(h = 0)
Instead of site.id in original code, use ecoregion as the random effect Is there a way to include more than one random effect in model?
set.seed(867509)
#redefine things just in case
new_metrics = metrics[c("site.id","plot.id","bio", "cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300", "p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")]
#Merge all dataframes to get all strata and metrics into one dataframe (onemodel)
data3 = merge(x = new_metrics, y = pj_types, by = "plot.id", all.x = TRUE)
data4 = merge(x = data3, y = climate, by = "plot.id", all.x = TRUE)
onemodel1 = merge(x = data4, y = ecoregions, by = "plot.id", all.x = TRUE)
df = onemodel1[c("plot.id", "landfire", "climate", "ecoregion", "bio", "cc", "cd", "mh.ag", "vrd.0.50", "vnrd.50.100", "p.45", "p.25", "p.55", "vrd.250.300", "p.20", "cr.qh.20", "p.85", "plot.y", "plot.x", "skew.ag", "vnrd.0.50", "twi")] #define new
for (i in seq(1,200)){
df$train.test = "train"
ecoregions1 = unique(df$ecoregion)
for (ecoregion in ecoregions1){
eco.rows = which(df$ecoregion == ecoregion)
test.row = sample(eco.rows, 2)
df$train.test[test.row] = "test"
}
df.train = df[df$train.test == "train",]
df.test = df[df$train.test == "test",]
df.train$train.test = NULL
df.test$train.test = NULL
#regular ranger model --> does not include all strata as factors in predictor variables
mod = ranger(x = df.train[,c(6:22)] ,
y = df.train$bio,
data = df.train)
#should i include landfire and climate as factors in model? Columns 3 and 4, ecoregion column 5. Currently does not include landfire and climate as factors in model.
mod.me = MERFranger(Y = df.train$bio,
X = df.train[,c(6:22)],
random = "(1|ecoregion)",
data = df.train)
pred.mod = predict(mod, df.train)$predictions
obs = df.train$bio
qs = seq(0,1,0.001)
q.obs.mod = quantile(obs, probs = qs)
q.pred.mod = quantile(pred.mod, probs = qs)
q.diff.mod = q.obs.mod - q.pred.mod
pred.mod.me = predict(mod, df.train)$predictions
obs = df.train$bio
qs = seq(0,1,0.001)
q.obs.mod.me = quantile(obs, probs = qs)
q.pred.mod.me = quantile(pred.mod.me, probs = qs)
q.diff.mod.me = q.obs.mod.me - q.pred.mod.me
pred.mod = predict(mod, df.test)$predictions
pred.mod.me = predict(mod.me, df.test)
obs = df.test$bio
pred.mod.bc = pred.mod + approx(q.pred.mod, q.diff.mod, pred.mod, rule = 2)$y
pred.mod.me.bc = pred.mod.me + approx(q.pred.mod.me, q.diff.mod.me, pred.mod.me, rule = 2)$y
r2.mod = summary(lm(pred.mod ~ obs))$adj.r.squared
r2.mod.me = summary(lm(pred.mod.me ~ obs))$adj.r.squared
r2.mod.bc = summary(lm(pred.mod.bc ~ obs))$adj.r.squared
r2.mod.me.bc = summary(lm(pred.mod.me.bc ~ obs))$adj.r.squared
rmse.mod = sqrt(mean((pred.mod - obs)^2))
rmse.mod.me = sqrt(mean((pred.mod.me - obs)^2))
rmse.mod.bc = sqrt(mean((pred.mod.bc - obs)^2))
rmse.mod.me.bc = sqrt(mean((pred.mod.me.bc - obs)^2))
if (i == 1){
results.df = data.frame(r2.mod = r2.mod,
r2.mod.me = r2.mod.me,
r2.mod.bc = r2.mod.bc,
r2.mod.me.bc = r2.mod.me.bc,
rmse.mod = rmse.mod,
rmse.mod.me = rmse.mod.me,
rmse.mod.bc = rmse.mod.bc,
rmse.mod.me.bc = rmse.mod.me.bc)
df.re = t(as.data.frame(mod.me$RandomEffects$ecoregion))
} else {
results.df = rbind(results.df, data.frame(r2.mod = r2.mod,
r2.mod.me = r2.mod.me,
r2.mod.bc = r2.mod.bc,
r2.mod.me.bc = r2.mod.me.bc,
rmse.mod = rmse.mod,
rmse.mod.me = rmse.mod.me,
rmse.mod.bc = rmse.mod.bc,
rmse.mod.me.bc = rmse.mod.me.bc))
df.re = rbind(df.re, t(as.data.frame(mod.me$RandomEffects$ecoregion)))
}
}
Plot the bootstrap results
set.seed(238931)
# get kernel densities
d.r2.mod = density(results.df$r2.mod)
d.r2.mod.me = density(results.df$r2.mod.me)
d.r2.mod.bc = density(results.df$r2.mod.bc)
d.r2.mod.me.bc = density(results.df$r2.mod.me.bc)
d.rmse.mod = density(results.df$rmse.mod)
d.rmse.mod.me = density(results.df$rmse.mod.me)
d.rmse.mod.bc = density(results.df$rmse.mod.bc)
d.rmse.mod.me.bc = density(results.df$rmse.mod.me.bc)
# get axis limits
xlim.r2 = c(min(c(d.r2.mod$x, d.r2.mod.me$x, d.r2.mod.bc$x, d.r2.mod.me.bc$x)),
max(c(d.r2.mod$x, d.r2.mod.me$x, d.r2.mod.bc$x, d.r2.mod.me.bc$x)))
xlim.rmse = c(min(c(d.rmse.mod$x, d.rmse.mod.me$x, d.rmse.mod.bc$x, d.rmse.mod.me.bc$x)),
max(c(d.rmse.mod$x, d.rmse.mod.me$x, d.rmse.mod.bc$x, d.rmse.mod.me.bc$x)))
ylim.r2 = c(min(c(d.r2.mod$y, d.r2.mod.me$y, d.r2.mod.bc$y, d.r2.mod.me.bc$y)),
max(c(d.r2.mod$y, d.r2.mod.me$y, d.r2.mod.bc$y, d.r2.mod.me.bc$y)))
ylim.rmse = c(min(c(d.rmse.mod$y, d.rmse.mod.me$y, d.rmse.mod.bc$y, d.rmse.mod.me.bc$y)),
max(c(d.rmse.mod$y, d.rmse.mod.me$y, d.rmse.mod.bc$y, d.rmse.mod.me.bc$y)))
# set up the plot
par(mfrow = c(2,2),
las = 1,
mar = c(5,1,2,1))
# plot 1 -- RF vs. MERF, R2
plot(xlim.r2, ylim.r2, type = "n", yaxt = "n", ylab = NA, xlab = "R2", main = "R2")
polygon(d.r2.mod$x, d.r2.mod$y, col = rgb(1,0,0,0.25))
polygon(d.r2.mod.me$x, d.r2.mod.me$y, col = rgb(0,0,1,0.25))
abline(v = median(results.df$r2.mod), lwd = 2, col = "red")
abline(v = median(results.df$r2.mod.me), lwd = 2, col = "blue")
r2.med.mod = paste0("Med Basic = ", round(median(results.df$r2.mod),2))
r2.med.mod.me = paste0("Med ME = ", round(median(results.df$r2.mod.me),2))
legend("topleft", legend = c(r2.med.mod, r2.med.mod.me), text.col = c("red", "blue"), bty = "n", x.intersp = 0)
# plot 2 -- RF vs. MERF, RMSE
plot(xlim.rmse, ylim.rmse, type = "n", yaxt = "n", ylab = NA, xlab = "RMSE", main = "RMSE")
polygon(d.rmse.mod$x, d.rmse.mod$y, col = rgb(1,0,0,0.25))
polygon(d.rmse.mod.me$x, d.rmse.mod.me$y, col = rgb(0,0,1,0.25))
abline(v = median(results.df$rmse.mod), lwd = 2, col = "red")
abline(v = median(results.df$rmse.mod.me), lwd = 2, col = "blue")
rmse.med.mod = paste0("Med Basic = ", round(median(results.df$rmse.mod),2))
rmse.med.mod.me = paste0("Med ME = ", round(median(results.df$rmse.mod.me),2))
legend("topleft", legend = c(rmse.med.mod, rmse.med.mod.me), text.col = c("red", "blue"), bty = "n", x.intersp = 0)
# plot 3 -- RF vs. MERF Bias Corr, R2
plot(xlim.r2, ylim.r2, type = "n", yaxt = "n", ylab = NA, xlab = "R2 Bias", main = "R2 Bias Corr")
polygon(d.r2.mod.bc$x, d.r2.mod.bc$y, col = rgb(1,0,0,0.25))
polygon(d.r2.mod.me.bc$x, d.r2.mod.me.bc$y, col = rgb(0,0,1,0.25))
abline(v = median(results.df$r2.mod.bc), lwd = 2, col = "red")
abline(v = median(results.df$r2.mod.me.bc), lwd = 2, col = "blue")
r2.med.mod.bc = paste0("Med Basic = ", round(median(results.df$r2.mod.bc),2))
r2.med.mod.me.bc = paste0("Med ME = ", round(median(results.df$r2.mod.me.bc),2))
legend("topleft", legend = c(r2.med.mod.bc, r2.med.mod.me.bc), text.col = c("red", "blue"), bty = "n", x.intersp = 0)
# plot 4 -- RF vs. MERF Bias Corr, RMSE
plot(xlim.rmse, ylim.rmse, type = "n", yaxt = "n", ylab = NA, xlab = "RMSE", main = "RMSE Bias Corr")
polygon(d.rmse.mod.bc$x, d.rmse.mod.bc$y, col = rgb(1,0,0,0.25))
polygon(d.rmse.mod.me.bc$x, d.rmse.mod.me.bc$y, col = rgb(0,0,1,0.25))
abline(v = median(results.df$rmse.mod.bc), lwd = 2, col = "red")
abline(v = median(results.df$rmse.mod.me.bc), lwd = 2, col = "blue")
rmse.med.mod.bc = paste0("Med Basic = ", round(median(results.df$rmse.mod.bc),2))
rmse.med.mod.me.bc = paste0("Med ME = ", round(median(results.df$rmse.mod.me.bc),2))
legend("topleft", legend = c(rmse.med.mod.bc, rmse.med.mod.me.bc), text.col = c("red", "blue"), bty = "n", x.intersp = 0)
# get axis limits
ymin = 100
ymax = -100
eco.ids = colnames(df.re)
for (ecoregion in eco.ids){
d = density(df.re[,ecoregion])
y = d$x
if (min(y) < ymin) ymin = min(y)
if (max(y) > ymax) ymax = max(y)
}
# set up plot
par(mar = c(8,5,1,1),
las = 1)
# create empty plot
plot(x = c(0.5, ncol(df.re) + 0.5),
y = c(ymin, ymax),
type = "n",
ylab = "Intercept",
xaxt = "n",
xlab = NA)
abline(h = 0)
# loop through sites and plot violins
eco.ids = colnames(df.re)
for (i in seq(1,length(eco.ids))){
abline(v = i, lty = 3, col = "lightgray")
ecoregion = eco.ids[i]
d = density(df.re[,ecoregion])
y = d$x
x = d$y + i
y = c(y, rev(y))
x = c(x, rev(d$y * -1 + i))
col = cols[i]
polygon(x, y, col = col)
points(i, median(df.re[,ecoregion]), pch = 16)
mtext(ecoregion, 1, 1, at = i, las = 2, cex = 0.55)
}
###### Clean up tree data
treedata = read.csv("tree_level_data.csv") #Load tree data which specifies tree species for all plots
trees = subset(treedata, select = c(plot_id, tree_sp))
trees = trees %>% rename(plot.id = plot_id) #rename column plot_id to plot.id
trees = trees[(trees$plot.id %in% metrics$plot.id),] #remove data plots not in metrics
treesfreq = data.frame(unclass(table(trees))) #turn tree frequency table into dataframe
#treesfreq #frequency of each tree species in each plot
treesfreq = cbind(plot.id = rownames(treesfreq), treesfreq) #move plot.id from the index column to the first column in the data frame
rownames(treesfreq) = 1:nrow(treesfreq)
#subset to only P and J for fun reasons
treespj = subset(treesfreq, select = c(plot.id, JUDE2, JUMO, JUOS, JUSC2, PIED, PIMO, PIMOF)) #include PIPO??
#All trees and all variables. Only plot.ids used in metrics are in this dataframe
trees_df = merge(trees, onemodel)
trees_df = subset(trees_df, select = c(plot.id, tree_sp, landfire, climate, ecoregion))
Plot tree species count for each strata
trees_df2 = trees_df %>% filter(tree_sp == 'JUDE2' | tree_sp == 'JUMO' | tree_sp == 'JUOS' | tree_sp == 'JUSC2' | tree_sp == 'PIED' | tree_sp == 'PIMO' | tree_sp == 'PIMOF')
#lf = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
trees_df2 %>%
group_by(landfire, tree_sp) %>%
summarize(Count = n()) %>%
ggplot(aes(x=landfire, y=Count, fill=tree_sp)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(5))
trees_df2 %>%
group_by(climate, tree_sp) %>%
summarize(Count = n()) %>%
ggplot(aes(x=climate, y=Count, fill=tree_sp)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(5))
#eco = c('Colorado Plateaus', 'Wasatch & Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'Northern Basin and Range')
trees_df2 %>%
group_by(ecoregion, tree_sp) %>%
summarize(Count = n()) %>%
ggplot(aes(x=ecoregion, y=Count, fill=tree_sp)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(5))
lf_trees_ct1 = trees_df2 %>% count(landfire, tree_sp)
lf_trees_ct1 = as.data.frame(lf_trees_ct1)
cli_trees_ct = trees_df2 %>% count(climate, tree_sp)
cli_trees_ct = as.data.frame(cli_trees_ct)
eco_trees_ct = trees_df2 %>% count(ecoregion, tree_sp)
eco_trees_ct = as.data.frame(eco_trees_ct)
lf_trees_ct2 = lf_trees_ct1 %>% group_by(landfire) %>% top_n(2, n)
cli_trees_ct = cli_trees_ct %>% group_by(climate) %>% top_n(2, n)
eco_trees_ct = eco_trees_ct %>% group_by(ecoregion) %>% top_n(2, n)
lf_trees_ct3 = lf_trees_ct2 %>%
group_by(landfire, tree_sp) %>%
ggplot(aes(x=landfire, y=n, fill=tree_sp)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(5))
cli_trees_ct = cli_trees_ct %>%
group_by(climate, tree_sp) %>%
ggplot(aes(x=climate, y=n, fill=tree_sp)) +
geom_bar(stat='identity', position= "dodge")
eco_trees_ct = eco_trees_ct %>%
group_by(ecoregion, tree_sp) %>%
ggplot(aes(x=ecoregion, y=n, fill=tree_sp)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(5))
grid.arrange(lf_trees_ct3,landfire_heatmap)
grid.arrange(cli_trees_ct,climate_heatmap)
grid.arrange(eco_trees_ct,ecoregion_heatmap)
Create function to turn data frame into matrix and sum columns
fun3 = function(df) {
x = df[,unlist(lapply(df, is.numeric))]
y = as.matrix(x)
return(colSums(y))
}
Tree species sums for each landfire cluster
mets.types.treespj = merge(treespj, pj_types, by="plot.id") #merge predictor treespj and pj_types on plot.id
colnames(mets.types.treespj)[colnames(mets.types.treespj) == "landfire"] = "group" #rename landfire to group
#subset mets.types.treespj to each group
#investigate the PJ species composition of each group
mets.types.treespj.cp = subset(mets.types.treespj, group == "Colorado Plateau Pinyon-Juniper Woodland")
mets.types.treespj.ma = subset(mets.types.treespj, group == "Madrean Pinyon-Juniper Woodland")
mets.types.treespj.sr = subset(mets.types.treespj, group == "Southern Rocky Mountain Pinyon-Juniper Woodland")
mets.types.treespj.gb = subset(mets.types.treespj, group == "Great Basin Pinyon-Juniper Woodland")
typ = list(mets.types.treespj.cp, mets.types.treespj.ma, mets.types.treespj.sr, mets.types.treespj.gb)
#rownames(pj_types_plot_sums)
pj_types_plot_sums_fun = lapply(typ, fun3)
pj_types_plot_sums = as.data.frame(do.call(cbind, pj_types_plot_sums_fun))
names(pj_types_plot_sums) = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
pj_types_plot_sums = t(pj_types_plot_sums)
kable(pj_types_plot_sums)
JUDE2 | JUMO | JUOS | JUSC2 | PIED | PIMO | PIMOF | |
---|---|---|---|---|---|---|---|
Colorado Plateau | 554 | 777 | 3916 | 101 | 6635 | 0 | 0 |
Madrean | 1011 | 34 | 115 | 7 | 380 | 0 | 116 |
Southern Rocky Mountain | 7 | 612 | 0 | 96 | 1284 | 0 | 0 |
Great Basin | 0 | 1 | 532 | 0 | 55 | 415 | 0 |
Tree species sums for each climate cluster
mets.types.clim.treespj = merge(treespj, climate, by="plot.id") #merge predictor treespj and pj_types on plot.id
colnames(mets.types.clim.treespj)[colnames(mets.types.clim.treespj) == "climate"] = "group" #rename climate to group
#subset data to their stratas
mets.types.clim.treespj.bsk = subset(mets.types.clim.treespj, group == "Bsk")
mets.types.clim.treespj.dfb = subset(mets.types.clim.treespj, group == "Dfb")
mets.types.clim.treespj.dsb = subset(mets.types.clim.treespj, group == "Dsb")
mets.types.clim.treespj.bwk = subset(mets.types.clim.treespj, group == "Bwk")
mets.types.clim.treespj.csa = subset(mets.types.clim.treespj, group == "Csa")
mets.types.clim.treespj.csb = subset(mets.types.clim.treespj, group == "Csb")
climatelist2 = list(mets.types.clim.treespj.bsk,mets.types.clim.treespj.dfb,mets.types.clim.treespj.dsb,mets.types.clim.treespj.bwk,mets.types.clim.treespj.csa,mets.types.clim.treespj.csb)
mets.types.clim.treespj.fun = lapply(climatelist2, fun3)
mets.types.clim.treespj.sums = as.data.frame(do.call(cbind, mets.types.clim.treespj.fun))
names(mets.types.clim.treespj.sums) = c('Bsk', 'Dfb','Dsb', 'Bwk', 'Csa', 'Csb')
mets.types.clim.treespj.sums = t(mets.types.clim.treespj.sums)
kable(mets.types.clim.treespj.sums)
JUDE2 | JUMO | JUOS | JUSC2 | PIED | PIMO | PIMOF | |
---|---|---|---|---|---|---|---|
Bsk | 271 | 1376 | 4448 | 187 | 8058 | 415 | 116 |
Dfb | 0 | 1 | 16 | 2 | 45 | 0 | 0 |
Dsb | 189 | 19 | 42 | 5 | 147 | 0 | 0 |
Bwk | 0 | 5 | 44 | 0 | 14 | 0 | 0 |
Csa | 836 | 23 | 13 | 7 | 88 | 0 | 0 |
Csb | 276 | 0 | 0 | 3 | 2 | 0 | 0 |
Tree species sums for each ecoregion cluster
mets.types.eco.treespj = merge(treespj, ecoregions, by="plot.id") #merge predictor treespj and pj_types on plot.id
colnames(mets.types.eco.treespj)[colnames(mets.types.eco.treespj) == "ecoregion"] = "group" #rename ecoregion to group
#subset data to their stratas
mets.types.eco.treespj.cp = subset(mets.types.eco.treespj, group == "Colorado Plateaus")
mets.types.eco.treespj.wum = subset(mets.types.eco.treespj, group == "Wasatch and Uinta Mountains")
mets.types.eco.treespj.anmm = subset(mets.types.eco.treespj, group == "Arizona/New Mexico Mountains")
mets.types.eco.treespj.cbr = subset(mets.types.eco.treespj, group == "Central Basin and Range")
mets.types.eco.treespj.sro = subset(mets.types.eco.treespj, group == "Southern Rockies")
mets.types.eco.treespj.anmp = subset(mets.types.eco.treespj, group == "Arizona/New Mexico Plateau")
mets.types.eco.treespj.nbr = subset(mets.types.eco.treespj, group == "Northern Basin and Range")
ecolist2 = list(mets.types.eco.treespj.cp,mets.types.eco.treespj.wum,mets.types.eco.treespj.anmm,mets.types.eco.treespj.cbr,mets.types.eco.treespj.sro,mets.types.eco.treespj.anmp,mets.types.eco.treespj.nbr)
mets.types.eco.plot.sums.fun = lapply(ecolist2, fun3)
mets.types.eco.plot.sums = as.data.frame(do.call(cbind, mets.types.eco.plot.sums.fun))
names(mets.types.eco.plot.sums) = c('Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'Northern Basin and Range')
mets.types.eco.plot.sums = t(mets.types.eco.plot.sums)
kable(mets.types.eco.plot.sums)
JUDE2 | JUMO | JUOS | JUSC2 | PIED | PIMO | PIMOF | |
---|---|---|---|---|---|---|---|
Colorado Plateaus | 0 | 0 | 3326 | 12 | 3587 | 0 | 0 |
Wasatch and Uinta Mountains | 0 | 0 | 191 | 64 | 582 | 0 | 0 |
Arizona/New Mexico Mountains | 1572 | 873 | 514 | 49 | 3147 | 0 | 116 |
Central Basin and Range | 0 | 1 | 410 | 0 | 0 | 415 | 0 |
Southern Rockies | 0 | 230 | 0 | 79 | 707 | 0 | 0 |
Arizona/New Mexico Plateau | 0 | 320 | 0 | 0 | 276 | 0 | 0 |
Northern Basin and Range | 0 | 0 | 122 | 0 | 55 | 0 | 0 |
Notes:
Based on tree species sums for each strata, strata with a higher species variability seem to be more transferable than strata with a low species variability. In other words, the Colorado Plateau PJ in the Landfire data set encompasses 5 different PJ species and its models are highly transferable to different strata, while Southern Rocky Mountain PJ encompasses 4 different PJ species and its models are less transferable to different strata. Other stronger examples of this in the ecoregions and climate datasets.
Do similar species compositions impact model transferability?
Plots of mean height (mh) and mean height aboveground (mh.ag) for different strata
metricsheight = metrics[c("plot.id","mh")] #put into dataframe the metrics to be used in RF as chosen by the VSURF
heights = merge(onemodel1, metricsheight, by = "plot.id")
heights = subset(heights, select = c(plot.id, landfire, climate, ecoregion, bio, mh, mh.ag))
Mean heights and mean heights aboveground (without shrubs) for each strata in landfire data
#function to get mean heights and mean heights aboveground (without shrubs) for each strata in landfire data
fun5 = function(df){
lf.mean.height = mean(df[,"mh"])
lf.mean.height.ag = mean(df[,"mh.ag"])
return(cbind(lf.mean.height, lf.mean.height.ag))}
#landfire
#subset data to their stratas
cpsubset = subset(heights, landfire == "Colorado Plateau Pinyon-Juniper Woodland")
masubset = subset(heights, landfire == "Madrean Pinyon-Juniper Woodland")
srsubset = subset(heights, landfire == "Southern Rocky Mountain Pinyon-Juniper Woodland")
gbsubset = subset(heights, landfire == "Great Basin Pinyon-Juniper Woodland")
typeslist = list(cpsubset,masubset,srsubset,gbsubset)
mean.heights.lf = lapply(typeslist, fun5)
mean.heights.lf.df = as.data.frame(do.call(rbind, mean.heights.lf))
rownames(mean.heights.lf.df) = c('Colorado Plateau', 'Madrean', 'Southern Rocky Mountain','Great Basin')
mean.heights.lf.df = tibble::rownames_to_column(mean.heights.lf.df, "VALUE")
mean.heights.lf.df = mean.heights.lf.df %>% rename(lf.strata = "VALUE")
Plots
p1 = ggplot(data=mean.heights.lf.df, aes(x=lf.strata, y=lf.mean.height)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(10)) + scale_y_continuous(limits = c(0, 4))
p2 = ggplot(data=mean.heights.lf.df, aes(x=lf.strata, y=lf.mean.height.ag)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(10)) + scale_y_continuous(limits = c(0, 4))
Get mean heights and mean heights aboveground (without shrubs) for each strata in climate data
#function to get mean heights and mean heights aboveground (without shrubs) for each strata in landfire data
fun5 = function(df){
cli.mean.height = mean(df[,"mh"])
cli.mean.height.ag = mean(df[,"mh.ag"])
return(cbind(cli.mean.height, cli.mean.height.ag))}
#climate
#subset data to their stratas
bsksubset = subset(heights, climate == "Bsk")
dfbsubset = subset(heights, climate == "Dfb")
dsbsubset = subset(heights, climate == "Dsb")
bwksubset = subset(heights, climate == "Bwk")
csasubset = subset(heights, climate == "Csa")
csbsubset = subset(heights, climate == "Csb")
climatelist = list(bsksubset,dfbsubset,dsbsubset,bwksubset,csasubset,csbsubset)
mean.heights.cli = lapply(climatelist, fun5)
mean.heights.cli.df = as.data.frame(do.call(rbind, mean.heights.cli))
rownames(mean.heights.cli.df) = c('bsk', 'dfb', 'dsb','bwk', 'csa', 'csb')
mean.heights.cli.df = tibble::rownames_to_column(mean.heights.cli.df, "VALUE")
mean.heights.cli.df = mean.heights.cli.df %>% rename(climate.strata = "VALUE")
Plots
p3 = ggplot(data=mean.heights.cli.df, aes(x=climate.strata, y=cli.mean.height)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(10)) + scale_y_continuous(limits = c(0, 4))
p4 = ggplot(data=mean.heights.cli.df, aes(x=climate.strata, y=cli.mean.height.ag)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(10)) + scale_y_continuous(limits = c(0, 4))
Get mean heights and mean heights aboveground (without shrubs) for each strata in ecoregion data
#function to get mean heights and mean heights aboveground (without shrubs) for each strata in landfire data
fun5 = function(df){
eco.mean.height = mean(df[,"mh"])
eco.mean.height.ag = mean(df[,"mh.ag"])
return(cbind(eco.mean.height, eco.mean.height.ag))}
#ecoregions
#subset data to their stratas
cplsubset = subset(heights, ecoregion == "Colorado Plateaus")
wumsubset = subset(heights, ecoregion == "Wasatch and Uinta Mountains")
anmmsubset = subset(heights, ecoregion == "Arizona/New Mexico Mountains")
cbrsubset = subset(heights, ecoregion == "Central Basin and Range")
srosubset = subset(heights, ecoregion == "Southern Rockies")
anmpsubset = subset(heights, ecoregion == "Arizona/New Mexico Plateau")
nbrsubset = subset(heights, ecoregion == "Northern Basin and Range")
ecolist = list(cplsubset,wumsubset,anmmsubset,cbrsubset,srosubset,anmpsubset,nbrsubset)
mean.heights.eco = lapply(ecolist, fun5)
mean.heights.eco.df = as.data.frame(do.call(rbind, mean.heights.eco))
rownames(mean.heights.eco.df) = c('Colorado Plateaus', 'Wasatch and Uinta Mountains', 'Arizona/New Mexico Mountains','Central Basin and Range', 'Southern Rockies', 'Arizona/New Mexico Plateau', 'Northern Basin and Range')
mean.heights.eco.df = tibble::rownames_to_column(mean.heights.eco.df, "VALUE")
mean.heights.eco.df = mean.heights.eco.df %>% rename(eco.strata = "VALUE")
Plots
p5= ggplot(data=mean.heights.eco.df, aes(x=eco.strata, y=eco.mean.height)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(10)) + scale_y_continuous(limits = c(0, 4))
#+ scale_x_discrete(labels = label_wrap(10))
p6 = ggplot(data=mean.heights.eco.df, aes(x=eco.strata, y=eco.mean.height.ag)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(10)) + scale_y_continuous(limits = c(0, 4))
Mean height and mean height aboveground plots
#grid.arrange(p5,p1,p2,p3,p4,p6, ncol = 3, nrow=2)
grid.arrange(p1,p2, ncol = 1, nrow=2)
grid.arrange(p3,p4, ncol = 1, nrow=2)
grid.arrange(p5,p6, ncol = 1, nrow=2)
Subtract mean height aboveground from mean height to get shrub cover. Does the presence of a lot of low cover vegetation (shrub) impact model transferability? Possibly too much error introduced in this analysis due to the taking of averages for mean height and mean height aboveground.
mean.heights.eco.df$diff=(mean.heights.eco.df$eco.mean.height.ag-mean.heights.eco.df$eco.mean.height)
p7 = ggplot(data=mean.heights.eco.df, aes(x=eco.strata, y=diff)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(17)) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
mean.heights.cli.df$diff=(mean.heights.cli.df$cli.mean.height.ag-mean.heights.cli.df$cli.mean.height)
p8 = ggplot(data=mean.heights.cli.df, aes(x=climate.strata, y=diff)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(5)) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
mean.heights.lf.df$diff=(mean.heights.lf.df$lf.mean.height.ag-mean.heights.lf.df$lf.mean.height)
p9 = ggplot(data=mean.heights.lf.df, aes(x=lf.strata, y=diff)) +
geom_bar(stat="identity") + scale_x_discrete(labels = label_wrap(5)) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
#If the diff is smaller, it indicates less ground vegetation. Higher diff indicates more ground vegetation.
#Could cross reference this with shrub counts per strata.
px = p9 + p8 + p7 & scale_y_continuous(limits = c(0, 2.1))
plot(px)
How many shrubs: look at shrub numbers for each strata using tree data. Could investigate average proportion of shrubs to trees?
#Double check that trees are removed from subset
#Subset trees dataframe to only shrubs
trees_df3 = trees_df %>% filter(!(tree_sp == 'JUDE2' | tree_sp == 'JUMO' | tree_sp == 'JUOS' | tree_sp == 'JUSC2' | tree_sp == 'PIED' | tree_sp == 'PIMO' | tree_sp == 'PIMOF'| tree_sp == 'PIPO' | tree_sp == 'ABCO'))
#unique(trees_df3$tree_sp) #make sure only shrubs
p10 = trees_df3 %>%
group_by(landfire, tree_sp) %>%
summarize(Count = n()) %>%
ggplot(aes(x=landfire, y=Count)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(17)) +
xlab("landfire strata") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
p11 = trees_df3 %>%
group_by(climate, tree_sp) %>%
summarize(Count = n()) %>%
ggplot(aes(x=climate, y=Count)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(5)) +
xlab("climate strata") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
p12 = trees_df3 %>%
group_by(ecoregion, tree_sp) %>%
summarize(Count = n()) %>%
ggplot(aes(x=ecoregion, y=Count)) +
geom_bar(stat='identity', position= "dodge") +
scale_x_discrete(labels = label_wrap(17)) +
xlab("ecoregion strata") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
py = p10 + p11 + p12 & scale_y_continuous(limits = c(0, 400))
#Look into proportion of trees to shrubs?
plot(py)
ggplot(data = onemodel, aes(x = factor(landfire), y = bio)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
ggtitle("Violin Plot") +
xlab("Landfire clusters") +
ylab("AGB") +
scale_x_discrete(labels = label_wrap(10))
#theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
ggplot(data = onemodel, aes(x = factor(climate), y = bio)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
ggtitle("Violin Plot") +
xlab("Cli clusters") +
ylab("AGB") +
scale_x_discrete(labels = label_wrap(10))
#theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
ggplot(data = onemodel, aes(x = factor(ecoregion), y = bio)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
ggtitle("Violin Plot") +
xlab("Eco clusters") +
ylab("AGB") +
scale_x_discrete(labels = label_wrap(10))
#will need to redo getData if metrics dataset changes (outliers are removed or added)
#df.onemodel = onemodel[c("plot.x","plot.y","plot.id", "landfire", "climate", "ecoregion")]
#ll_proj = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
#elev.onemodel = get_elev_point(df.onemodel, prj = ll_proj)
#elev.onemodel = as.data.frame(elev.onemodel)
elev.onemodel = read.csv("elev_onemodel.csv")
elev.onemodel = as.data.frame(elev.onemodel)
Elevation Distribution Violin Plots
ggplot(data = elev.onemodel, aes(x = landfire, y = elevation)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
ggtitle("Landfire Violin Plot") +
xlab("Landfire clusters") +
ylab("Elevation") +
scale_x_discrete(labels = label_wrap(10))
#theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
ggplot(data = elev.onemodel, aes(x = climate, y = elevation)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
ggtitle("Climate Violin Plot") +
xlab("Climate clusters") +
ylab("Elevation") +
scale_x_discrete(labels = label_wrap(10))
#theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
ggplot(data = elev.onemodel, aes(x = ecoregion, y = elevation)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
ggtitle("Ecoregion Violin Plot") +
xlab("Ecoregion clusters") +
ylab("Elevation") +
scale_x_discrete(labels = label_wrap(10))
Elevation Distribution Box Plots
pl.1 = ggplot(elev.onemodel, aes(landfire, elevation)) + geom_boxplot() +
scale_x_discrete(labels = label_wrap(10))
pl.2 = ggplot(elev.onemodel, aes(climate, elevation)) + geom_boxplot() +
scale_x_discrete(labels = label_wrap(10))
pl.3 = ggplot(elev.onemodel, aes(ecoregion, elevation)) + geom_boxplot() +
scale_x_discrete(labels = label_wrap(10))
pl.1
pl.2
pl.3