library (terra)
## terra version 1.3.22
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract() masks terra::extract()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x dplyr::src()     masks terra::src()
library(landscapemetrics)

Objectives

The goals of this assignment are to use land cover datasets in the Richmond, VA region to assess:

  1. How land cover metrics change with increasing development over time using NLCD for 2001 and 2019 for the greater Richmond area (Cities and counties around RVA including Henrico, Hanover, Chesterfield, New Kent, Prince George, Charles City, Powhatan, and Goochland Counties)
    • for this objective we will focus on class level metrics for forest and impervious surfaces and assess changes in the amount and configuration of these cover types over time.
    • We will use paired t-tests to compare each metric in the two time periods for forest and impervious classes
    • We will use linear regression to see if distance to city explains additional variation in each metric
#pull in Landcover datasets, check crs, resolution, unique values
rva2001=rast("GrRVA_NLCD_2001.tif")
rva2019=rast("GrRVA_NLCD_2019.tif")
#Plot each raster
plot(rva2001)

plot(rva2019)

#reclassify both rasters to be impervious = 21, forest = 41, everything else = 0

#greater RVA NLCD data (2019)
reclass=c(rep(0,1), rep(21,1), rep(0,4), rep(41,1),rep(0,8))
reclass
##  [1]  0 21  0  0  0  0 41  0  0  0  0  0  0  0  0
reclass.mat=cbind(unique(rva2019)[[1]], reclass)
reclass.mat
##          reclass
##  [1,] 11       0
##  [2,] 21      21
##  [3,] 22       0
##  [4,] 23       0
##  [5,] 24       0
##  [6,] 31       0
##  [7,] 41      41
##  [8,] 42       0
##  [9,] 43       0
## [10,] 52       0
## [11,] 71       0
## [12,] 81       0
## [13,] 82       0
## [14,] 90       0
## [15,] 95       0
classtype19=classify(rva2019, reclass.mat)
#greater RVA NLCD data (2001)
reclass1=c(rep(0,1), rep(21,1), rep(0,4), rep(41,1),rep(0,8))
reclass1
##  [1]  0 21  0  0  0  0 41  0  0  0  0  0  0  0  0
reclass.mat1=cbind(unique(rva2001)[[1]], reclass1)
reclass.mat1
##          reclass1
##  [1,] 11        0
##  [2,] 21       21
##  [3,] 22        0
##  [4,] 23        0
##  [5,] 24        0
##  [6,] 31        0
##  [7,] 41       41
##  [8,] 42        0
##  [9,] 43        0
## [10,] 52        0
## [11,] 71        0
## [12,] 81        0
## [13,] 82        0
## [14,] 90        0
## [15,] 95        0
classtype01=classify(rva2001,reclass.mat1)
#NLCD classes:
#11 (open water)
#21 (Developed open space - <20% imperv)
#22 (low intensity development, 20-49% imperv)
#23 (high intensity development, 50-79% imperv)
#24 (very high intensity development, >80%imperv)
#31 (barren)
#41 (deciduous forest)
#42 (evergreen forest)
#43 (mixed forest)
#52 (shrub)
#71 (grassland) 
#81 (pasture/hay)
#82 (cultivated crop)
#90 (woody wetland)
#95 (emergent wetland)

Compare the two rasters and visualize where change occurs - do you see differences?

##All code provided for you in this section, but please go through it to see if you understand each step:

#important notes: In the above classification, I made forest = 4 and impervious = 2...these numbers are important in the code for this section.  If you used different numbers to classify your rasters, you will need to change the numbers below.

#the following code stacks the two NLCD rasters after making sure they overlap exactly
ex<-ext(classtype19)
classtype01<-crop(classtype01, ex)
?ext
stack<-c(classtype01, classtype19)

#then quantifies the pixels that change using the crosstab function
rec_xtab_df <- crosstab(stack,long=T)
rec_xtab_df
##   NLCD.Land.Cover.Class NLCD.Land.Cover.Class.1     Freq
## 1                     0                       0 11505874
## 2                     0                      21    40880
## 3                     0                      41    73944
## 4                    21                       0    47112
## 5                    21                      21   877226
## 6                    41                       0   664924
## 7                    41                      21    27631
## 8                    41                      41  1918025
names(rec_xtab_df)<-c("rva2001", "rva2019", "Freq")
rec_xtab_df
##   rva2001 rva2019     Freq
## 1       0       0 11505874
## 2       0      21    40880
## 3       0      41    73944
## 4      21       0    47112
## 5      21      21   877226
## 6      41       0   664924
## 7      41      21    27631
## 8      41      41  1918025
#interpret this cross tabulation

#and plots the areas that change TO impervious
r_dev21 <- classtype19==21 # developed on date 2
r_not_dev21 <- classtype01!=21 #remove areas that were already developed in date1, we do not persist

r_change_dev <- r_dev21 * r_not_dev21 #mask
plot(r_change_dev,main="Land transitions to developed over 2001-2019")

#and plots the areas that LOSE forest
r_for41 <- classtype01==41 # forested on date 1
r_still_for41 <- classtype19 !=41 #remove areas that where not forested on date2

r_changeF <- r_for41 * r_still_for41 #mask
plot(r_changeF,main="Land where forest lost over 2001-2019")

#and plots the areas that GAIN forest
r_cat41b <- classtype01!=41 # not forested on date 1
r_still_cat41b <- classtype19 ==41 #remove areas that where not forested on date2

r_changeFb <- r_cat41b * r_still_cat41b #mask
plot(r_changeFb,main="Land where forest is gained over 2001-2019")

Create sample points & calculate distance to city center

# we need to create sample points where we will sample the landscape metrics
# we want these points to be within the greater Richmond area, so pull in greater_RVA.shp as an sf and make sure it has the same projection as the NLCD data.

g_RVA<-st_read("greater_RVA.shp")
## Reading layer `greater_RVA' from data source 
##   `/Users/joshuasementelli/Downloads/Urban Landscape Metrics part 2/greater_RVA.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 12 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -78.15928 ymin: 36.99298 xmax: -76.74198 ymax: 38.00828
## Geodetic CRS:  NAD83
proj<-crs(rva2001)
g_RVA<-st_transform(g_RVA, proj)

#now create a bounding box that is the extent of the greater RVA area
bb<-st_bbox(g_RVA)
box = st_as_sfc(bb)

#create a regular sample of 200 points within the greater RVA extent (box) using st_sample and plot them on top of one of your forest-imperv rasters
?st_sample
regularsamp=st_sample(box, size = 220, exact = TRUE, type = "regular")[1:200]
plot(classtype19)
#plot the new sample points on top
plot(regularsamp, add = T)

#so that we can assess how distance to city center influences the metrics and the change in metrics over time, we need to calculate a distance to the center of the city for each point.I provide the code for this where I create a simple feature with two points at the city center .....b/c I cannot figure out how to make an sf with only one point
pt1 = st_point(c(1615022,1768974))
pt2 = st_point(c(1615021,1768973))
?st_point
d = data.frame(a = 1:2)
d$geom = st_sfc(pt1, pt2)
center = st_as_sf(d)
?st_sfc
?st_as_sf
#plot city center to see where it is
plot(classtype01)
plot(center, pch=20, add=T, color = "black")

center<-st_set_crs(center, proj)
?st_set_crs
#create a distance vector for each point and then create a vector for plot_id and merge them into a dataframe
?st_distance
dist<-as.vector(st_distance(regularsamp, center[1,], by_element = T))
plot_id<-seq(1:200)#make sure this is the same length as the distance vector!!
dist<-data.frame(dist, plot_id)

Calculate Class Metrics

#create coordinate matrix as input for sample_lsm
coords_gRVA <- as.matrix(st_coordinates(regularsamp))

#calculate the following CLASS-LEVEL metrics for each year (2001 and 2019) within a 1km radius circle:
#percent of the landcape (pland), largest patch index, edge density (ed), clumpiness index (clumpy), number of patches (np), mean fractal dimension (frac_mn)

cl_metrics_01<-sample_lsm(landscape = classtype01, y = coords_gRVA, shape = "circle",
                      size = 1000, what = c("lsm_c_pland", "lsm_c_ed", "lsm_c_clumpy", 
                                           "lsm_c_np", "lsm_c_lpi", "lsm_c_frac_mn"))
cl_metrics_19<-sample_lsm(landscape = classtype19, y = coords_gRVA, shape = "square",
                      size = 1000, what = c("lsm_c_pland", "lsm_c_ed", "lsm_c_clumpy", 
                                           "lsm_c_np", "lsm_c_lpi", "lsm_c_frac_mn"))
## Warning: The 'perecentage_inside' is below 90% for at least one buffer.
#add distance column to these based on plot_id
cl_metrics01<- merge(cl_metrics_01, dist, by="plot_id")
cl_metrics19<- merge(cl_metrics_19, dist, by="plot_id")

Look at the data….

What sort of clean-up needs to be done before you can run a regression model?

#Refer to objective 1 to do the following:
#add year column to each dataframe

cl_metrics01$year<-"2001"
cl_metrics19$year<-"2019"
#spread out data into wide form for both years
cl_metrics01_long <- cl_metrics01 %>% 
  group_by(year) %>% 
  spread(metric, value)
cl_metrics19_long <- cl_metrics19 %>% 
  group_by(year) %>% 
  spread(metric, value)


#rbind these two df together:
cl_metrics_01_19=rbind(cl_metrics01_long, cl_metrics19_long)



#create a distance variable that is in km
cl_metrics_01_19<- cl_metrics_01_19 %>% mutate(distance = dist*0.001)

#save
##write.csv(cl_metrics_01_19, "cl_metrics_01_19.csv")

#filter each class (forest / impervious) into two separate dataframes
cl_metrics0119_for <- subset(cl_metrics_01_19, class == 41, select = c("plot_id","percentage_inside", "dist","year","clumpy", "ed", "frac_mn", "lpi", "np", "pland", "distance"))
cl_metrics0119_imperv <- subset(cl_metrics_01_19, class == 21, select = c("plot_id","percentage_inside", "dist","year","clumpy", "ed", "frac_mn", "lpi", "np", "pland", "distance"))

Run stats!

Paired t-test.

#first need to make sure that there are paired samples for each year
cl_metrics0119_for$year<-as.factor(cl_metrics0119_for$year)
summary(cl_metrics0119_for$year)
## 2001 2019 
##  199  199
table(cl_metrics0119_for$plot_id, cl_metrics0119_for$year)
##      
##       2001 2019
##   1      1    1
##   2      1    1
##   3      1    1
##   4      1    1
##   5      1    1
##   6      1    1
##   7      1    1
##   8      1    1
##   9      1    1
##   10     1    1
##   11     1    1
##   12     1    1
##   13     1    1
##   14     1    1
##   15     1    1
##   16     1    1
##   17     1    1
##   18     1    1
##   19     1    1
##   20     1    1
##   21     1    1
##   22     1    1
##   23     1    1
##   24     1    1
##   25     1    1
##   26     1    1
##   27     1    1
##   28     1    1
##   29     1    1
##   30     1    1
##   31     1    1
##   32     1    1
##   33     1    1
##   34     1    1
##   35     1    1
##   36     1    1
##   37     1    1
##   38     1    1
##   39     1    1
##   40     1    1
##   41     1    1
##   42     1    1
##   43     1    1
##   44     1    1
##   45     1    1
##   46     1    1
##   47     1    1
##   48     1    1
##   49     1    1
##   50     1    1
##   51     1    1
##   52     1    1
##   53     1    1
##   54     1    1
##   55     1    1
##   56     1    1
##   57     1    1
##   58     1    1
##   59     1    1
##   60     1    1
##   61     1    1
##   62     1    1
##   63     1    1
##   64     1    1
##   65     1    1
##   66     1    1
##   67     1    1
##   68     1    1
##   69     1    1
##   70     1    1
##   71     1    1
##   72     1    1
##   73     1    1
##   74     1    1
##   75     1    1
##   76     1    1
##   77     1    1
##   78     1    1
##   79     1    1
##   81     1    1
##   82     1    1
##   83     1    1
##   84     1    1
##   85     1    1
##   86     1    1
##   87     1    1
##   88     1    1
##   89     1    1
##   90     1    1
##   91     1    1
##   92     1    1
##   93     1    1
##   94     1    1
##   95     1    1
##   96     1    1
##   97     1    1
##   98     1    1
##   99     1    1
##   100    1    1
##   101    1    1
##   102    1    1
##   103    1    1
##   104    1    1
##   105    1    1
##   106    1    1
##   107    1    1
##   108    1    1
##   109    1    1
##   110    1    1
##   111    1    1
##   112    1    1
##   113    1    1
##   114    1    1
##   115    1    1
##   116    1    1
##   117    1    1
##   118    1    1
##   119    1    1
##   120    1    1
##   121    1    1
##   122    1    1
##   123    1    1
##   124    1    1
##   125    1    1
##   126    1    1
##   127    1    1
##   128    1    1
##   129    1    1
##   130    1    1
##   131    1    1
##   132    1    1
##   133    1    1
##   134    1    1
##   135    1    1
##   136    1    1
##   137    1    1
##   138    1    1
##   139    1    1
##   140    1    1
##   141    1    1
##   142    1    1
##   143    1    1
##   144    1    1
##   145    1    1
##   146    1    1
##   147    1    1
##   148    1    1
##   149    1    1
##   150    1    1
##   151    1    1
##   152    1    1
##   153    1    1
##   154    1    1
##   155    1    1
##   156    1    1
##   157    1    1
##   158    1    1
##   159    1    1
##   160    1    1
##   161    1    1
##   162    1    1
##   163    1    1
##   164    1    1
##   165    1    1
##   166    1    1
##   167    1    1
##   168    1    1
##   169    1    1
##   170    1    1
##   171    1    1
##   172    1    1
##   173    1    1
##   174    1    1
##   175    1    1
##   176    1    1
##   177    1    1
##   178    1    1
##   179    1    1
##   180    1    1
##   181    1    1
##   182    1    1
##   183    1    1
##   184    1    1
##   185    1    1
##   186    1    1
##   187    1    1
##   188    1    1
##   189    1    1
##   190    1    1
##   191    1    1
##   192    1    1
##   193    1    1
##   194    1    1
##   195    1    1
##   196    1    1
##   197    1    1
##   198    1    1
##   199    1    1
##   200    1    1
cl_metrics0119_imperv$year<-as.factor(cl_metrics0119_imperv$year)
summary(cl_metrics0119_imperv$year)
## 2001 2019 
##  194  198
table(cl_metrics0119_imperv$plot_id, cl_metrics0119_imperv$year)
##      
##       2001 2019
##   1      1    1
##   2      1    1
##   3      1    1
##   4      1    1
##   5      1    1
##   6      1    1
##   7      1    1
##   8      1    1
##   9      1    1
##   10     1    1
##   11     1    1
##   12     1    1
##   13     1    1
##   14     1    1
##   15     1    1
##   16     1    1
##   17     1    1
##   18     1    1
##   19     1    1
##   20     1    1
##   21     1    1
##   22     1    1
##   23     1    1
##   24     1    1
##   25     1    1
##   26     1    1
##   27     1    1
##   28     1    1
##   29     1    1
##   30     1    1
##   31     1    1
##   32     0    1
##   33     1    1
##   34     1    1
##   35     1    1
##   36     1    1
##   37     1    1
##   38     1    1
##   39     1    1
##   40     1    1
##   41     1    1
##   42     1    1
##   43     1    1
##   44     1    1
##   45     1    1
##   46     1    1
##   47     1    1
##   48     1    1
##   49     1    1
##   50     0    1
##   51     1    1
##   52     1    1
##   53     1    1
##   54     1    1
##   55     1    1
##   56     1    1
##   57     1    1
##   58     0    1
##   59     1    1
##   60     1    1
##   61     1    1
##   62     1    1
##   63     1    1
##   64     1    1
##   65     1    1
##   66     1    1
##   67     1    1
##   68     1    1
##   69     1    1
##   70     1    1
##   71     1    1
##   72     1    1
##   73     1    1
##   74     1    1
##   75     1    1
##   76     1    1
##   77     1    1
##   78     1    1
##   79     1    1
##   81     1    1
##   82     1    1
##   83     1    1
##   84     1    1
##   85     1    1
##   86     1    1
##   87     1    1
##   88     1    1
##   89     1    1
##   90     1    1
##   91     1    1
##   92     1    1
##   93     1    1
##   94     1    1
##   95     1    1
##   96     0    1
##   97     1    1
##   98     1    1
##   99     1    1
##   101    1    1
##   102    1    1
##   103    1    1
##   104    1    1
##   105    1    1
##   106    1    1
##   107    1    1
##   108    1    1
##   109    1    1
##   110    1    1
##   111    1    1
##   112    1    1
##   113    1    1
##   114    1    1
##   115    1    1
##   116    1    1
##   117    1    1
##   118    1    1
##   119    1    1
##   120    1    1
##   121    1    1
##   122    1    1
##   123    1    1
##   124    1    1
##   125    1    1
##   126    1    1
##   127    1    1
##   128    1    1
##   129    1    1
##   130    1    1
##   131    1    1
##   132    1    1
##   133    1    1
##   134    1    1
##   135    1    1
##   136    1    1
##   137    1    1
##   138    1    1
##   139    1    1
##   140    1    1
##   141    1    1
##   142    1    1
##   143    1    1
##   144    1    1
##   145    1    1
##   146    1    1
##   147    1    1
##   148    1    1
##   149    1    1
##   150    1    1
##   151    1    1
##   152    1    1
##   153    1    1
##   154    1    1
##   155    1    1
##   156    1    1
##   157    1    1
##   158    1    1
##   159    1    1
##   160    1    1
##   161    1    1
##   162    1    1
##   163    1    1
##   164    1    1
##   165    1    1
##   166    1    1
##   167    1    1
##   168    1    1
##   169    1    1
##   170    1    1
##   171    1    1
##   172    1    1
##   173    1    1
##   174    1    1
##   175    1    1
##   176    1    1
##   177    1    1
##   178    1    1
##   179    1    1
##   180    1    1
##   181    1    1
##   182    1    1
##   183    1    1
##   184    1    1
##   185    1    1
##   186    1    1
##   187    1    1
##   188    1    1
##   189    1    1
##   190    1    1
##   191    1    1
##   192    1    1
##   193    1    1
##   194    1    1
##   195    1    1
##   196    1    1
##   197    1    1
##   198    1    1
##   199    1    1
##   200    1    1
#see from above table if any plot_id values do not have data for both years, and delete these using filter function:
cl_metrics0119_for<-filter(cl_metrics0119_for, plot_id != 80 & plot_id != 91)
cl_metrics0119_imperv<-filter(cl_metrics0119_for, plot_id != 9 & plot_id != 50 & plot_id !=95 )

#run paired t-test and plot any significant results based on alpha < 0.05:
fit<-t.test(pland~year, cl_metrics0119_for, paired=T)
fit##significant p-value
## 
##  Paired t-test
## 
## data:  pland by year
## t = 11.147, df = 197, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  3.777226 5.401055
## sample estimates:
## mean of the differences 
##                4.589141
fit1<-t.test(lpi~year, cl_metrics0119_for, paired=T)
fit1##significant p-value
## 
##  Paired t-test
## 
## data:  lpi by year
## t = 9.0274, df = 197, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.296058 3.579637
## sample estimates:
## mean of the differences 
##                2.937847
fit2<-t.test(ed~year, cl_metrics0119_for, paired=T)
fit2 
## 
##  Paired t-test
## 
## data:  ed by year
## t = 10.998, df = 197, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   7.91028 11.36711
## sample estimates:
## mean of the differences 
##                9.638695
fit3<-t.test(clumpy~year, cl_metrics0119_for, paired=T)
fit3
## 
##  Paired t-test
## 
## data:  clumpy by year
## t = 2.6193, df = 196, p-value = 0.0095
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.008113852 0.057565334
## sample estimates:
## mean of the differences 
##              0.03283959
fit4<-t.test(np~year, cl_metrics0119_for, paired=T)
fit4
## 
##  Paired t-test
## 
## data:  np by year
## t = -10.236, df = 197, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.595912 -3.787926
## sample estimates:
## mean of the differences 
##               -4.691919
fit5<-t.test(frac_mn~year, cl_metrics0119_for, paired=T)
fit5
## 
##  Paired t-test
## 
## data:  frac_mn by year
## t = 2.9048, df = 197, p-value = 0.004095
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.001158153 0.006055749
## sample estimates:
## mean of the differences 
##             0.003606951
fitimperv<-t.test(pland~year, cl_metrics0119_imperv, paired=T)
fitimperv
## 
##  Paired t-test
## 
## data:  pland by year
## t = 10.94, df = 194, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  3.736706 5.380342
## sample estimates:
## mean of the differences 
##                4.558524
fit1imperv<-t.test(lpi~year, cl_metrics0119_imperv, paired=T)
fit1imperv
## 
##  Paired t-test
## 
## data:  lpi by year
## t = 8.8259, df = 194, p-value = 6.354e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.256555 3.555283
## sample estimates:
## mean of the differences 
##                2.905919
fit2imperv<-t.test(ed~year, cl_metrics0119_imperv, paired=T)
fit2imperv
## 
##  Paired t-test
## 
## data:  ed by year
## t = 10.733, df = 194, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   7.774721 11.275299
## sample estimates:
## mean of the differences 
##                 9.52501
fit3imperv<-t.test(clumpy~year, cl_metrics0119_imperv, paired=T)
fit3imperv
## 
##  Paired t-test
## 
## data:  clumpy by year
## t = 2.538, df = 193, p-value = 0.01194
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.007196044 0.057379798
## sample estimates:
## mean of the differences 
##              0.03228792
fit4imperv<-t.test(np~year, cl_metrics0119_imperv, paired=T)
fit4imperv
## 
##  Paired t-test
## 
## data:  np by year
## t = -10.147, df = 194, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.610459 -3.784413
## sample estimates:
## mean of the differences 
##               -4.697436
fit5imperv<-t.test(frac_mn~year, cl_metrics0119_imperv, paired=T)
fit5imperv
## 
##  Paired t-test
## 
## data:  frac_mn by year
## t = 2.8029, df = 194, p-value = 0.005579
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.001030446 0.005924130
## sample estimates:
## mean of the differences 
##             0.003477288
#For significant differences, plot and compare means:
ggplot(cl_metrics0119_for, aes(year, pland))+geom_boxplot()

ggplot(cl_metrics0119_for, aes(year, lpi))+geom_boxplot()

ggplot(cl_metrics0119_for, aes(year, ed))+geom_boxplot()

ggplot(cl_metrics0119_for, aes(year, clumpy))+geom_boxplot()
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

ggplot(cl_metrics0119_for, aes(year, np))+geom_boxplot()

ggplot(cl_metrics0119_for, aes(year, frac_mn))+geom_boxplot()

ggplot(cl_metrics0119_imperv, aes(year, pland))+geom_boxplot()

ggplot(cl_metrics0119_imperv, aes(year, lpi))+geom_boxplot()

ggplot(cl_metrics0119_imperv, aes(year, ed))+geom_boxplot()

ggplot(cl_metrics0119_imperv, aes(year, clumpy))+geom_boxplot()
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

ggplot(cl_metrics0119_imperv, aes(year, np))+geom_boxplot()

ggplot(cl_metrics0119_imperv, aes(year, frac_mn))+geom_boxplot()

cl_metrics0119_for %>% group_by(year) %>% summarize(mean=mean(pland), sd=sd(pland))
## # A tibble: 2 × 3
##   year   mean    sd
##   <fct> <dbl> <dbl>
## 1 2001   17.3  13.5
## 2 2019   12.7  11.0
#consider biological versus statistical significance.

#repeat above for each metric for forests and for impervious

Run stats!

For forest class only, use linear models with year and distance as predictors of variation in metrics

lmfit<-lm(pland~year+distance, cl_metrics0119_for)
summary(lmfit) ##year, not distance display significant p-value
## 
## Call:
## lm(formula = pland ~ year + distance, data = cl_metrics0119_for)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.905 -10.303  -1.931   6.954  56.382 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.18559    1.78193  10.206  < 2e-16 ***
## year2019    -4.58914    1.23942  -3.703 0.000244 ***
## distance    -0.01998    0.03470  -0.576 0.564973    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.33 on 393 degrees of freedom
## Multiple R-squared:  0.0345, Adjusted R-squared:  0.02958 
## F-statistic: 7.021 on 2 and 393 DF,  p-value: 0.00101
ggplot(cl_metrics0119_for, aes(distance, pland, color = year))+
  geom_point()+
  geom_smooth(method = lm)+xlab("Distance from RVA (km)")+ylab("Percent forest")
## `geom_smooth()` using formula 'y ~ x'

lmfit1<-lm(lpi~year+distance, cl_metrics0119_for)
summary(lmfit1) ##year, not distance display significant p-value
## 
## Call:
## lm(formula = lpi ~ year + distance, data = cl_metrics0119_for)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.088 -4.546 -2.670  1.809 39.070 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.268080   1.121691   7.371 1.01e-12 ***
## year2019    -2.937847   0.780189  -3.766 0.000192 ***
## distance    -0.009153   0.021841  -0.419 0.675376    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.763 on 393 degrees of freedom
## Multiple R-squared:  0.03524,    Adjusted R-squared:  0.03033 
## F-statistic: 7.178 on 2 and 393 DF,  p-value: 0.0008678
ggplot(cl_metrics0119_for, aes(distance, lpi, color = year))+
  geom_point()+
  geom_smooth(method = lm)+xlab("Distance from RVA (km)")+ylab("Largest Patch Index")
## `geom_smooth()` using formula 'y ~ x'

lmfit2<-lm(ed~year+distance, cl_metrics0119_for)
summary(lmfit2)##year, not distance display significant p-value
## 
## Call:
## lm(formula = ed ~ year + distance, data = cl_metrics0119_for)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -61.614 -28.678   0.758  27.138  84.376 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 63.46948    4.75753  13.341  < 2e-16 ***
## year2019    -9.63870    3.30909  -2.913  0.00379 ** 
## distance    -0.08163    0.09264  -0.881  0.37876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.92 on 393 degrees of freedom
## Multiple R-squared:  0.02302,    Adjusted R-squared:  0.01805 
## F-statistic:  4.63 on 2 and 393 DF,  p-value: 0.01029
ggplot(cl_metrics0119_for, aes(distance, ed, color = year))+
  geom_point()+
  geom_smooth(method = lm)+xlab("Distance from RVA (km)")+ylab("Edge Density")
## `geom_smooth()` using formula 'y ~ x'

lmfit3<-lm(clumpy~year+distance, cl_metrics0119_for)
summary(lmfit3)##year, not distance display significant p-value
## 
## Call:
## lm(formula = clumpy ~ year + distance, data = cl_metrics0119_for)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.61736 -0.05275  0.01071  0.07525  0.41180 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6214785  0.0228027  27.255   <2e-16 ***
## year2019    -0.0328396  0.0158948  -2.066   0.0395 *  
## distance    -0.0001096  0.0004439  -0.247   0.8050    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1578 on 391 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01095,    Adjusted R-squared:  0.005893 
## F-statistic: 2.165 on 2 and 391 DF,  p-value: 0.1161
ggplot(cl_metrics0119_for, aes(distance, clumpy, color = year))+
  geom_point()+
  geom_smooth(method = lm)+xlab("Distance from RVA (km)")+ylab("Clumpiness Index")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

lmfit4<-lm(np~year+distance, cl_metrics0119_for)
summary(lmfit4)##both year & distance display significant p-value
## 
## Call:
## lm(formula = np ~ year + distance, data = cl_metrics0119_for)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.317  -7.729  -0.037   7.367  39.673 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.00130    1.60628  15.565  < 2e-16 ***
## year2019     4.69192    1.11724   4.200 3.31e-05 ***
## distance    -0.09434    0.03128  -3.016  0.00273 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.12 on 393 degrees of freedom
## Multiple R-squared:  0.06369,    Adjusted R-squared:  0.05893 
## F-statistic: 13.37 on 2 and 393 DF,  p-value: 2.419e-06
ggplot(cl_metrics0119_for, aes(distance, np, color = year))+
  geom_point()+
  geom_smooth(method = lm)+xlab("Distance from RVA (km)")+ylab("Number of Patches")
## `geom_smooth()` using formula 'y ~ x'

lmfit5<-lm(frac_mn~year+distance, cl_metrics0119_for)
summary(lmfit5) ##year, not distance display significant p-value
## 
## Call:
## lm(formula = frac_mn ~ year + distance, data = cl_metrics0119_for)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.081458 -0.010160  0.002351  0.012423  0.060947 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.082e+00  2.973e-03 363.979   <2e-16 ***
## year2019    -3.607e-03  2.068e-03  -1.744   0.0819 .  
## distance    -2.189e-05  5.790e-05  -0.378   0.7055    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02058 on 393 degrees of freedom
## Multiple R-squared:  0.008038,   Adjusted R-squared:  0.00299 
## F-statistic: 1.592 on 2 and 393 DF,  p-value: 0.2048
ggplot(cl_metrics0119_for, aes(distance, frac_mn, color = year))+
  geom_point()+
  geom_smooth(method = lm)+xlab("Distance from RVA (km)")+ylab("Mean Fractal Dimension")
## `geom_smooth()` using formula 'y ~ x'

#repeat for other forest metrics

#Consider how much of the variation in the metrics is explained by these models?

Summarize and reflect on your results:

Write a paragraph summarizing your results for this objective.
Consider the predictions you made prior to doing the analysis. Which ones supported your predictions? Did any of the results surprise you? Which ones and why?

##Across all landscape metrics year displayed signifant p-values while distance only displayed statistical significance when using year+distance as a predictor of number of patches. Total percent forest appears to decline between ’01 and ’19 while number of patches increases over the years.

Is year or distance to city center a better predictor of variation in most metrics? Why does this make sense?

##Year seemed to be the best predictor of variation in landscape metrics across the linear models ran. This seems to make sense simply because development accumulated as we progress further into the millenia.

Consider one of the LS metrics assessed here and hypothesize an ecological or socio-ecological process that may be associated with that metric and how it varies as you move farther away from the Richmond City center.

##Number of patches explained by distance from city center and year did appear to have a negative linear relationship, where number of patches decreased as we moved further away from Richmond. This can be explained due to less fragmentation and less development going on outside of the city.

knit your document as html and turn it in!