The purpose of this analysis is to identify cases where NRSA 2013-2014 design coordinates do or do not fall within each site’s associated sampled reach, using A and K transect locations. The output of the analysis answers the following two questions:
Does the sampled reach, represented as transect A and K coordinates, fall on the same NHDPlus flowline as the design coordinate?
Does the design coordinate fall within the sampled reach?
The output also includes a column, “CHECK_MANUALLY”, that indicates whether a site should be checked in ArcGIS Desktop.
First, we’ll pre-process the NHDPlusv2 Flowline layers before using linear referencing to obtain measures for each NRSA coordinate.
Step A. Join PlusFlowlineVAA.dbf to NHDFlowline.shp by COMID and export the result to a new shapefile. We’ll do this using match(). Perform this join for all flowline layers.
Note: For NHD hydroregion 09, the “NHDFlowline” shapefile was titled “nhdflowline”. I manually edited the names of the files associated with this shapefile to match all of the others (e.g., “nhdflowline.shp” became “NHDFlowline.shp”). I did this mostly to avoid having to insert a grepl expression into readOGR().
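Before running the full loop, here is a minimal sketch of how the match()-based join works, using made-up toy data frames standing in for NHDFlowline and PlusFlowlineVAA (the names and values are illustrative only): match() returns, for each flowline COMID, the row position of that COMID in the VAA table, and indexing LevelPathI by those positions attaches the correct LevelPath to each flowline.
#toy example only - small made-up tables, not the real NHD data
flowline.attr<-data.frame(COMID=c(101,102,103),GNIS_NAME=c("Elk Cr","Bear R","Fox R"))
vaa<-data.frame(ComID=c(103,101,102),LevelPathI=c(900,700,800))
#match() gives the VAA row positions c(2,3,1); indexing LevelPathI by them yields 700, 800, 900
flowline.attr$LevelPath<-vaa$LevelPathI[match(flowline.attr$COMID,vaa$ComID)]
The same pattern is applied to the full attribute tables in the loop below.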
library(rgdal)
library(foreign)
NHD <-c("01","02","03N","03S","03W","04","05","06","07","08","09","10L","10U","11","12","13","14","15","16","17","18")
regions<-c("NE","MA","SA","SA","SA","GL","MS","MS","MS","MS","SR","MS","MS","MS","TX","RG","CO","CO","GB","PN","CA")
#set working directory - filepaths will all relate to this directory
root.dir<-"E:/NHDRegions"
for (i in 1:length(NHD)){
#working directory for NHD Flowlines shapefile
subfolder<-paste(c("NHDPlus",regions[i]),collapse="")
dir<-paste(c(root.dir,"/",NHD[i],"/",subfolder,"/NHDPlus",NHD[i],"/NHDSnapshot/Hydrography"),collapse="")
setwd(dir)
NHDFlowline<-readOGR(".","NHDFlowline")
#assign attribute table to separate dataframe to perform match on, standardize "COMID" field
NHDattr<-NHDFlowline@data
colnames(NHDattr)[1]<-"COMID"
#working directory for the NHDFlowline VAA table containing LevelPath
dir<-paste(c(root.dir,"/",NHD[i],"/",subfolder,"/NHDPlus",NHD[i],"/NHDPlusAttributes"),collapse="")
setwd(dir)
VAA<-read.dbf("PlusFlowlineVAA.dbf")
#join NHDFlowline table with VAA LevelPath
NHDattr$LevelPath<-VAA$LevelPathI[match(NHDattr$COMID,VAA$ComID)]
#copy NHD hydro layer so we're not altering the original shapefile
NHDFlowline2<-NHDFlowline
#assign joined table as NHDFlowline attribute table
NHDFlowline2@data<-NHDattr
folder<-paste(c(root.dir,"/",NHD[i]),collapse="")
output<-paste(c("NHDFlowline_",NHD[i]),collapse="")
writeOGR(obj=NHDFlowline2,dsn=folder,output,driver='ESRI Shapefile')
}
Step B. Convert each joined NHDFlowline shapefile into a feature layer. Then create routes on that feature layer, with LevelPath as the Route Identifier Field. Repeat for all hydroregions.
import arcpy, os, sys
from arcpy import env
NHD = '01','02','03N','03S','03W','04','05','06','07','08','09','10L','10U','11','12','13','14','15','16','17','18'
for i in NHD:
root="E:\\NHDRegions"
shapefile=root+'\\'+i+'\\'+'NHDFlowline_'+i+'.shp'
inlayer=root+'\\'+i+'\\'+'NHDFlowline_'+i+'_lyr'
outlayer=root+'\\'+i+'\\'+'Routes_'+i
arcpy.MakeFeatureLayer_management(shapefile,inlayer) #convert shapefile to layer
arcpy.CreateRoutes_lr(in_line_features=inlayer,route_id_field="LevelPath",out_feature_class=outlayer,measure_source="LENGTH",from_measure_field="#",to_measure_field="#",coordinate_priority="UPPER_LEFT",measure_factor="1",measure_offset="0",ignore_gaps="IGNORE",build_index="INDEX")
Step 1. First we’ll bring in Marlys’ coordinates file and subset it to include only sampled design sites.
library(dplyr)
library(reshape2)
root.dir<-"E:/NRSACoordinates"
setwd(root.dir)
october.pull<-read.csv("riverSSsooHooOct 27 2014.csv",stringsAsFactors=FALSE) # read in file
october.pull<-mutate(october.pull, FILTER=substr(october.pull$SITE_ID,3,4)) # create column that identifies design sites
filters<-c("KS","LS","R9","RM","RO","RS","S9","SS") # create vector of "filters" to subset by
samp.yes<-october.pull[october.pull$SITESAMP=="Y",] # subset for sites that were sampled
filt.sites<-samp.yes[samp.yes$FILTER %in% filters,] # filter out reference, USGS, BLM sites
table(filt.sites$FILTER) # should only contain KS, LS, R9, RM, RO, RS, S9, SS filter tags
##
## KS LS R9 RM RO RS S9 SS
## 26 271 511 233 255 52 477 282
# this dataframe contains all sampled probability sites
filt.sites<-filt.sites[,c("UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE","CREW","VALXSITE","LATDD_TOP_BANK_A","LATDD_TOP_MIDSTREAM_A","LONDD_TOP_BANK_A","LONDD_TOP_MIDSTREAM_A","LATDD_TOP_BANK_K","LATDD_TOP_MIDSTREAM_K","LONDD_TOP_BANK_K","LONDD_TOP_MIDSTREAM_K","DATA_SUBMISSION","FILTER")] # get rid of extraneous columns
filt.cc<-filt.sites[complete.cases(filt.sites),] # get rid of sites without all A and K coordinates
Step 2. The subset created above includes only sites that have both midstream and bank A and K coordinates. We’ll now add in sites that have either a full set of midstream A and K coordinates or a full set of bank A and K coordinates.
cc.sites<-filt.cc$UID # create vector that lists all complete.cases()
leftovers<-filt.sites[!(filt.sites$UID %in% cc.sites),] # identify sites without complete cases
table(leftovers$VALXSITE) # examine the types of sites in the data frame (wadeable and boatable categories)
##
## BOATABLE INTBOAT INTWADE PARBYBOAT PARBYWADE WADEABLE
## 306 1 31 2 15 1207
leftovers.b<-leftovers[leftovers$VALXSITE %in% c("BOATABLE","INTBOAT","PARBYBOAT"),] # subset for boatables
# Retrieve sites where A and K are provided completely for either top_bank or top_midstream
# Bank A and K:
bankAK<-leftovers.b[!is.na(leftovers.b$LATDD_TOP_BANK_A) & !is.na(leftovers.b$LONDD_TOP_BANK_A) & !is.na(leftovers.b$LATDD_TOP_BANK_K) &!is.na(leftovers.b$LONDD_TOP_BANK_K),]
head(bankAK)[c(1:10,12,14,16)]
## UID SITE_ID VISIT_NO DATE_COL YEAR SITESAMP STATE CREW VALXSITE
## 176 10439 ORR9-0912 1 6/26/2013 2013 Y OR OR1 BOATABLE
## 312 10711 ORRM-1001 1 7/10/2013 2013 Y OR OR1 BOATABLE
## 388 10857 ORR9-0911 1 7/18/2013 2013 Y OR OR1 BOATABLE
## 431 10932 LAS9-0948 1 7/22/2013 2013 Y LA LA1 BOATABLE
## 456 10969 WARM-1001 1 7/24/2013 2013 Y WA WA1 BOATABLE
## 547 11121 LASS-1051 1 7/31/2013 2013 Y LA LA1 BOATABLE
## LATDD_TOP_BANK_A LONDD_TOP_BANK_A LATDD_TOP_BANK_K LONDD_TOP_BANK_K
## 176 45.4848 -122.9602 45.4897 -122.9540
## 312 45.7592 -117.7608 45.7745 -117.7586
## 388 44.0517 -123.0809 44.0735 -123.1160
## 431 30.6093 -91.8793 30.5929 -91.8726
## 456 47.1860 -120.9206 47.1803 -120.8965
## 547 29.9525 -92.4615 29.9397 -92.4614
# Midstream A and K:
midAK<-leftovers.b[!is.na(leftovers.b$LATDD_TOP_MIDSTREAM_A) & !is.na(leftovers.b$LONDD_TOP_MIDSTREAM_A) & !is.na(leftovers.b$LATDD_TOP_MIDSTREAM_K) &!is.na(leftovers.b$LONDD_TOP_MIDSTREAM_K),]
head(midAK)[c(1:9,11,13,15,17)]
## UID SITE_ID VISIT_NO DATE_COL YEAR SITESAMP STATE CREW VALXSITE
## 948 11858 DERO-1002 1 8/28/2013 2013 Y DE DE1 BOATABLE
## 1732 143036 NDR9-0918 2 6/23/2014 2014 Y ND PG6 BOATABLE
## LATDD_TOP_MIDSTREAM_A LONDD_TOP_MIDSTREAM_A LATDD_TOP_MIDSTREAM_K
## 948 38.5891 -75.6678 38.5587
## 1732 45.9954 -98.1599 45.9611
## LONDD_TOP_MIDSTREAM_K
## 948 -75.6903
## 1732 -98.1698
# Merge two sets of sites
add.leftovers<-merge(bankAK,midAK,all=TRUE)
# Merge complete.cases() dataframe with sites that contain full sets of either midstream or bank A/K
field.samp<-merge(filt.cc,add.leftovers,all=TRUE)
dim(field.samp)
## [1] 573 19
Step 3. For all sampled sites, we need the corresponding coordinates from the design file. We’ll bring those in now.
samp.list<-as.character(unique(field.samp$SITE_ID)) # Generate list of sampled sites to subset design file by
length(samp.list)
## [1] 514
# Bring in design file, subset by this list, and merge
design.file<-read.csv("NRSA14_Integrated_20120602_20140113.csv",stringsAsFactors=FALSE)
design.subset<-design.file[,1:9] #cut down columns to only those needed
design.samp<-design.subset[design.subset$siteID14 %in% samp.list,]
dim(design.samp)
## [1] 511 9
# Merge field.samp data frame with design coordinates dataframe by site ID (design file does not contain UIDs)
merged.file<-merge(field.samp,design.samp,by.x="SITE_ID",by.y="siteID14",all.x=TRUE)
dim(merged.file)
## [1] 573 27
# Re-order columns so all lat's are followed by lon's, drop un-needed columns from design data frame
merged.file<-merged.file[,c("UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE","CREW","VALXSITE","LATDD_TOP_BANK_A","LONDD_TOP_BANK_A","LATDD_TOP_BANK_K","LONDD_TOP_BANK_K","LATDD_TOP_MIDSTREAM_A","LONDD_TOP_MIDSTREAM_A","LATDD_TOP_MIDSTREAM_K","LONDD_TOP_MIDSTREAM_K","LATDD_GRS","LONDD_GRS")]
Step 4. We’ll now split out each set of coordinates and create an individual shapefile for each. If this section of the code has trouble running, perform range checks on each latitude and longitude column (e.g., range(llsubset$LONDD_TOP_BANK_K)) to catch errors such as misplaced decimal points, missing negative signs, or stray negative signs; a quick range-check sketch follows this paragraph. These issues will cause knitr to throw the following error: Geographical CRS given to non-conformant data.
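A minimal sketch of such a range check, assuming merged.file (from Step 3) is still in memory; the latitude/longitude bounds below are rough conterminous-US limits chosen only for illustration:
#illustrative sanity check only - column names come from Step 3; bounds are approximate CONUS limits
lat.cols<-c("LATDD_TOP_BANK_A","LATDD_TOP_BANK_K","LATDD_TOP_MIDSTREAM_A","LATDD_TOP_MIDSTREAM_K","LATDD_GRS")
lon.cols<-c("LONDD_TOP_BANK_A","LONDD_TOP_BANK_K","LONDD_TOP_MIDSTREAM_A","LONDD_TOP_MIDSTREAM_K","LONDD_GRS")
for (col in c(lat.cols,lon.cols)){
print(c(col,range(merged.file[[col]],na.rm=TRUE))) #eyeball the min/max of each coordinate column
}
#flag rows outside rough bounds (roughly 24-50 N latitude, -125 to -66 W longitude)
bad.lat<-lapply(lat.cols,function(col) which(merged.file[[col]] < 24 | merged.file[[col]] > 50))
bad.lon<-lapply(lon.cols,function(col) which(merged.file[[col]] < -125 | merged.file[[col]] > -66))
Any rows flagged here can be corrected in the source CSV (or in merged.file) before running the export loop below.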
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.9-1, (SVN revision 518)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.0, released 2014/04/16
## Path to GDAL shared files: C:/Users/msoohoo/Documents/R/win-library/3.1/rgdal/gdal
## GDAL does not use iconv for recoding strings.
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: C:/Users/msoohoo/Documents/R/win-library/3.1/rgdal/proj
#states<-readOGR("G:/NARS/Cross_Survey/Geospatial/GIS_Data/Background_Layers","48states") can't access G: drive today
coords<-c("BANK_A","BANK_K","MIDSTREAM_A","MIDSTREAM_K","DESIGN")
par(mfrow=c(2,3))
for (i in 1:length(coords)){
if (i == 1){
llsubset<-merged.file[,c(1:11)]
} else if (i== 2){
llsubset<-merged.file[,c(1:9,12:13)]
llsubset<-llsubset[order(llsubset$LONDD_TOP_BANK_K),] #delete with new file
llsubset$LONDD_TOP_BANK_K[1]<--86.04541 #delete with new file
llsubset$LONDD_TOP_BANK_K[2]<--80.495 #delete with new file
} else if (i==3){
llsubset<-merged.file[,c(1:9,14:15)]
} else if (i==4){
llsubset<-merged.file[,c(1:9,16:17)]
} else if (i==5){
llsubset<-merged.file[,c(1:9,18:19)]
}
cc<-complete.cases(llsubset)
llsubset<-llsubset[cc,]
latlong<-llsubset[,c(11,10)]
llCRS<-CRS("+init=epsg:4269 +proj=longlat +ellps=GRS80 +datum=NAD83")
coordinates(llsubset)<-latlong
proj4string(llsubset)<- llCRS
#states<-spTransform(states,llCRS)
#plot(states, axes=TRUE, main=coords[i])
plot(llsubset,pch=1,axes=TRUE,main=coords[i]) #delete once access to G: drive is restored
#points(llsubset,pch=1)
outfolder=paste(c("E:/NRSACoordinates/",coords[i]),collapse="")
writeOGR(obj=llsubset,dsn=outfolder,coords[i],driver="ESRI Shapefile",check_exists=TRUE, overwrite_layer=TRUE)
}
## Warning in writeOGR(obj = llsubset, dsn = outfolder, coords[i], driver =
## "ESRI Shapefile", : Field names abbreviated for ESRI Shapefile driver
## Warning in writeOGR(obj = llsubset, dsn = outfolder, coords[i], driver =
## "ESRI Shapefile", : Field names abbreviated for ESRI Shapefile driver
## Warning in writeOGR(obj = llsubset, dsn = outfolder, coords[i], driver =
## "ESRI Shapefile", : Field names abbreviated for ESRI Shapefile driver
## Warning in writeOGR(obj = llsubset, dsn = outfolder, coords[i], driver =
## "ESRI Shapefile", : Field names abbreviated for ESRI Shapefile driver
The coordinate shapefiles are now ready. We’ll now use the NHDPlus Routes layers to Locate Features Along Routes for each set of coordinates.
Step 5. We’ll now relate the transect and design coordinates to the NHD routes using the Locate Features Along Routes tool in ArcMap. This produces a “measure” field that indicates each point’s position along a given flowline (using LevelPath as our route identifier).
import arcpy, os, sys
from arcpy import env
coord= 'BANK_A','BANK_K','MIDSTREAM_A','MIDSTREAM_K','DESIGN'
NHD = '01','02','03N','03S','03W','04','05','06','07','08','09','10L','10U','11','12','13','14','15','16','17','18'
#process one coordinate at a time, locating routes for all hydroregions before looping to next coordinate
for j in coord:
root1="E:\\NRSACoordinates"
shapefile=root1+'\\'+j+'\\'+j+'.shp'
inlayer=root1+'\\'+j+'\\'+j+'_lyr'
arcpy.MakeFeatureLayer_management(shapefile,inlayer) #convert shapefile to layer
for k in NHD:
coords=root1+'\\'+j+'\\'+j+'_lyr' #identify coordinate layer
root2="E:\\NHDRegions"
routes=root2+'\\'+k+'\\'+'Routes_'+k+'.shp' #identify routes shapefile
routetable=root1+'\\'+j+'\\'+'Routes_'+k+'_'+j #output Locate Routes table
outroute='Routes_'+k+'_'+j+'_Events' #output temporary Route Event layer
arcpy.LocateFeaturesAlongRoutes_lr(in_features=coords,in_routes=routes,route_id_field="LevelPath",radius_or_tolerance="200 Meters",out_table=routetable,out_event_properties="RID POINT MEAS",route_locations="FIRST",distance_field="DISTANCE",zero_length_events="ZERO",in_fields="FIELDS",m_direction_offsetting="M_DIRECTON")
arcpy.MakeRouteEventLayer_lr(in_routes=routes,route_id_field="LevelPath",in_table=routetable,in_event_properties="rid POINT meas",out_layer=outroute,offset_field="#",add_error_field="NO_ERROR_FIELD",add_angle_field="NO_ANGLE_FIELD",angle_type="NORMAL",complement_angle="ANGLE",offset_direction="LEFT",point_event_type="POINT")
lyroutput=root1+'\\'+j+'\\'+'Routes_'+k+'_'+j+'_Events'
arcpy.SaveToLayerFile_management(outroute, lyroutput, "ABSOLUTE") #save route event layer to a .lyr file
outroutelyr=root1+'\\'+j+'\\'+'Routes_'+k+'_'+j+'_Events.lyr'
outshapeloc=root1+'\\'+j+'\\'+'Routes_'+k+'_'+j+'_Events.shp'
arcpy.CopyFeatures_management(in_features=outroutelyr,out_feature_class=outshapeloc,config_keyword="#",spatial_grid_1="0",spatial_grid_2="0",spatial_grid_3="0") #save as shapefile
Step 6. Merge all route event layers together for each coordinate location. Identify duplicates in the resulting dataframe and store this information in a new field, “DUPCHK”.
library(rgdal)
library(foreign)
library(plyr)
coords<-c("BANK_A","BANK_K","MIDSTREAM_A","MIDSTREAM_K","DESIGN")
NHD<-c("01","02","03N","03S","03W","04","05","06","07","08","09","10L","10U","11","12","13","14","15","16","17","18")
root.dir<-"E:/NRSACoordinates"
#for each coordinate, read all LocateRoutes dbf's into R so they can be merged
for (i in 1:length(coords)){
dir<-paste(c(root.dir,"/",coords[i]),collapse="")
setwd(dir)
for (k in 1:length(NHD)){
NHDroute<-paste(c("Routes_",NHD[k],"_",coords[i],"_Events.dbf"),collapse="")
dbf<-read.dbf(NHDroute)
colnames(dbf)<-c("RID","MEAS","DISTANCE","UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE", "CREW","VALXSITE","LATDD","LONDD") #standardize field names between design and field coordinates
name<-paste(c("NHD",NHD[k]),collapse="") #create name for dbf based on hydroregion
assign(name,dbf) #assign new name to dbf so that the next dbf can be read into R as object 'dbf'
rm(dbf) #remove old dbf so new one can be generated without overwriting (may not need)
}
#merge all dbf files for each given coordinate, then export as a shapefile
all<-Reduce(function(x, y) merge(x, y, all=TRUE), list(NHD01,NHD02,NHD03N,NHD03S,NHD03W,NHD04,NHD05,NHD06,NHD07,NHD08,NHD09,NHD10L,NHD10U,NHD11,NHD12,NHD13,NHD14,NHD15,NHD16,NHD17,NHD18))
#add column that identifies whether a given UID appears multiple times in the dataframe.
#In some cases these are pure duplicates that, for whatever reason, were included in Marlys' spreadsheet twice;
#in others, duplicates exist because sites within 200 m (the search radius) of two hydrolayers were
#Located Along Routes twice. Those cases require someone to examine them in ArcGIS to determine which
#hydrolayer is correct for that particular site (e.g., map against the original, pre-snapping point).
#0 = FALSE (no duplicates), 1 = TRUE (duplicates found)
all<-mutate(all,DUPCHK=duplicated(all$UID))
#add column indicating the coordinate location that these Locate Routes results reference
all<-mutate(all,LOCATION=rep(coords[i],nrow(all)))
all[1:2,]
latlong<-all[,c(14,13)] #identify columns that contain location coordinates
llCRS<-CRS("+init=epsg:4269 +proj=longlat +ellps=GRS80 +datum=NAD83")
coordinates(all)<-latlong
proj4string(all)<- llCRS
outfolder=paste(c("E:/NRSACoordinates/",coords[i]),collapse="")
outall<-paste(c("Routes_Merged_",coords[i]),collapse="")
writeOGR(obj=all,dsn=outfolder,outall,driver="ESRI Shapefile",check_exists=TRUE, overwrite_layer=TRUE)
}
Step 7. Compile the merged LocateRoutes dbf’s into a single dataframe. Add fields for logical statements that test whether the design coordinate falls on the same flowline as the sampled, snapped A and K coordinates, and whether the design measure (MEAS) falls between the A and K MEAS values (i.e., whether the design coordinate falls within the sampled reach). Lastly, output the results as a CSV file in the root directory.
#load libraries
library(foreign)
library(reshape2)
library(dplyr)
coords<-c("BANK_A","BANK_K","MIDSTREAM_A","MIDSTREAM_K","DESIGN")
root.dir<-"E:/NRSACoordinates"
#for each coordinate, read Routes_Merged_X.dbf into R space
for (i in 1:length(coords)){
dir<-paste(c(root.dir,"/",coords[i]),collapse="")
setwd(dir)
RoutesMerged<-paste(c("Routes_Merged_",coords[i],".dbf"),collapse="")
dbf<-read.dbf(RoutesMerged)
name<-paste(c("Routes_Merged_",coords[i]),collapse="") #create name for dbf based on coordinate
assign(name,dbf) #assign new name to dbf so that the next dbf can be read into R as object 'dbf'
rm(dbf) #remove old dbf so new one can be generated without overwriting (may not need)
}
setwd(root.dir)
#merge files together
all<-Reduce(function(x, y) merge(x, y, all=TRUE), list(Routes_Merged_BANK_A,Routes_Merged_BANK_K,Routes_Merged_MIDSTREAM_A,Routes_Merged_MIDSTREAM_K,Routes_Merged_DESIGN))
#clean up file before casting wide
all<-all[,c("UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE","CREW","VALXSITE","LATDD","LONDD","LOCATION","DUPCHK","RID","MEAS")]
head(all)
## UID SITE_ID VISIT_NO DATE_COL YEAR SITESAMP STATE CREW VALXSITE
## 1 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 2 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 3 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 4 11581 OHR9-0903 1 8/21/2013 2013 Y OH OS1 BOATABLE
## 5 138551 FLSS-1069 1 5/13/2014 2014 Y FL PG12 BOATABLE
## 6 138551 FLSS-1069 1 5/13/2014 2014 Y FL PG12 BOATABLE
## LATDD LONDD LOCATION DUPCHK RID MEAS
## 1 43.30670 -73.63060 BANK_K 0 0 8.981182
## 2 43.30670 -73.63060 MIDSTREAM_K 0 0 8.981182
## 3 43.30763 -73.63245 DESIGN 0 0 8.983759
## 4 40.58522 -81.40242 MIDSTREAM_A 0 0 16.049492
## 5 27.32270 -80.57950 BANK_K 0 0 19.203234
## 6 27.32270 -80.57960 MIDSTREAM_K 0 0 19.203234
#first melt the columns that we'll soon recast; everything you want cast needs to be in ONE COLUMN
all.long<-melt(all,id.vars=c("UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE","CREW","VALXSITE","LOCATION","DUPCHK"))
head(all.long)
## UID SITE_ID VISIT_NO DATE_COL YEAR SITESAMP STATE CREW VALXSITE
## 1 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 2 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 3 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 4 11581 OHR9-0903 1 8/21/2013 2013 Y OH OS1 BOATABLE
## 5 138551 FLSS-1069 1 5/13/2014 2014 Y FL PG12 BOATABLE
## 6 138551 FLSS-1069 1 5/13/2014 2014 Y FL PG12 BOATABLE
## LOCATION DUPCHK variable value
## 1 BANK_K 0 LATDD 43.30670
## 2 MIDSTREAM_K 0 LATDD 43.30670
## 3 DESIGN 0 LATDD 43.30763
## 4 MIDSTREAM_A 0 LATDD 40.58522
## 5 BANK_K 0 LATDD 27.32270
## 6 MIDSTREAM_K 0 LATDD 27.32270
#now cast out by LOCATION + variable (RID and MEAS and coordinates will have their own respective columns)
all.wide<-dcast(all.long,UID+SITE_ID+VISIT_NO+DATE_COL+YEAR+SITESAMP+STATE+CREW+VALXSITE+DUPCHK~LOCATION+variable)
#re-order columns so coordinates are all together, RID's all together, MEAS all together
ordercolumns<-c("UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE","CREW","VALXSITE"
,"DUPCHK","BANK_A_LATDD","BANK_A_LONDD","BANK_K_LATDD","BANK_K_LONDD"
,"MIDSTREAM_A_LATDD","MIDSTREAM_A_LONDD","MIDSTREAM_K_LATDD","MIDSTREAM_K_LONDD"
,"DESIGN_LATDD","DESIGN_LONDD","BANK_A_RID","BANK_K_RID","MIDSTREAM_A_RID"
,"MIDSTREAM_K_RID","DESIGN_RID","BANK_A_MEAS","MIDSTREAM_A_MEAS","BANK_K_MEAS","MIDSTREAM_K_MEAS"
,"DESIGN_MEAS")
all.wide<-all.wide[,ordercolumns]
head(all.wide)
## UID SITE_ID VISIT_NO DATE_COL YEAR SITESAMP STATE CREW VALXSITE
## 1 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 2 11581 OHR9-0903 1 8/21/2013 2013 Y OH OS1 BOATABLE
## 3 138551 FLSS-1069 1 5/13/2014 2014 Y FL PG12 BOATABLE
## 4 151002 ORR9-0905 2 8/18/2014 2014 Y OR OR1 BOATABLE
## 5 148064 ORR9-0905 1 7/29/2014 2014 Y OR OR1 BOATABLE
## 6 11699 NCSS-1081 1 8/23/2013 2013 Y NC PG4 BOATABLE
## DUPCHK BANK_A_LATDD BANK_A_LONDD BANK_K_LATDD BANK_K_LONDD
## 1 0 NA NA 43.3067 -73.63060
## 2 0 40.58552 -81.40222 NA NA
## 3 0 27.31620 -80.57960 27.3227 -80.57950
## 4 0 44.47900 -122.78500 44.4891 -122.81490
## 5 0 44.47830 -122.78440 44.4889 -122.81470
## 6 0 35.79737 -76.63769 35.7981 -76.63368
## MIDSTREAM_A_LATDD MIDSTREAM_A_LONDD MIDSTREAM_K_LATDD MIDSTREAM_K_LONDD
## 1 NA NA 43.30670 -73.63060
## 2 40.58522 -81.40242 40.57522 -81.39243
## 3 27.31610 -80.57950 27.32270 -80.57960
## 4 NA NA 44.48910 -122.81450
## 5 44.47860 -122.78400 NA NA
## 6 35.79737 -76.63769 35.79810 -76.63368
## DESIGN_LATDD DESIGN_LONDD BANK_A_RID BANK_K_RID MIDSTREAM_A_RID
## 1 43.30763 -73.63245 NA 0 NA
## 2 40.58128 -81.39514 430005446 NA 0
## 3 27.32147 -80.57938 0 0 0
## 4 44.49071 -122.81372 50010097 0 NA
## 5 44.49071 -122.81372 50010097 0 50010097
## 6 35.79718 -76.63771 0 0 0
## MIDSTREAM_K_RID DESIGN_RID BANK_A_MEAS MIDSTREAM_A_MEAS BANK_K_MEAS
## 1 0 0 NA NA 8.981182
## 2 430005446 430005446 1.0838421 16.0494919 NA
## 3 0 0 19.2210531 19.2210531 19.203234
## 4 0 50010097 0.6371068 NA 25.625994
## 5 NA 50010097 0.6384676 0.6384676 25.625994
## 6 0 0 26.0794428 26.0794428 26.075367
## MIDSTREAM_K_MEAS DESIGN_MEAS
## 1 8.981182 8.9837591
## 2 1.100125 1.0925644
## 3 19.203234 19.2040517
## 4 25.625994 0.6003736
## 5 NA 0.6003736
## 6 26.075367 26.0794935
#logical that asks if BANKA OR MIDA RID=DESIGN RID AND BANKK OR MIDK RID=DESIGN RID, returns TRUE
all.wide<-mutate(all.wide,RID_MATCH=BANK_A_RID==DESIGN_RID|MIDSTREAM_A_RID==DESIGN_RID
& BANK_K_RID==DESIGN_RID|MIDSTREAM_K_RID==DESIGN_RID)
#logicals that ask whether DESIGN MEAS falls between the A and K MEAS values in either direction (i.e., does the design coordinate fall between A and K?)
all.wide<-mutate(all.wide,DSN_IN_REACH1=BANK_A_MEAS>=DESIGN_MEAS & DESIGN_MEAS>=BANK_K_MEAS)
all.wide<-mutate(all.wide,DSN_IN_REACH2=MIDSTREAM_A_MEAS>=DESIGN_MEAS & DESIGN_MEAS>=MIDSTREAM_K_MEAS)
all.wide<-mutate(all.wide,DSN_IN_REACH3=BANK_A_MEAS<=DESIGN_MEAS & DESIGN_MEAS<=BANK_K_MEAS)
all.wide<-mutate(all.wide,DSN_IN_REACH4=MIDSTREAM_A_MEAS<=DESIGN_MEAS & DESIGN_MEAS<=MIDSTREAM_K_MEAS)
all.wide<-mutate(all.wide,DSN_IN_REACH_SUMMARY=ifelse(DSN_IN_REACH1==TRUE|DSN_IN_REACH2==TRUE|DSN_IN_REACH3==TRUE|DSN_IN_REACH4==TRUE,TRUE,FALSE))
# check sites manually if LevelPath for coordinate and design don't match, or if MEAS of design doesn't fall between A and K
all.wide<-mutate(all.wide,CHECK_MANUALLY=ifelse(all.wide$RID_MATCH==FALSE|all.wide$DSN_IN_REACH_SUMMARY==FALSE,"YES","NO"))
head(all.wide)
## UID SITE_ID VISIT_NO DATE_COL YEAR SITESAMP STATE CREW VALXSITE
## 1 145880 NYSS-1239 1 7/16/2014 2014 Y NY PG6 BOATABLE
## 2 11581 OHR9-0903 1 8/21/2013 2013 Y OH OS1 BOATABLE
## 3 138551 FLSS-1069 1 5/13/2014 2014 Y FL PG12 BOATABLE
## 4 151002 ORR9-0905 2 8/18/2014 2014 Y OR OR1 BOATABLE
## 5 148064 ORR9-0905 1 7/29/2014 2014 Y OR OR1 BOATABLE
## 6 11699 NCSS-1081 1 8/23/2013 2013 Y NC PG4 BOATABLE
## DUPCHK BANK_A_LATDD BANK_A_LONDD BANK_K_LATDD BANK_K_LONDD
## 1 0 NA NA 43.3067 -73.63060
## 2 0 40.58552 -81.40222 NA NA
## 3 0 27.31620 -80.57960 27.3227 -80.57950
## 4 0 44.47900 -122.78500 44.4891 -122.81490
## 5 0 44.47830 -122.78440 44.4889 -122.81470
## 6 0 35.79737 -76.63769 35.7981 -76.63368
## MIDSTREAM_A_LATDD MIDSTREAM_A_LONDD MIDSTREAM_K_LATDD MIDSTREAM_K_LONDD
## 1 NA NA 43.30670 -73.63060
## 2 40.58522 -81.40242 40.57522 -81.39243
## 3 27.31610 -80.57950 27.32270 -80.57960
## 4 NA NA 44.48910 -122.81450
## 5 44.47860 -122.78400 NA NA
## 6 35.79737 -76.63769 35.79810 -76.63368
## DESIGN_LATDD DESIGN_LONDD BANK_A_RID BANK_K_RID MIDSTREAM_A_RID
## 1 43.30763 -73.63245 NA 0 NA
## 2 40.58128 -81.39514 430005446 NA 0
## 3 27.32147 -80.57938 0 0 0
## 4 44.49071 -122.81372 50010097 0 NA
## 5 44.49071 -122.81372 50010097 0 50010097
## 6 35.79718 -76.63771 0 0 0
## MIDSTREAM_K_RID DESIGN_RID BANK_A_MEAS MIDSTREAM_A_MEAS BANK_K_MEAS
## 1 0 0 NA NA 8.981182
## 2 430005446 430005446 1.0838421 16.0494919 NA
## 3 0 0 19.2210531 19.2210531 19.203234
## 4 0 50010097 0.6371068 NA 25.625994
## 5 NA 50010097 0.6384676 0.6384676 25.625994
## 6 0 0 26.0794428 26.0794428 26.075367
## MIDSTREAM_K_MEAS DESIGN_MEAS RID_MATCH DSN_IN_REACH1 DSN_IN_REACH2
## 1 8.981182 8.9837591 TRUE NA NA
## 2 1.100125 1.0925644 TRUE FALSE FALSE
## 3 19.203234 19.2040517 TRUE TRUE TRUE
## 4 25.625994 0.6003736 TRUE FALSE FALSE
## 5 NA 0.6003736 TRUE FALSE NA
## 6 26.075367 26.0794935 TRUE FALSE FALSE
## DSN_IN_REACH3 DSN_IN_REACH4 DSN_IN_REACH_SUMMARY CHECK_MANUALLY
## 1 FALSE FALSE NA <NA>
## 2 NA FALSE NA <NA>
## 3 FALSE FALSE TRUE NO
## 4 FALSE NA NA <NA>
## 5 FALSE FALSE NA <NA>
## 6 FALSE FALSE FALSE YES
At the beginning of this analysis, we created a dataframe that included only sampled probability sites with either a full set of transect A and K coordinates, or one complete set of coordinates for either bank A/K or midstream A/K. All of the boatable sites that weren’t included in that initial dataframe, sites that were dropped because they weren’t found in the design file used for this process, and any sites that were dropped because they did not snap to an NHD flowline within a 200 meter radius should be checked manually. We’ll add those back to our final dataframe with “YES” populated in the “CHECK_MANUALLY” column.
manualchk.sites<-filt.sites[!(filt.sites$UID %in% all.wide$UID),] # subset for sampled probability sites that aren't in the all.wide dataframe
table(manualchk.sites$VALXSITE)
##
## BOATABLE INTBOAT INTWADE PARBYBOAT PARBYWADE WADEABLE
## 278 1 31 2 15 1207
manualchk.sites<-manualchk.sites[manualchk.sites$VALXSITE %in% c("BOATABLE","INTBOAT","PARBYBOAT"),] # subset for boatables (number should be reduced greatly once we have final 2014 file)
manualchk.sites<-manualchk.sites[,c(1:17)] # drop DATA_SUBMISSION and FILTER columns
dim(manualchk.sites)
## [1] 281 17
# clean up column names to merge into all.wide dataframe
names(manualchk.sites)<-c("UID","SITE_ID","VISIT_NO","DATE_COL","YEAR","SITESAMP","STATE","CREW","VALXSITE",
"BANK_A_LATDD","MIDSTREAM_A_LATDD","BANK_A_LONDD","MIDSTREAM_A_LONDD","BANK_K_LATDD",
"MIDSTREAM_K_LATDD","BANK_K_LONDD","MIDSTREAM_K_LONDD")
manualchk.sites$CHECK_MANUALLY<-rep("YES",nrow(manualchk.sites)) # indicate that all of these sites need to be checked manually
final.file<-merge(all.wide,manualchk.sites,all.x=TRUE,all.y=TRUE) # merge these sites back into all.wide dataframe to create final file
R processing is done. Now use ArcGIS Desktop to analyze sites individually, as necessary, using this file:
table(final.file$CHECK_MANUALLY)
##
## NO YES
## 493 316
write.csv(final.file,"NRSA1314_CoordinatesCheck.csv",row.names=FALSE)