Set-up

Be sure to install and load the following:

library(tidyverse)
library(sp)
library(rgdal)
library(spdplyr)

Q1: Download the Counties_shp.zip file on the Utah GIS repository](https://gis.utah.gov/data/). This is a great data resource, so you may want to look around for your project. You can unzip the file using 7Zip or a similar tool. Load and plot this object in R.

What is the class of the object?

The class of the object is “sp”, spatial polygons dataframe.

getClass("Spatial")
## Class "Spatial" [package "sp"]
## 
## Slots:
##                               
## Name:         bbox proj4string
## Class:      matrix         CRS
## 
## Known Subclasses: 
## Class "SpatialPoints", directly
## Class "SpatialMultiPoints", directly
## Class "SpatialGrid", directly
## Class "SpatialLines", directly
## Class "SpatialPolygons", directly
## Class "SpatialPointsDataFrame", by class "SpatialPoints", distance 2
## Class "SpatialPixels", by class "SpatialPoints", distance 2
## Class "SpatialMultiPointsDataFrame", by class "SpatialMultiPoints", distance 2
## Class "SpatialGridDataFrame", by class "SpatialGrid", distance 2
## Class "SpatialLinesDataFrame", by class "SpatialLines", distance 2
## Class "SpatialPixelsDataFrame", by class "SpatialPoints", distance 3
## Class "SpatialPolygonsDataFrame", by class "SpatialPolygons", distance 2
library(rgdal)
counties <- readOGR(dsn = "C:/Users/joshc/Documents/USU/2023/GEOG 4870/Module 7_Vectors/Counties_shp/Counties_shp/Counties/Counties.shp")
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\joshc\Documents\USU\2023\GEOG 4870\Module 7_Vectors\Counties_shp\Counties_shp\Counties\Counties.shp", layer: "Counties"
## with 29 features
## It has 12 fields
## Integer64 fields read as strings:  POP_LASTCE POP_CURRES

class(counties)

Q2. Is your dataset projected or unprojected? Explain in words how you know this.

After running the summary, you can view that the data projection is listed as true and the projection string is listed as NAD83 UTM Zone 12.

summary(counties)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x  228583.4  673944.6
## y 4094743.9 4653571.8
## Is projected: TRUE 
## proj4string :
## [+proj=utm +zone=12 +datum=NAD83 +units=m +no_defs]
## Data attributes:
##   COUNTYNBR           ENTITYNBR            ENTITYYR        NAME          
##  Length:29          Min.   :2.006e+09   Min.   :2006   Length:29         
##  Class :character   1st Qu.:2.010e+09   1st Qu.:2010   Class :character  
##  Mode  :character   Median :2.010e+09   Median :2010   Mode  :character  
##                     Mean   :2.010e+09   Mean   :2010                     
##                     3rd Qu.:2.010e+09   3rd Qu.:2010                     
##                     Max.   :2.010e+09   Max.   :2010                     
##       FIPS     STATEPLANE         POP_LASTCE         POP_CURRES       
##  Min.   : 1   Length:29          Length:29          Length:29         
##  1st Qu.:15   Class :character   Class :character   Class :character  
##  Median :29   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :29                                                           
##  3rd Qu.:43                                                           
##  Max.   :57                                                           
##    FIPS_STR             COLOR4        Shape_Leng       Shape_Area       
##  Length:29          Min.   :1.000   Min.   :210430   Min.   :1.581e+09  
##  Class :character   1st Qu.:2.000   1st Qu.:299647   1st Qu.:3.035e+09  
##  Mode  :character   Median :3.000   Median :394352   Median :6.298e+09  
##                     Mean   :2.517   Mean   :428350   Mean   :7.580e+09  
##                     3rd Qu.:3.000   3rd Qu.:559369   3rd Qu.:1.063e+10  
##                     Max.   :4.000   Max.   :843995   Max.   :2.054e+10

Q3. How many variables are in the Counties.shp file?

The counties shapefile has 12 variables.

class(counties)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
counties@data
##    COUNTYNBR  ENTITYNBR ENTITYYR       NAME FIPS STATEPLANE POP_LASTCE
## 0         20 2010201010     2010    SANPETE   39    Central      28571
## 1         11 2010111010     2010       IRON   21      South      48069
## 2         13 2010131010     2010       KANE   25      South       7291
## 3         29 2010291010     2010      WEBER   57      North     236289
## 4         19 2006191010     2006   SAN JUAN   37      South      15308
## 5         09 2010091010     2010   GARFIELD   17      South       5353
## 6         17 2010171010     2010       RICH   33      North       2384
## 7         22 2010221010     2010     SUMMIT   43      North      36774
## 8         23 2010231010     2010     TOOELE   45    Central      60009
## 9         01 2010011010     2010     BEAVER    1      South       6679
## 10        02 2010021010     2010  BOX ELDER    3      North      50982
## 11        03 2010031010     2010      CACHE    5      North     115025
## 12        24 2010241010     2010     UINTAH   47    Central      33767
## 13        10 2010101010     2010      GRAND   19    Central       9383
## 14        27 2010271010     2010 WASHINGTON   53      South     144945
## 15        14 2010141010     2010    MILLARD   27    Central      12582
## 16        26 2010261010     2010    WASATCH   51    Central      24292
## 17        12 2010121010     2010       JUAB   23    Central      10632
## 18        25 2010251010     2010       UTAH   49    Central     542008
## 19        07 2010071010     2010   DUCHESNE   13    Central      19363
## 20        05 2010051010     2010    DAGGETT    9      North       1078
## 21        16 2010161010     2010      PIUTE   31      South       1600
## 22        06 2010061010     2010      DAVIS   11      North     315697
## 23        15 2010151010     2010     MORGAN   29      North       9827
## 24        28 2010281010     2010      WAYNE   55      South       2861
## 25        08 2010081010     2010      EMERY   15    Central      11144
## 26        21 2010211010     2010     SEVIER   41    Central      21135
## 27        04 2010041010     2010     CARBON    7    Central      21613
## 28        18 2010181010     2010  SALT LAKE   35    Central    1050341
##    POP_CURRES FIPS_STR COLOR4 Shape_Leng  Shape_Area
## 0       30033    49039      2   317646.9  4146734071
## 1       52280    49021      3   433987.6  8550446964
## 2        7559    49025      4   559368.8 10631564021
## 3      248839    49057      1   273454.4  1707999618
## 4       16344    49037      3   843994.6 20538417618
## 5        5240    49017      1   628903.3 13481860261
## 6        2371    49033      3   299646.7  2811302707
## 7       40772    49043      1   477619.1  4870028431
## 8       67133    49045      2   574753.1 18871961112
## 9        6843    49001      4   392925.2  6696284552
## 10      54970    49003      4   569389.5 17428638722
## 11     126491    49005      2   289985.0  3035321841
## 12      36613    49047      2   540619.1 11661945608
## 13      10060    49019      1   516092.9  9539294361
## 14     165601    49053      1   347253.9  6297924696
## 15      13477    49027      3   576258.3 17708674893
## 16      31224    49051      2   332287.4  3129885249
## 17      11798    49023      4   626586.8  8819926150
## 18     617735    49049      3   481312.8  5544870628
## 19      20828    49013      4   394351.6  8412584876
## 20       1052    49009      3   271451.0  1861953449
## 21       1607    49031      2   226048.5  1982962415
## 22     348770    49011      3   210430.4  1644067012
## 23      11725    49029      2   272456.1  1581062748
## 24       2738    49055      4   447343.1  6384191355
## 25      10673    49015      3   614872.1 11574191378
## 26      21766    49041      1   337181.9  4964991763
## 27      21211    49007      1   333190.1  3844121594
## 28    1128283    49035      4   232727.0  2085425239

Q4. How many unique counties are listed in the Counties.shp file?

There are 29 unique counties listed in the shapefile.

counties@data %>%
  summarise(counties@data$NAME)
##    counties@data$NAME
## 1             SANPETE
## 2                IRON
## 3                KANE
## 4               WEBER
## 5            SAN JUAN
## 6            GARFIELD
## 7                RICH
## 8              SUMMIT
## 9              TOOELE
## 10             BEAVER
## 11          BOX ELDER
## 12              CACHE
## 13             UINTAH
## 14              GRAND
## 15         WASHINGTON
## 16            MILLARD
## 17            WASATCH
## 18               JUAB
## 19               UTAH
## 20           DUCHESNE
## 21            DAGGETT
## 22              PIUTE
## 23              DAVIS
## 24             MORGAN
## 25              WAYNE
## 26              EMERY
## 27             SEVIER
## 28             CARBON
## 29          SALT LAKE

Q5. Before moving on, make sure the two columns describing population (POP_CURRES and POP_LASTCE) are stored as numeric data rather than factors. You can change this using the following code:

counties@data$POP_CURRES <- as.numeric(as.character(counties@data$POP_CURRES))
counties@data$POP_LASTCE <- as.numeric(as.character(counties@data$POP_LASTCE))

Now, how many counties have a current population estimate (POP_CURRES) above 40,000? # There are 10 counties with a population above 40,000.

counties@data %>%
  group_by(NAME) %>%
  summarize(POP_CURRES)
## # A tibble: 29 × 2
##    NAME      POP_CURRES
##    <chr>          <dbl>
##  1 BEAVER          6843
##  2 BOX ELDER      54970
##  3 CACHE         126491
##  4 CARBON         21211
##  5 DAGGETT         1052
##  6 DAVIS         348770
##  7 DUCHESNE       20828
##  8 EMERY          10673
##  9 GARFIELD        5240
## 10 GRAND          10060
## # … with 19 more rows

#Q6. Which county has the highest population (POP_CURRES)? # Salt Lake County has te highest population. 1,128,283

Q7: What is the FIPS_STR code for CACHE county?

The FIPS_STR code for Cache County is 49005.

counties@data %>%
  group_by(NAME) %>%
  summarize(FIPS_STR)
## # A tibble: 29 × 2
##    NAME      FIPS_STR
##    <chr>     <chr>   
##  1 BEAVER    49001   
##  2 BOX ELDER 49003   
##  3 CACHE     49005   
##  4 CARBON    49007   
##  5 DAGGETT   49009   
##  6 DAVIS     49011   
##  7 DUCHESNE  49013   
##  8 EMERY     49015   
##  9 GARFIELD  49017   
## 10 GRAND     49019   
## # … with 19 more rows

Q8. Reproject the county shapefile to USA Contiguous Albers Equal Area Conic. Plot both datasets.

Do you see any difference? What is the new datum of the reprojected dataset?

The transformed projection is smaller and tilted. The new datum is NAD 1983

plot(counties)

counties_tr <- spTransform(counties, CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"))
## Warning: PROJ support is provided by the sf and terra packages among others
plot(counties_tr) 

plot(counties)

counties_tr@proj4string
## Coordinate Reference System:
## Deprecated Proj.4 representation:
##  +proj=aea +lat_0=37.5 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0
## +datum=NAD83 +units=m +no_defs 
## WKT2 2019 representation:
## PROJCRS["unknown",
##     BASEGEOGCRS["unknown",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]],
##             ID["EPSG",6269]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8901]]],
##     CONVERSION["unknown",
##         METHOD["Albers Equal Area",
##             ID["EPSG",9822]],
##         PARAMETER["Latitude of false origin",37.5,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-96,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",29.5,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",45.5,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]]

Q9. Use spdplyr to create a new shapefile with only CACHE, UTAH, and WAYNE counties.

What is the sum of the Shape_Area in these three counties?

The sum of the shape area is: 14964383824

counties@data %>%
  group_by(NAME) %>%
  summarize(Shape_Area)
## # A tibble: 29 × 2
##    NAME        Shape_Area
##    <chr>            <dbl>
##  1 BEAVER     6696284552.
##  2 BOX ELDER 17428638722.
##  3 CACHE      3035321841.
##  4 CARBON     3844121594.
##  5 DAGGETT    1861953449.
##  6 DAVIS      1644067012.
##  7 DUCHESNE   8412584876.
##  8 EMERY     11574191378.
##  9 GARFIELD  13481860261.
## 10 GRAND      9539294361.
## # … with 19 more rows

Q10. Plot the new shapefile you created in Q9 and submit this plot on Canvas.

I’ve exhaused all techniques over a 4 hour period and could not create the shapefile without error. I will make another attempt at a later date.

#```{r} #utah_pl <- polygons($FIPS (“49”))

#plot(utah_pl)

#```

#prj <- CRS(“+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs”) #select <- dplyr::select #coords <- counties %>% select(Shape_Leng, Shape_Area) #counties_pt <- SpatialPolygonsDataFrame(Coords = coords, data = counties, proj4string = prj) #plot(counties_pt)