Cleaning the Global environment and the console

rm(list = ls())      # Clears the global environment for a fresh start
cat('\f')            # Cleans the console

Set working directory

setwd("C:/Rizwan/Education/UofM PhD Work/Seminar in Earth Sciences/Exams/Final_Exam")
getwd()
## [1] "C:/Rizwan/Education/UofM PhD Work/Seminar in Earth Sciences/Exams/Final_Exam"

Loading libraries

This code allows to load multiple library at once.

my_library <- c("sf", "dplyr", "RColorBrewer", "raster", "tmap", "leaflet", "rgdal")
lapply(my_library, library, character.only = TRUE)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/hasan/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/hasan/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.

Unzipping the Data

unzip(zipfile = "ExamData.zip", exdir = getwd())
path = "C:/Rizwan/Education/UofM PhD Work/Seminar in Earth Sciences/Exams/Final_Exam/ExamData"
list.files(path, pattern=NULL, all.files=FALSE,
    full.names=FALSE)
## [1] "cities1.csv"          "SeminarR.gdb"         "Terrain1.tfw"        
## [4] "Terrain1.tif"         "Terrain1.tif.aux.xml" "Terrain1.tif.ovr"

Loading the data

dsn1 <- "ExamData/SeminarR.gdb"
dsn2 <- "ExamData/Terrain1.tif"
USA <- st_read(dsn1)
## Multiple layers are present in data source C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb, reading layer `USA_counties'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE, noBreaks. =
## FALSE, : automatically selected the first layer in a data source containing more
## than one.
## Reading layer `USA_counties' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb' using driver `OpenFileGDB'
## Simple feature collection with 3140 features and 53 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -6293474 ymin: 311822.4 xmax: 2256319 ymax: 6198811
## projected CRS:  NAD83 / Conus Albers
Terr.USA <- raster(dsn2)
Data <- as_tibble(read.csv("ExamData/cities1.csv", header = TRUE))

Data Inspection

str(USA)
## Classes 'sf' and 'data.frame':   3140 obs. of  54 variables:
##  $ NAME        : chr  "Lake of the Woods" "Ferry" "Stevens" "Okanogan" ...
##  $ STATE_NAME  : chr  "Minnesota" "Washington" "Washington" "Washington" ...
##  $ STATE_FIPS  : chr  "27" "53" "53" "53" ...
##  $ CNTY_FIPS   : chr  "077" "019" "065" "047" ...
##  $ FIPS        : chr  "27077" "53019" "53065" "53047" ...
##  $ POP1990     : int  4076 6295 30948 33350 8915 8332 17481 59218 12121 5046 ...
##  $ POP1999     : int  4597 7150 39965 38596 11788 9840 18691 72458 12524 4653 ...
##  $ POP90_SQMI  : int  2 3 12 6 6 7 5 11 4 3 ...
##  $ HOUSEHOLDS  : int  1576 2247 11241 12654 3395 2857 6668 22834 3816 1922 ...
##  $ MALES       : int  2037 3280 15454 16828 4426 4252 8777 29316 5985 2486 ...
##  $ FEMALES     : int  2039 3015 15494 16522 4489 4080 8704 29902 6136 2560 ...
##  $ WHITE       : int  4042 5084 28747 27615 8640 7950 17103 57897 5258 4900 ...
##  $ BLACK       : int  1 20 65 52 12 3 11 56 11 5 ...
##  $ AMERI_ES    : int  19 1131 1807 3597 206 150 282 880 6823 118 ...
##  $ ASIAN_PI    : int  10 24 179 166 25 26 54 240 10 16 ...
##  $ OTHER       : int  4 36 150 1920 32 203 31 145 19 7 ...
##  $ HISPANIC    : int  25 85 483 2779 120 310 197 616 78 36 ...
##  $ AGE_UNDER5  : int  337 486 2271 2536 660 635 1250 4211 1337 367 ...
##  $ AGE_5_17    : int  791 1499 7486 7051 1963 2065 3979 12538 3159 1110 ...
##  $ AGE_18_29   : int  549 887 3586 4492 996 1138 2175 7480 1892 606 ...
##  $ AGE_30_49   : int  1122 1929 9605 9749 2670 2351 5410 19245 3209 1457 ...
##  $ AGE_50_64   : int  584 827 4145 4890 1384 1121 2525 8028 1364 687 ...
##  $ AGE_65_UP   : int  693 667 3855 4632 1242 1022 2142 7716 1160 819 ...
##  $ NEVERMARRY  : int  538 1118 4037 5042 1124 1247 2265 8150 2275 723 ...
##  $ MARRIED     : int  2102 2741 14795 15320 4349 3935 8604 28719 4526 2432 ...
##  $ SEPARATED   : int  24 82 384 561 114 89 218 713 155 46 ...
##  $ WIDOWED     : int  248 251 1486 1889 507 391 846 3004 621 294 ...
##  $ DIVORCED    : int  200 498 2096 2406 641 480 1177 4489 679 306 ...
##  $ HSEHLD_1_M  : int  185 302 1146 1410 373 300 820 2484 394 264 ...
##  $ HSEHLD_1_F  : int  184 195 1210 1633 402 299 734 3019 435 302 ...
##  $ MARHH_CHD   : int  465 652 3618 3355 961 938 2075 6721 1258 542 ...
##  $ MARHH_NO_C  : int  565 660 3591 3946 1157 928 2127 7223 824 594 ...
##  $ MHH_CHILD   : int  26 90 292 401 75 55 166 403 162 35 ...
##  $ FHH_CHILD   : int  57 169 731 933 229 164 369 1400 483 102 ...
##  $ HSE_UNITS   : int  3050 3239 14601 16629 5404 3242 8002 26979 4797 2354 ...
##  $ VACANT      : int  1474 992 3360 3975 2009 385 1334 4145 981 432 ...
##  $ OWNER_OCC   : int  1332 1568 8566 8439 2500 2237 4888 16131 2325 1381 ...
##  $ RENTER_OCC  : int  244 679 2675 4215 895 620 1780 6703 1491 541 ...
##  $ MEDIAN_VAL  : int  40900 50100 55900 50300 49500 49500 48900 64200 43800 39100 ...
##  $ MEDIANRENT  : int  185 197 231 222 237 217 205 272 147 189 ...
##  $ UNITS_1DET  : int  1927 2128 10388 11281 3915 2393 5349 18087 3172 1713 ...
##  $ UNITS_1ATT  : int  14 10 109 147 29 7 44 578 158 19 ...
##  $ UNITS2      : int  24 40 133 346 41 31 103 700 226 47 ...
##  $ UNITS3_9    : int  29 51 324 816 97 107 219 1607 212 121 ...
##  $ UNITS10_49  : int  80 44 264 286 89 34 198 976 149 84 ...
##  $ UNITS50_UP  : int  0 0 0 0 0 0 0 267 0 0 ...
##  $ MOBILEHOME  : int  937 936 3264 3431 1185 643 1941 4463 815 341 ...
##  $ NO_FARMS87  : int  222 218 1073 1476 227 297 245 825 405 393 ...
##  $ AVG_SIZE87  : int  536 3489 490 907 276 267 251 313 4065 2879 ...
##  $ CROP_ACR87  : int  83787 29482 131700 144053 22923 51806 19418 105359 484202 671593 ...
##  $ AVG_SALE87  : int  27958 22155 18138 71970 10367 29463 9346 24714 85215 74509 ...
##  $ Shape_Length: num  370200 361955 453727 582790 298910 ...
##  $ Shape_Area  : num  4.62e+09 5.91e+09 6.55e+09 1.37e+10 3.74e+09 ...
##  $ Shape       :sfc_MULTIPOLYGON of length 3140; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:24, 1:2] 49032 49036 67391 67147 64277 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "Shape"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:53] "NAME" "STATE_NAME" "STATE_FIPS" "CNTY_FIPS" ...
str(Terr.USA)
## Formal class 'RasterLayer' [package "raster"] with 12 slots
##   ..@ file    :Formal class '.RasterFile' [package "raster"] with 13 slots
##   .. .. ..@ name        : chr "C:\\Rizwan\\Education\\UofM PhD Work\\Seminar in Earth Sciences\\Exams\\Final_Exam\\ExamData\\Terrain1.tif"
##   .. .. ..@ datanotation: chr "FLT4S"
##   .. .. ..@ byteorder   : chr "little"
##   .. .. ..@ nodatavalue : num -Inf
##   .. .. ..@ NAchanged   : logi FALSE
##   .. .. ..@ nbands      : int 1
##   .. .. ..@ bandorder   : chr "BIL"
##   .. .. ..@ offset      : int 0
##   .. .. ..@ toptobottom : logi TRUE
##   .. .. ..@ blockrows   : int 128
##   .. .. ..@ blockcols   : int 128
##   .. .. ..@ driver      : chr "gdal"
##   .. .. ..@ open        : logi FALSE
##   ..@ data    :Formal class '.SingleLayerData' [package "raster"] with 13 slots
##   .. .. ..@ values    : logi(0) 
##   .. .. ..@ offset    : num 0
##   .. .. ..@ gain      : num 1
##   .. .. ..@ inmemory  : logi FALSE
##   .. .. ..@ fromdisk  : logi TRUE
##   .. .. ..@ isfactor  : logi FALSE
##   .. .. ..@ attributes: list()
##   .. .. ..@ haveminmax: logi TRUE
##   .. .. ..@ min       : num -84.7
##   .. .. ..@ max       : num 4191
##   .. .. ..@ band      : int 1
##   .. .. ..@ unit      : chr ""
##   .. .. ..@ names     : chr "Terrain1"
##   ..@ legend  :Formal class '.RasterLegend' [package "raster"] with 5 slots
##   .. .. ..@ type      : chr(0) 
##   .. .. ..@ values    : logi(0) 
##   .. .. ..@ color     : logi(0) 
##   .. .. ..@ names     : logi(0) 
##   .. .. ..@ colortable: logi(0) 
##   ..@ title   : chr(0) 
##   ..@ extent  :Formal class 'Extent' [package "raster"] with 4 slots
##   .. .. ..@ xmin: num -2453944
##   .. .. ..@ xmax: num 2315056
##   .. .. ..@ ymin: num -252919
##   .. .. ..@ ymax: num 3723081
##   ..@ rotated : logi FALSE
##   ..@ rotation:Formal class '.Rotation' [package "raster"] with 2 slots
##   .. .. ..@ geotrans: num(0) 
##   .. .. ..@ transfun:function ()  
##   ..@ ncols   : int 4769
##   ..@ nrows   : int 3976
##   ..@ crs     :Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs"
##   ..@ history : list()
##   ..@ z       : list()
head(Data)
## # A tibble: 6 x 9
##     FID AREANAME    CLASS ST    STFIPS HOUSEUNITS POPULATION   lat  long
##   <int> <chr>       <chr> <chr>  <int>      <int>      <int> <dbl> <dbl>
## 1     0 Abbeville   city  AL         1       1320       3173  31.6 -85.3
## 2     1 Adamsville  city  AL         1       1554       4161  33.6 -87.0
## 3     2 Addison     town  AL         1        286        626  34.2 -87.2
## 4     3 Akron       town  AL         1        220        468  32.9 -87.7
## 5     4 Alabaster   city  AL         1       5144      14732  33.2 -86.8
## 6     5 Albertville city  AL         1       6238      14507  34.3 -86.2
cat("Number of NA value in the data table is: ", sum(is.na(Data)), sep = '')
## Number of NA value in the data table is: 0

Read all layers from the geodatabase

LayerList <- st_layers(dsn1)
LayerList
## Driver: OpenFileGDB 
## Available layers:
##     layer_name     geometry_type features fields
## 1 USA_counties     Multi Polygon     3140     53
## 2    USA_roads Multi Line String      679      8
class(LayerList)
## [1] "sf_layers"
LayerList[1]
## $name
## [1] "USA_counties" "USA_roads"

Question 1

Name = LayerList$name
Layers <- lapply(c(1:length(Name)), function(i) { 
  st_read(dsn1, layer = Name[i])
})
## Reading layer `USA_counties' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb' using driver `OpenFileGDB'
## Simple feature collection with 3140 features and 53 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -6293474 ymin: 311822.4 xmax: 2256319 ymax: 6198811
## projected CRS:  NAD83 / Conus Albers
## Reading layer `USA_roads' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb' using driver `OpenFileGDB'
## Simple feature collection with 679 features and 8 fields
## geometry type:  MULTILINESTRING
## dimension:      XY
## bbox:           xmin: -6146140 ymin: 316215.5 xmax: 2242259 ymax: 5729382
## projected CRS:  NAD83 / Conus Albers
names(Layers) <- Name
Counties <- Layers$USA_counties
Roads <- Layers$USA_roads
Counties_NoAlHw <- subset(Counties, Counties$STATE_NAME != "Alaska" & Counties$STATE_NAME != "Hawaii")
tm_shape(Counties_NoAlHw)+tm_polygons()+
  tm_shape(Roads)+tm_lines(col = 'blue')+
  tm_layout(title = " United States of America", title.position = c('center', 'top'))+
  tm_compass(type = "8star", position = c('right', 'bottom'), text.size = 1)+
  tm_scale_bar(position = c('left', 'bottom'), text.size = 1)

# Converting the data frame to a sf object
cities <- st_as_sf(Data, coords = c('long', 'lat'), crs = 4269)
cities_NoAlHw <- subset(cities, cities$ST != "AK" & cities$ST != "HI")

# Transforming projection to match the counties/roads projection system
cities_transform <- st_transform(cities_NoAlHw, st_crs(Counties))
tm_shape(Counties_NoAlHw)+tm_polygons()+
  tm_shape(Roads)+tm_lines(col = 'blue')+
  tm_shape(cities_transform)+tm_dots()+
  tm_layout(title = "Cities of the USA", title.position = c('center', 'top'))+
  tm_compass(type = "8star", position = c('right', 'bottom'), text.size = 1)+
  tm_scale_bar(position = c('left', 'bottom'), text.size = 1)

How many cities located within 20 km buffer from the roads?

Roads_join <- Roads %>% st_buffer(20000) %>% st_join(cities_transform)
nCities <- Roads_join %>% dplyr::select(CLASS, ST, POPULATION)%>% group_by(CLASS) %>%
  summarise(NumberCities = n()) %>% arrange(desc(NumberCities))
## `summarise()` ungrouping output (override with `.groups` argument)
Tot.Cities <- as.data.frame(nCities[1,1:2])
Tot.Cities
##   CLASS NumberCities                          Shape
## 1  city        11693 POLYGON ((-172664.1 313901....

How many cities by states and mean population

byStates <- Roads_join %>% dplyr::select(CLASS, POPULATION, ST) %>% 
  group_by(ST) %>% summarise(NumCity = n(),
                             meanPop = mean(POPULATION))
## `summarise()` ungrouping output (override with `.groups` argument)
byStates
## Simple feature collection with 50 features and 3 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: -6166138 ymin: 296217 xmax: 2262258 ymax: 5749381
## projected CRS:  NAD83 / Conus Albers
## # A tibble: 50 x 4
##    ST    NumCity meanPop                                                   Shape
##    <chr>   <int>   <dbl>                                           <POLYGON [m]>
##  1 AL        431  14918. ((605172 844872, 605664.8 845040.9, 607639.6 845718.1,~
##  2 AR        196   8479. ((519860 1341225, 511849.2 1340868, 502569.7 1339073, ~
##  3 AZ        118  61477. ((-1306491 1059745, -1306767 1059910, -1307640 1060496~
##  4 CA       1504  59945. ((-1799446 1283609, -1784884 1281231, -1783792 1281022~
##  5 CO        291  26914. ((-898139.5 1419245, -900468.6 1417787, -901400.2 1417~
##  6 CT        226  22421. ((1818971 2189031, 1818460 2189958, 1817999 2190910, 1~
##  7 DC         13 606900  ((1575079 1836797, 1574722 1837460, 1574276 1838403, 1~
##  8 DE         83   9860. ((1656571 2017097, 1656952 2017930, 1657486 2018938, 1~
##  9 FL       1503  19044. ((1610630 448013.4, 1611450 443844.5, 1611959 441435.4~
## 10 GA        540  13737. ((1343886 954809.4, 1343753 955305, 1343523 956404.4, ~
## # ... with 40 more rows

Question 2

cities_transform$Elevation <- extract(Terr.USA, cities_transform)
elev_sum <- cities_transform %>% dplyr::select(ST, Elevation) %>% 
  group_by(ST) %>% 
  summarise(meanElev = mean(Elevation, na.rm = TRUE),
            stdvElev = sd(Elevation, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
elev_sum
## Simple feature collection with 49 features and 3 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: -2345385 ymin: 279478.7 xmax: 2250123 ymax: 3164337
## projected CRS:  NAD83 / Conus Albers
## # A tibble: 49 x 4
##    ST    meanElev stdvElev                                              geometry
##    <chr>    <dbl>    <dbl>                                        <GEOMETRY [m]>
##  1 AL       156.      92.4 MULTIPOINT ((712433.5 1069506), (712934.2 1147641), ~
##  2 AR       137.     104.  MULTIPOINT ((130060.1 1459923), (135039.4 1469705), ~
##  3 AZ      1025.     616.  MULTIPOINT ((-1743743 1219525), (-1735191 1229715), ~
##  4 CA       248.     370.  MULTIPOINT ((-2345385 2109126), (-2337473 2302033), ~
##  5 CO      1852.     506.  MULTIPOINT ((-1122675 1712945), (-1115604 1649092), ~
##  6 CT        80.9     75.6 MULTIPOINT ((1846420 2335730), (1851766 2264549), (1~
##  7 DC        17.3     NA                                 POINT (1620172 1926646)
##  8 DE        15.1     19.6 MULTIPOINT ((1707720 2033320), (1710851 2040225), (1~
##  9 FL        13.7     16.3 MULTIPOINT ((827611.7 887086.3), (831081.8 916217.6)~
## 10 GA       185.     127.  MULTIPOINT ((949183.4 1365255), (956970 1322049), (9~
## # ... with 39 more rows

Question 3

tmap_mode("plot")
## tmap mode set to plotting
States <- Counties_NoAlHw %>% group_by(STATE_NAME) %>% summarise(Age_5_17 = sum(AGE_5_17),
                                                                 Age_65_up = sum(AGE_65_UP),
                                                                 Age_Diff = (Age_5_17) - (Age_65_up))
## `summarise()` ungrouping output (override with `.groups` argument)
tm_shape(States)+tm_fill(col = "Age_Diff", palette = "YlGn", midpoint = NA)+
  tm_borders()+
  tm_layout(title = "Map of Age Difference by States", title.position = c('center', 'top'))+
  tm_compass(type = "8star", position = c('right', 'bottom'), text.size = 1)+
  tm_scale_bar(position = c('center', 'bottom'), text.size = 0.8)