rm(list = ls()) # Clears the global environment for a fresh start
cat('\f') # Cleans the console
setwd("C:/Rizwan/Education/UofM PhD Work/Seminar in Earth Sciences/Exams/Final_Exam")
getwd()
## [1] "C:/Rizwan/Education/UofM PhD Work/Seminar in Earth Sciences/Exams/Final_Exam"
This code allows to load multiple library at once.
my_library <- c("sf", "dplyr", "RColorBrewer", "raster", "tmap", "leaflet", "rgdal")
lapply(my_library, library, character.only = TRUE)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: sp
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/hasan/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/hasan/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
unzip(zipfile = "ExamData.zip", exdir = getwd())
path = "C:/Rizwan/Education/UofM PhD Work/Seminar in Earth Sciences/Exams/Final_Exam/ExamData"
list.files(path, pattern=NULL, all.files=FALSE,
full.names=FALSE)
## [1] "cities1.csv" "SeminarR.gdb" "Terrain1.tfw"
## [4] "Terrain1.tif" "Terrain1.tif.aux.xml" "Terrain1.tif.ovr"
dsn1 <- "ExamData/SeminarR.gdb"
dsn2 <- "ExamData/Terrain1.tif"
USA <- st_read(dsn1)
## Multiple layers are present in data source C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb, reading layer `USA_counties'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE, noBreaks. =
## FALSE, : automatically selected the first layer in a data source containing more
## than one.
## Reading layer `USA_counties' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb' using driver `OpenFileGDB'
## Simple feature collection with 3140 features and 53 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -6293474 ymin: 311822.4 xmax: 2256319 ymax: 6198811
## projected CRS: NAD83 / Conus Albers
Terr.USA <- raster(dsn2)
Data <- as_tibble(read.csv("ExamData/cities1.csv", header = TRUE))
str(USA)
## Classes 'sf' and 'data.frame': 3140 obs. of 54 variables:
## $ NAME : chr "Lake of the Woods" "Ferry" "Stevens" "Okanogan" ...
## $ STATE_NAME : chr "Minnesota" "Washington" "Washington" "Washington" ...
## $ STATE_FIPS : chr "27" "53" "53" "53" ...
## $ CNTY_FIPS : chr "077" "019" "065" "047" ...
## $ FIPS : chr "27077" "53019" "53065" "53047" ...
## $ POP1990 : int 4076 6295 30948 33350 8915 8332 17481 59218 12121 5046 ...
## $ POP1999 : int 4597 7150 39965 38596 11788 9840 18691 72458 12524 4653 ...
## $ POP90_SQMI : int 2 3 12 6 6 7 5 11 4 3 ...
## $ HOUSEHOLDS : int 1576 2247 11241 12654 3395 2857 6668 22834 3816 1922 ...
## $ MALES : int 2037 3280 15454 16828 4426 4252 8777 29316 5985 2486 ...
## $ FEMALES : int 2039 3015 15494 16522 4489 4080 8704 29902 6136 2560 ...
## $ WHITE : int 4042 5084 28747 27615 8640 7950 17103 57897 5258 4900 ...
## $ BLACK : int 1 20 65 52 12 3 11 56 11 5 ...
## $ AMERI_ES : int 19 1131 1807 3597 206 150 282 880 6823 118 ...
## $ ASIAN_PI : int 10 24 179 166 25 26 54 240 10 16 ...
## $ OTHER : int 4 36 150 1920 32 203 31 145 19 7 ...
## $ HISPANIC : int 25 85 483 2779 120 310 197 616 78 36 ...
## $ AGE_UNDER5 : int 337 486 2271 2536 660 635 1250 4211 1337 367 ...
## $ AGE_5_17 : int 791 1499 7486 7051 1963 2065 3979 12538 3159 1110 ...
## $ AGE_18_29 : int 549 887 3586 4492 996 1138 2175 7480 1892 606 ...
## $ AGE_30_49 : int 1122 1929 9605 9749 2670 2351 5410 19245 3209 1457 ...
## $ AGE_50_64 : int 584 827 4145 4890 1384 1121 2525 8028 1364 687 ...
## $ AGE_65_UP : int 693 667 3855 4632 1242 1022 2142 7716 1160 819 ...
## $ NEVERMARRY : int 538 1118 4037 5042 1124 1247 2265 8150 2275 723 ...
## $ MARRIED : int 2102 2741 14795 15320 4349 3935 8604 28719 4526 2432 ...
## $ SEPARATED : int 24 82 384 561 114 89 218 713 155 46 ...
## $ WIDOWED : int 248 251 1486 1889 507 391 846 3004 621 294 ...
## $ DIVORCED : int 200 498 2096 2406 641 480 1177 4489 679 306 ...
## $ HSEHLD_1_M : int 185 302 1146 1410 373 300 820 2484 394 264 ...
## $ HSEHLD_1_F : int 184 195 1210 1633 402 299 734 3019 435 302 ...
## $ MARHH_CHD : int 465 652 3618 3355 961 938 2075 6721 1258 542 ...
## $ MARHH_NO_C : int 565 660 3591 3946 1157 928 2127 7223 824 594 ...
## $ MHH_CHILD : int 26 90 292 401 75 55 166 403 162 35 ...
## $ FHH_CHILD : int 57 169 731 933 229 164 369 1400 483 102 ...
## $ HSE_UNITS : int 3050 3239 14601 16629 5404 3242 8002 26979 4797 2354 ...
## $ VACANT : int 1474 992 3360 3975 2009 385 1334 4145 981 432 ...
## $ OWNER_OCC : int 1332 1568 8566 8439 2500 2237 4888 16131 2325 1381 ...
## $ RENTER_OCC : int 244 679 2675 4215 895 620 1780 6703 1491 541 ...
## $ MEDIAN_VAL : int 40900 50100 55900 50300 49500 49500 48900 64200 43800 39100 ...
## $ MEDIANRENT : int 185 197 231 222 237 217 205 272 147 189 ...
## $ UNITS_1DET : int 1927 2128 10388 11281 3915 2393 5349 18087 3172 1713 ...
## $ UNITS_1ATT : int 14 10 109 147 29 7 44 578 158 19 ...
## $ UNITS2 : int 24 40 133 346 41 31 103 700 226 47 ...
## $ UNITS3_9 : int 29 51 324 816 97 107 219 1607 212 121 ...
## $ UNITS10_49 : int 80 44 264 286 89 34 198 976 149 84 ...
## $ UNITS50_UP : int 0 0 0 0 0 0 0 267 0 0 ...
## $ MOBILEHOME : int 937 936 3264 3431 1185 643 1941 4463 815 341 ...
## $ NO_FARMS87 : int 222 218 1073 1476 227 297 245 825 405 393 ...
## $ AVG_SIZE87 : int 536 3489 490 907 276 267 251 313 4065 2879 ...
## $ CROP_ACR87 : int 83787 29482 131700 144053 22923 51806 19418 105359 484202 671593 ...
## $ AVG_SALE87 : int 27958 22155 18138 71970 10367 29463 9346 24714 85215 74509 ...
## $ Shape_Length: num 370200 361955 453727 582790 298910 ...
## $ Shape_Area : num 4.62e+09 5.91e+09 6.55e+09 1.37e+10 3.74e+09 ...
## $ Shape :sfc_MULTIPOLYGON of length 3140; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:24, 1:2] 49032 49036 67391 67147 64277 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "Shape"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:53] "NAME" "STATE_NAME" "STATE_FIPS" "CNTY_FIPS" ...
str(Terr.USA)
## Formal class 'RasterLayer' [package "raster"] with 12 slots
## ..@ file :Formal class '.RasterFile' [package "raster"] with 13 slots
## .. .. ..@ name : chr "C:\\Rizwan\\Education\\UofM PhD Work\\Seminar in Earth Sciences\\Exams\\Final_Exam\\ExamData\\Terrain1.tif"
## .. .. ..@ datanotation: chr "FLT4S"
## .. .. ..@ byteorder : chr "little"
## .. .. ..@ nodatavalue : num -Inf
## .. .. ..@ NAchanged : logi FALSE
## .. .. ..@ nbands : int 1
## .. .. ..@ bandorder : chr "BIL"
## .. .. ..@ offset : int 0
## .. .. ..@ toptobottom : logi TRUE
## .. .. ..@ blockrows : int 128
## .. .. ..@ blockcols : int 128
## .. .. ..@ driver : chr "gdal"
## .. .. ..@ open : logi FALSE
## ..@ data :Formal class '.SingleLayerData' [package "raster"] with 13 slots
## .. .. ..@ values : logi(0)
## .. .. ..@ offset : num 0
## .. .. ..@ gain : num 1
## .. .. ..@ inmemory : logi FALSE
## .. .. ..@ fromdisk : logi TRUE
## .. .. ..@ isfactor : logi FALSE
## .. .. ..@ attributes: list()
## .. .. ..@ haveminmax: logi TRUE
## .. .. ..@ min : num -84.7
## .. .. ..@ max : num 4191
## .. .. ..@ band : int 1
## .. .. ..@ unit : chr ""
## .. .. ..@ names : chr "Terrain1"
## ..@ legend :Formal class '.RasterLegend' [package "raster"] with 5 slots
## .. .. ..@ type : chr(0)
## .. .. ..@ values : logi(0)
## .. .. ..@ color : logi(0)
## .. .. ..@ names : logi(0)
## .. .. ..@ colortable: logi(0)
## ..@ title : chr(0)
## ..@ extent :Formal class 'Extent' [package "raster"] with 4 slots
## .. .. ..@ xmin: num -2453944
## .. .. ..@ xmax: num 2315056
## .. .. ..@ ymin: num -252919
## .. .. ..@ ymax: num 3723081
## ..@ rotated : logi FALSE
## ..@ rotation:Formal class '.Rotation' [package "raster"] with 2 slots
## .. .. ..@ geotrans: num(0)
## .. .. ..@ transfun:function ()
## ..@ ncols : int 4769
## ..@ nrows : int 3976
## ..@ crs :Formal class 'CRS' [package "sp"] with 1 slot
## .. .. ..@ projargs: chr "+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs"
## ..@ history : list()
## ..@ z : list()
head(Data)
## # A tibble: 6 x 9
## FID AREANAME CLASS ST STFIPS HOUSEUNITS POPULATION lat long
## <int> <chr> <chr> <chr> <int> <int> <int> <dbl> <dbl>
## 1 0 Abbeville city AL 1 1320 3173 31.6 -85.3
## 2 1 Adamsville city AL 1 1554 4161 33.6 -87.0
## 3 2 Addison town AL 1 286 626 34.2 -87.2
## 4 3 Akron town AL 1 220 468 32.9 -87.7
## 5 4 Alabaster city AL 1 5144 14732 33.2 -86.8
## 6 5 Albertville city AL 1 6238 14507 34.3 -86.2
cat("Number of NA value in the data table is: ", sum(is.na(Data)), sep = '')
## Number of NA value in the data table is: 0
LayerList <- st_layers(dsn1)
LayerList
## Driver: OpenFileGDB
## Available layers:
## layer_name geometry_type features fields
## 1 USA_counties Multi Polygon 3140 53
## 2 USA_roads Multi Line String 679 8
class(LayerList)
## [1] "sf_layers"
LayerList[1]
## $name
## [1] "USA_counties" "USA_roads"
Name = LayerList$name
Layers <- lapply(c(1:length(Name)), function(i) {
st_read(dsn1, layer = Name[i])
})
## Reading layer `USA_counties' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb' using driver `OpenFileGDB'
## Simple feature collection with 3140 features and 53 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -6293474 ymin: 311822.4 xmax: 2256319 ymax: 6198811
## projected CRS: NAD83 / Conus Albers
## Reading layer `USA_roads' from data source `C:\Rizwan\Education\UofM PhD Work\Seminar in Earth Sciences\Exams\Final_Exam\ExamData\SeminarR.gdb' using driver `OpenFileGDB'
## Simple feature collection with 679 features and 8 fields
## geometry type: MULTILINESTRING
## dimension: XY
## bbox: xmin: -6146140 ymin: 316215.5 xmax: 2242259 ymax: 5729382
## projected CRS: NAD83 / Conus Albers
names(Layers) <- Name
Counties <- Layers$USA_counties
Roads <- Layers$USA_roads
Counties_NoAlHw <- subset(Counties, Counties$STATE_NAME != "Alaska" & Counties$STATE_NAME != "Hawaii")
tm_shape(Counties_NoAlHw)+tm_polygons()+
tm_shape(Roads)+tm_lines(col = 'blue')+
tm_layout(title = " United States of America", title.position = c('center', 'top'))+
tm_compass(type = "8star", position = c('right', 'bottom'), text.size = 1)+
tm_scale_bar(position = c('left', 'bottom'), text.size = 1)
# Converting the data frame to a sf object
cities <- st_as_sf(Data, coords = c('long', 'lat'), crs = 4269)
cities_NoAlHw <- subset(cities, cities$ST != "AK" & cities$ST != "HI")
# Transforming projection to match the counties/roads projection system
cities_transform <- st_transform(cities_NoAlHw, st_crs(Counties))
tm_shape(Counties_NoAlHw)+tm_polygons()+
tm_shape(Roads)+tm_lines(col = 'blue')+
tm_shape(cities_transform)+tm_dots()+
tm_layout(title = "Cities of the USA", title.position = c('center', 'top'))+
tm_compass(type = "8star", position = c('right', 'bottom'), text.size = 1)+
tm_scale_bar(position = c('left', 'bottom'), text.size = 1)
Roads_join <- Roads %>% st_buffer(20000) %>% st_join(cities_transform)
nCities <- Roads_join %>% dplyr::select(CLASS, ST, POPULATION)%>% group_by(CLASS) %>%
summarise(NumberCities = n()) %>% arrange(desc(NumberCities))
## `summarise()` ungrouping output (override with `.groups` argument)
Tot.Cities <- as.data.frame(nCities[1,1:2])
Tot.Cities
## CLASS NumberCities Shape
## 1 city 11693 POLYGON ((-172664.1 313901....
byStates <- Roads_join %>% dplyr::select(CLASS, POPULATION, ST) %>%
group_by(ST) %>% summarise(NumCity = n(),
meanPop = mean(POPULATION))
## `summarise()` ungrouping output (override with `.groups` argument)
byStates
## Simple feature collection with 50 features and 3 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: -6166138 ymin: 296217 xmax: 2262258 ymax: 5749381
## projected CRS: NAD83 / Conus Albers
## # A tibble: 50 x 4
## ST NumCity meanPop Shape
## <chr> <int> <dbl> <POLYGON [m]>
## 1 AL 431 14918. ((605172 844872, 605664.8 845040.9, 607639.6 845718.1,~
## 2 AR 196 8479. ((519860 1341225, 511849.2 1340868, 502569.7 1339073, ~
## 3 AZ 118 61477. ((-1306491 1059745, -1306767 1059910, -1307640 1060496~
## 4 CA 1504 59945. ((-1799446 1283609, -1784884 1281231, -1783792 1281022~
## 5 CO 291 26914. ((-898139.5 1419245, -900468.6 1417787, -901400.2 1417~
## 6 CT 226 22421. ((1818971 2189031, 1818460 2189958, 1817999 2190910, 1~
## 7 DC 13 606900 ((1575079 1836797, 1574722 1837460, 1574276 1838403, 1~
## 8 DE 83 9860. ((1656571 2017097, 1656952 2017930, 1657486 2018938, 1~
## 9 FL 1503 19044. ((1610630 448013.4, 1611450 443844.5, 1611959 441435.4~
## 10 GA 540 13737. ((1343886 954809.4, 1343753 955305, 1343523 956404.4, ~
## # ... with 40 more rows
cities_transform$Elevation <- extract(Terr.USA, cities_transform)
elev_sum <- cities_transform %>% dplyr::select(ST, Elevation) %>%
group_by(ST) %>%
summarise(meanElev = mean(Elevation, na.rm = TRUE),
stdvElev = sd(Elevation, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
elev_sum
## Simple feature collection with 49 features and 3 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: -2345385 ymin: 279478.7 xmax: 2250123 ymax: 3164337
## projected CRS: NAD83 / Conus Albers
## # A tibble: 49 x 4
## ST meanElev stdvElev geometry
## <chr> <dbl> <dbl> <GEOMETRY [m]>
## 1 AL 156. 92.4 MULTIPOINT ((712433.5 1069506), (712934.2 1147641), ~
## 2 AR 137. 104. MULTIPOINT ((130060.1 1459923), (135039.4 1469705), ~
## 3 AZ 1025. 616. MULTIPOINT ((-1743743 1219525), (-1735191 1229715), ~
## 4 CA 248. 370. MULTIPOINT ((-2345385 2109126), (-2337473 2302033), ~
## 5 CO 1852. 506. MULTIPOINT ((-1122675 1712945), (-1115604 1649092), ~
## 6 CT 80.9 75.6 MULTIPOINT ((1846420 2335730), (1851766 2264549), (1~
## 7 DC 17.3 NA POINT (1620172 1926646)
## 8 DE 15.1 19.6 MULTIPOINT ((1707720 2033320), (1710851 2040225), (1~
## 9 FL 13.7 16.3 MULTIPOINT ((827611.7 887086.3), (831081.8 916217.6)~
## 10 GA 185. 127. MULTIPOINT ((949183.4 1365255), (956970 1322049), (9~
## # ... with 39 more rows
tmap_mode("plot")
## tmap mode set to plotting
States <- Counties_NoAlHw %>% group_by(STATE_NAME) %>% summarise(Age_5_17 = sum(AGE_5_17),
Age_65_up = sum(AGE_65_UP),
Age_Diff = (Age_5_17) - (Age_65_up))
## `summarise()` ungrouping output (override with `.groups` argument)
tm_shape(States)+tm_fill(col = "Age_Diff", palette = "YlGn", midpoint = NA)+
tm_borders()+
tm_layout(title = "Map of Age Difference by States", title.position = c('center', 'top'))+
tm_compass(type = "8star", position = c('right', 'bottom'), text.size = 1)+
tm_scale_bar(position = c('center', 'bottom'), text.size = 0.8)