Problem Set 2

The object us_states from the spData package is a simple feature data frame from the U.S. Census Bureau. The variables include the name, region, area, and population.

  1. Create a new data frame containing only the population information. (10)
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(sp)
library(RColorBrewer)
library(tmaptools)
library(rgeos)
## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-2 
##  Polygon checking: TRUE
library(rgdal)
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/JoshuaOsula/OneDrive - Jacksonville State University/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/JoshuaOsula/OneDrive - Jacksonville State University/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(ggplot2)
usppl = us_states %>%
  select(total_pop_10, total_pop_15)
usppl
## Simple feature collection with 49 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
##    total_pop_10 total_pop_15                       geometry
## 1       4712651      4830620 MULTIPOLYGON (((-88.20006 3...
## 2       6246816      6641928 MULTIPOLYGON (((-114.7196 3...
## 3       4887061      5278906 MULTIPOLYGON (((-109.0501 4...
## 4       3545837      3593222 MULTIPOLYGON (((-73.48731 4...
## 5      18511620     19645772 MULTIPOLYGON (((-81.81169 2...
## 6       9468815     10006693 MULTIPOLYGON (((-85.60516 3...
## 7       1526797      1616547 MULTIPOLYGON (((-116.916 45...
## 8       6417398      6568645 MULTIPOLYGON (((-87.52404 4...
## 9       2809329      2892987 MULTIPOLYGON (((-102.0517 4...
## 10      4429940      4625253 MULTIPOLYGON (((-92.01783 2...
  1. Create a new data frame containing only states from the South region. (10)
Sreg = us_states %>%
  filter(REGION == "South")
head(Sreg)
## Simple feature collection with 6 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -103.0024 ymin: 24.55868 xmax: -78.54109 ymax: 37.0001
## geographic CRS: NAD83
##   GEOID           NAME REGION             AREA total_pop_10 total_pop_15
## 1    01        Alabama  South 133709.27 [km^2]      4712651      4830620
## 2    12        Florida  South 151052.01 [km^2]     18511620     19645772
## 3    13        Georgia  South 152725.21 [km^2]      9468815     10006693
## 4    22      Louisiana  South 122345.76 [km^2]      4429940      4625253
## 5    40       Oklahoma  South 180971.40 [km^2]      3675339      3849733
## 6    45 South Carolina  South  80903.58 [km^2]      4511428      4777576
##                         geometry
## 1 MULTIPOLYGON (((-88.20006 3...
## 2 MULTIPOLYGON (((-81.81169 2...
## 3 MULTIPOLYGON (((-85.60516 3...
## 4 MULTIPOLYGON (((-92.01783 2...
## 5 MULTIPOLYGON (((-103.0022 3...
## 6 MULTIPOLYGON (((-83.10861 3...
  1. Create a new data frame containing only states from the West region having area less then 250,000 square km and a 2015 population more than 5,000,000 residents. Hint: you will need to use as.numeric(AREA) to remove the units. (10)
Wreg = us_states %>%
  filter(REGION == "West", as.numeric(AREA) < 250000, total_pop_15 > 5000000)
Wreg
## Simple feature collection with 1 feature and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
##   GEOID       NAME REGION          AREA total_pop_10 total_pop_15
## 1    53 Washington   West 175436 [km^2]      6561297      6985464
##                         geometry
## 1 MULTIPOLYGON (((-122.7699 4...
  1. What was the total population of the Midwest region in 2010 and 2015? (20)
USppl = us_states %>%
  select(total_pop_10, total_pop_15, "REGION") %>%
  filter (REGION == "Midwest") %>%
  summarise(t10 = sum(total_pop_10), 
            t15 = sum(total_pop_15)) %>%
  glimpse()
## Rows: 1
## Columns: 3
## $ t10      <dbl> 66514091
## $ t15      <dbl> 67546398
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-87.80048 4...
  1. How many states are in each region? (20)
us_states %>%
  group_by(REGION) %>%
  summarize(number_of_states = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
##   REGION   number_of_states                                             geometry
##   <fct>               <int>                                   <MULTIPOLYGON [°]>
## 1 Norteast                9 (((-75.55945 39.62981, -75.50974 39.68611, -75.4150~
## 2 Midwest                12 (((-87.80048 42.49192, -87.83477 42.30152, -87.8000~
## 3 South                  17 (((-81.81169 24.56874, -81.74565 24.65988, -81.4435~
## 4 West                   11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32~

Extra Credit. Make a bar chart showing the total area in millions of square kilometers by region. Hint: include stat = "identity" in the geom_bar() function. (10)

us_states %>%
  group_by(REGION) %>%
  summarize(AREA = n()) %>%
  ggplot(aes(x = REGION, y = AREA, fill = AREA)) +
  geom_bar(stat="identity", fill = "yellowgreen") +
  xlab("Region") + 
  ylab("Total Area in Millions km^2") +
  geom_text(aes(label = AREA), just = -.5, size = 3.5) +
  theme_minimal() + 
  theme(legend.position = "none")
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring unknown parameters: just

  1. Create a map using the Sac.metro.tracts data from lesson 7 of the medincome by county and then compare it with the map of total population for the counties. Include a north arrow, legend, neatline, and scale bar. (30)
download.file(url = "https://raw.githubusercontent.com/crd150/data/master/SacramentoMetroTracts.zip", destfile = "SacramentoMetroTracts.zip")
unzip(zipfile = "SacramentoMetroTracts.zip")
sac.metro.tracts <- st_read("SacMetroTractsIncome.shp", stringsAsFactors = FALSE)
## Reading layer `SacMetroTractsIncome' from data source `C:\R\SacMetroTractsIncome.shp' using driver `ESRI Shapefile'
## Simple feature collection with 486 features and 6 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -122.422 ymin: 38.01842 xmax: -119.8773 ymax: 39.31645
## geographic CRS: NAD83
ca.race <- read.csv("https://raw.githubusercontent.com/crd150/data/master/californiatractsrace.csv")
sac.metro.tracts$GEOID = as.numeric(sac.metro.tracts$GEOID)
sac.metro.tracts <- sac.metro.tracts %>%
left_join(ca.race, by = "GEOID")
sac.metro.tracts
## Simple feature collection with 486 features and 10 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -122.422 ymin: 38.01842 xmax: -119.8773 ymax: 39.31645
## geographic CRS: NAD83
## First 10 features:
##         GEOID               tract           county      state medincome totp
## 1  6017030200    Census Tract 302 El Dorado County California     54071 5299
## 2  6017030301 Census Tract 303.01 El Dorado County California     48269 2874
## 3  6017030302 Census Tract 303.02 El Dorado County California     36782 2718
## 4  6017030401 Census Tract 304.01 El Dorado County California     56339 3710
## 5  6017030402 Census Tract 304.02 El Dorado County California     37868 3972
## 6  6017030502 Census Tract 305.02 El Dorado County California     89583 2638
## 7  6017030504 Census Tract 305.04 El Dorado County California     70435 2440
## 8  6017030505 Census Tract 305.05 El Dorado County California     63194 2352
## 9  6017030601 Census Tract 306.01 El Dorado County California     90517 5162
## 10 6017030602 Census Tract 306.02 El Dorado County California     67168 7325
##     pnhwhite      pnhasn      pnhblk      phisp                       geometry
## 1  0.5423665 0.040007549 0.010190602 0.36818268 POLYGON ((-120.0182 38.9383...
## 2  0.6022965 0.013569937 0.003479471 0.36986778 POLYGON ((-119.99 38.92282,...
## 3  0.7671082 0.009933775 0.009197940 0.20161884 POLYGON ((-119.9991 38.9376...
## 4  0.8652291 0.021563342 0.004043127 0.09326146 POLYGON ((-120.0229 38.9340...
## 5  0.6724572 0.045317221 0.019385700 0.23136959 POLYGON ((-120.0271 38.8884...
## 6  0.8931008 0.002274450 0.003032600 0.07429871 POLYGON ((-120.0458 38.9227...
## 7  0.7754098 0.043852459 0.000000000 0.11926230 POLYGON ((-120.0055 38.8751...
## 8  0.8728741 0.004676871 0.007227891 0.07738095 POLYGON ((-120.0509 38.7499...
## 9  0.9254165 0.007555211 0.000000000 0.02828361 POLYGON ((-121.141 38.71198...
## 10 0.8647099 0.003822526 0.004232082 0.10662116 POLYGON ((-120.9813 38.8511...
spdf.sac <- as(sac.metro.tracts, "Spatial")
spdf.sac <- spTransform(spdf.sac,
                        CRS = CRS("+proj=merc +ellps+GRS80 +units=m"))
is.projected(spdf.sac)
## [1] TRUE
slotNames(spdf.sac)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
plot(spdf.sac)

spdf.sac@bbox
##         min       max
## x -13627960 -13344679
## y   4555708   4740025
Jmap1 = tm_shape(spdf.sac)+
  tm_polygons("totp",
  style = "quantile",
  palette = "seq") +
  tm_legend(legend.outside = TRUE)+
  tm_layout(title = "Total Population",
            title.size = 1.1,
            title.position = c("center", "top")) +
  tm_layout(inner.margins = c(0.06, 0.10, 0.18, 0.08)) +
  tm_scale_bar(color.dark = "black",
               position = c("right, bottom")) +
  tm_compass(type = "4star", size = 4, text.size = 0.5,
             color.dark = "gray40", text.color = "black",
             position = c("left", "top"))

  
Jmap2 = tm_shape(spdf.sac)+
  tm_polygons("medincome",
  style = "quantile",
  palette = "seq") +
  tm_legend(legend.outside = TRUE) +
  tm_layout(title = "medincome",
            title.size = 1.1,
            title.position = c("center", "top")) +
  tm_layout(inner.margins = c(0.06, 0.10, 0.18, 0.08)) +  
  tm_scale_bar(color.dark = "black",
               position = c("right, bottom")) +
  tm_compass(type = "4star", size = 4, text.size = 0.5,
             color.dark = "gray40", text.color = "black",
             position = c("left", "top"))