Problem Set 2

The object us_states from the spData package is a simple feature data frame from the U.S. Census Bureau. The variables include the name, region, area, and population.

  1. Create a new data frame containing only the population information. (10)
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(sp)
library(RColorBrewer)
library(tmaptools)
library(rgeos)
## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-2 
##  Polygon checking: TRUE
library(rgdal)
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/JoshuaOsula/OneDrive - Jacksonville State University/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/JoshuaOsula/OneDrive - Jacksonville State University/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(ggplot2)
usppl = us_states %>%
  select(total_pop_10, total_pop_15)
usppl
## Simple feature collection with 49 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
##    total_pop_10 total_pop_15                       geometry
## 1       4712651      4830620 MULTIPOLYGON (((-88.20006 3...
## 2       6246816      6641928 MULTIPOLYGON (((-114.7196 3...
## 3       4887061      5278906 MULTIPOLYGON (((-109.0501 4...
## 4       3545837      3593222 MULTIPOLYGON (((-73.48731 4...
## 5      18511620     19645772 MULTIPOLYGON (((-81.81169 2...
## 6       9468815     10006693 MULTIPOLYGON (((-85.60516 3...
## 7       1526797      1616547 MULTIPOLYGON (((-116.916 45...
## 8       6417398      6568645 MULTIPOLYGON (((-87.52404 4...
## 9       2809329      2892987 MULTIPOLYGON (((-102.0517 4...
## 10      4429940      4625253 MULTIPOLYGON (((-92.01783 2...
  1. Create a new data frame containing only states from the South region. (10)
Sreg = us_states %>%
  filter(REGION == "South")
head(Sreg)
## Simple feature collection with 6 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -103.0024 ymin: 24.55868 xmax: -78.54109 ymax: 37.0001
## geographic CRS: NAD83
##   GEOID           NAME REGION             AREA total_pop_10 total_pop_15
## 1    01        Alabama  South 133709.27 [km^2]      4712651      4830620
## 2    12        Florida  South 151052.01 [km^2]     18511620     19645772
## 3    13        Georgia  South 152725.21 [km^2]      9468815     10006693
## 4    22      Louisiana  South 122345.76 [km^2]      4429940      4625253
## 5    40       Oklahoma  South 180971.40 [km^2]      3675339      3849733
## 6    45 South Carolina  South  80903.58 [km^2]      4511428      4777576
##                         geometry
## 1 MULTIPOLYGON (((-88.20006 3...
## 2 MULTIPOLYGON (((-81.81169 2...
## 3 MULTIPOLYGON (((-85.60516 3...
## 4 MULTIPOLYGON (((-92.01783 2...
## 5 MULTIPOLYGON (((-103.0022 3...
## 6 MULTIPOLYGON (((-83.10861 3...
  1. Create a new data frame containing only states from the West region having area less then 250,000 square km and a 2015 population more than 5,000,000 residents. Hint: you will need to use as.numeric(AREA) to remove the units. (10)
Wreg = us_states %>%
  filter(REGION == "West", as.numeric(AREA) < 250000, total_pop_15 > 5000000)
Wreg
## Simple feature collection with 1 feature and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
##   GEOID       NAME REGION          AREA total_pop_10 total_pop_15
## 1    53 Washington   West 175436 [km^2]      6561297      6985464
##                         geometry
## 1 MULTIPOLYGON (((-122.7699 4...
  1. What was the total population of the Midwest region in 2010 and 2015? (20)
USppl = us_states %>%
  select(total_pop_10, total_pop_15, "REGION") %>%
  filter (REGION == "Midwest") %>%
  summarise(t10 = sum(total_pop_10), 
            t15 = sum(total_pop_15)) %>%
  glimpse()
## Rows: 1
## Columns: 3
## $ t10      <dbl> 66514091
## $ t15      <dbl> 67546398
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-87.80048 4...
  1. How many states are in each region? (20)
us_states %>%
  group_by(REGION) %>%
  summarize(number_of_states = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
##   REGION   number_of_states                                             geometry
##   <fct>               <int>                                   <MULTIPOLYGON [°]>
## 1 Norteast                9 (((-75.55945 39.62981, -75.50974 39.68611, -75.4150~
## 2 Midwest                12 (((-87.80048 42.49192, -87.83477 42.30152, -87.8000~
## 3 South                  17 (((-81.81169 24.56874, -81.74565 24.65988, -81.4435~
## 4 West                   11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32~

Extra Credit. Make a bar chart showing the total area in millions of square kilometers by region. Hint: include stat = "identity" in the geom_bar() function. (10)

us_states %>%
  group_by(REGION) %>%
  summarize(AREA = n()) %>%
  ggplot(aes(x = REGION, y = AREA, fill = AREA)) +
  geom_bar(stat="identity", fill = "yellowgreen") +
  xlab("Region") + 
  ylab("Total Area in Millions km^2") +
  geom_text(aes(label = AREA), just = -.5, size = 3.5) +
  theme_minimal() + 
  theme(legend.position = "none")
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring unknown parameters: just

  1. Create a map using the Sac.metro.tracts data from lesson 7 of the medincome by county and then compare it with the map of total population for the counties. Include a north arrow, legend, neatline, and scale bar. (30)
download.file(url = "https://raw.githubusercontent.com/crd150/data/master/SacramentoMetroTracts.zip", destfile = "SacramentoMetroTracts.zip")
unzip(zipfile = "SacramentoMetroTracts.zip")
sac.metro.tracts <- st_read("SacMetroTractsIncome.shp", stringsAsFactors = FALSE)
## Reading layer `SacMetroTractsIncome' from data source `C:\R\SacMetroTractsIncome.shp' using driver `ESRI Shapefile'
## Simple feature collection with 486 features and 6 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -122.422 ymin: 38.01842 xmax: -119.8773 ymax: 39.31645
## geographic CRS: NAD83
Jmap1 = tm_shape(sac.metro.tracts) +
  tm_polygons("medincome", 
              style = "quantile",
              palette = "seq") +
  tm_layout(legend.outside = TRUE) + 
  tm_scale_bar() +
  tm_compass() + 
  tm_layout(frame.double.line = TRUE)

Jmap2 = tm_shape(sac.metro.tracts) +
  tm_borders() +
tm_fill("totp",
          title = "Total Population \nby County") + 
  tm_layout(legend.outside = TRUE) + 
  tm_scale_bar() +
  tm_compass() + 
  tm_layout(frame.double.line = TRUE)

tmap_arrange(Jmap1, Jmap2)