Chapter 3 Questions (Attribute Data Operations)
#1. Create a new object called us_states_name that contains only the NAME
# column from the us_states object. What is the class of the new object &
# what makes it geographic?
us_states_name <- us_states %>% dplyr::select(NAME)
class(us_states_name)
## [1] "sf" "data.frame"
# It is geographic because it has geometry and attributes.
#2. Select columns from the us_states object which contain population data.
# Obtain the same result using a different command.
popData1 <- us_states %>% dplyr::select(total_pop_10, total_pop_15)
popData2 <- us_states %>% dplyr::select(contains("total_pop"))
popData3 <- us_states %>% dplyr::select(starts_with("total_pop"))
#3. Find all states with the following characteristics.
#a) Belong to the Midwest region.
us_states %>% filter(REGION == "Midwest")
## Simple feature collection with 12 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -104.0577 ymin: 35.99568 xmax: -80.51869 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 18 Indiana Midwest 93648.4 [km^2] 6417398 6568645
## 2 20 Kansas Midwest 213037.1 [km^2] 2809329 2892987
## 3 27 Minnesota Midwest 218566.5 [km^2] 5241914 5419171
## 4 29 Missouri Midwest 180716.3 [km^2] 5922314 6045448
## 5 38 North Dakota Midwest 183177.8 [km^2] 659858 721640
## 6 46 South Dakota Midwest 199766.8 [km^2] 799462 843190
## 7 17 Illinois Midwest 145993.1 [km^2] 12745359 12873761
## 8 19 Iowa Midwest 145743.7 [km^2] 3016267 3093526
## 9 26 Michigan Midwest 151119.0 [km^2] 9952687 9900571
## 10 31 Nebraska Midwest 200272.3 [km^2] 1799125 1869365
## geometry
## 1 MULTIPOLYGON (((-87.52404 4...
## 2 MULTIPOLYGON (((-102.0517 4...
## 3 MULTIPOLYGON (((-97.22904 4...
## 4 MULTIPOLYGON (((-95.76565 4...
## 5 MULTIPOLYGON (((-104.0487 4...
## 6 MULTIPOLYGON (((-104.0577 4...
## 7 MULTIPOLYGON (((-91.41942 4...
## 8 MULTIPOLYGON (((-96.45326 4...
## 9 MULTIPOLYGON (((-85.63002 4...
## 10 MULTIPOLYGON (((-104.0531 4...
#b) Belong to the West region, have an area below 250,000 km^2^ *and* in 2015
# a population greater than 5,000,000 residents.
us_states %>% filter(REGION == "West", AREA < units::set_units(250000, km^2),
total_pop_15 > 5000000)
## Simple feature collection with 1 feature and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 53 Washington West 175436 [km^2] 6561297 6985464
## geometry
## 1 MULTIPOLYGON (((-122.7699 4...
#c) Belong to the South region, had an area larger than 150,000 km^2^ or a
# total population in 2015 larger than 7,000,000 residents.
us_states %>% filter(REGION == "South", AREA > units::set_units(150000, km^2),
total_pop_15 > 7000000)
## Simple feature collection with 3 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -106.6359 ymin: 24.55868 xmax: -80.03136 ymax: 36.50044
## geographic CRS: NAD83
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 12 Florida South 151052.0 [km^2] 18511620 19645772
## 2 13 Georgia South 152725.2 [km^2] 9468815 10006693
## 3 48 Texas South 687714.3 [km^2] 24311891 26538614
## geometry
## 1 MULTIPOLYGON (((-81.81169 2...
## 2 MULTIPOLYGON (((-85.60516 3...
## 3 MULTIPOLYGON (((-103.0024 3...
#4. What was the total population in 2015 in the us_states dataset?
# What was the minimum and maximum total population in 2015?
us_states %>% summarize(total_pop = sum(total_pop_15, na.rm = TRUE),
min_pop = min(total_pop_15),
max_pop = max(total_pop_15))
## Simple feature collection with 1 feature and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## total_pop min_pop max_pop geometry
## 1 314375347 579679 38421464 MULTIPOLYGON (((-81.81169 2...
# The total population in 2015 was 314375347.
# The minimum population was 579679, and the maximum was 38421464.
#5. How many states are there in each region?
us_states %>% group_by(REGION) %>%
summarize(num_of_states = n()) # There are 9 states in the Northeast region,
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
## REGION num_of_states geometry
## <fct> <int> <MULTIPOLYGON [°]>
## 1 Norteast 9 (((-75.55945 39.62981, -75.50974 39.68611, -75.41506 3~
## 2 Midwest 12 (((-87.80048 42.49192, -87.83477 42.30152, -87.80007 4~
## 3 South 17 (((-81.81169 24.56874, -81.74565 24.65988, -81.44351 2~
## 4 West 11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32.85~
# 12 in the Midwest, 17 in the South, and 11 in the West.
#6. What was the minimum and maximum total population in 2015 in each region?
# What was the total population in 2015 in each region?
us_states %>%
group_by(REGION) %>%
summarize(min_pop = min(total_pop_15, na.rm = TRUE),
max_pop = max(total_pop_15),
tot_pop = sum(total_pop_15))
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 5
## REGION min_pop max_pop tot_pop geometry
## <fct> <dbl> <dbl> <dbl> <MULTIPOLYGON [°]>
## 1 Nortea~ 626604 19673174 5.60e7 (((-75.55945 39.62981, -75.50974 39.68611, -~
## 2 Midwest 721640 12873761 6.75e7 (((-87.80048 42.49192, -87.83477 42.30152, -~
## 3 South 647484 26538614 1.19e8 (((-81.81169 24.56874, -81.74565 24.65988, -~
## 4 West 579679 38421464 7.23e7 (((-118.6055 33.031, -118.37 32.83927, -118.~
#7. Add variables from us_states_df to us_states, and create a new object
# called us_states_stats. What function did you use and why? Which variable
# is the key in both datasets? What is the class of the new object
us_states_stats <- us_states %>%
left_join(us_states_df, by = c("NAME" = "state"))
class(us_states_stats) # The new object is of class sf and data.frame.
## [1] "sf" "data.frame"
# I used left_join because it returns only the 49 states that are in the
# us_states_stats dataset on the left, rather than all 51 that are included in
# us_states. The geometry variable is key in both datasets.
#8. us_states_df has two more rows than us_states. How can you find them?
us_states_df %>%
anti_join(st_drop_geometry(us_states), by = c("state" = "NAME"))
## # A tibble: 2 x 5
## state median_income_10 median_income_15 poverty_level_10 poverty_level_15
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alaska 29509 31455 64245 72957
## 2 Hawaii 29945 31051 124627 153944
#9. What was the population density in 2015 in each state? What was the
# population density in 2010 in each state?
us_States2 <- us_states %>%
mutate(popDen15 = total_pop_15/AREA,
popDens10 = total_pop_10/AREA)
# changing units to better evaluate
(us_States2$popDen15 > units::set_units(30, 1/km^2))
## [1] TRUE FALSE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE
## [13] TRUE FALSE FALSE TRUE TRUE FALSE FALSE TRUE TRUE FALSE TRUE FALSE
## [25] FALSE FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE
## [37] FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE
## [49] FALSE
#10. How much has population density changed between 2010 and 2015 in each
# state? Calculate the change in percentages and map them.
us_popDen_change <- us_States2 %>%
mutate(popDen_diff_10_15 = popDen15 - popDens10,
popDen_diff_10_15p = (popDen_diff_10_15/popDen15) * 100)
plot(us_popDen_change["popDen_diff_10_15p"])

#11. Change the columns names in us_states to lowercase.
us_states %>%
setNames(tolower(colnames(.)))
#12. Using us_states and us_states_df create a new object called us_states_sel.
# The new object should have only two variables - median_income_15 and
# geometry. Change the name of the median_income_15 column to Income.
us_states_sel <- us_states %>%
left_join(us_states_df, by = c("NAME" = "state")) %>%
dplyr::select(Income = median_income_15)
#13.Calculate the change in median income between 2010 and 2015 for each state.
us_incomeChange <- us_states %>%
left_join(us_states_df, by = c("NAME" = "state")) %>%
mutate(incomeChange = median_income_15 - median_income_10)
us_incomeChange_reg <- us_incomeChange %>%
group_by(REGION) %>%
summarize(min_incomeChange = min(incomeChange),
mean_incomeChange = mean(incomeChange),
max_incomeChange = max(incomeChange))
## `summarise()` ungrouping output (override with `.groups` argument)
us_incomeChange_reg %>%
filter(mean_incomeChange == max(mean_incomeChange)) %>%
pull(REGION) %>%
as.character()
## [1] "Midwest"
#14. Create a raster from scratch with nine rows and columns and a resolution
# of 0.5 decimal degrees (WGS84). Fill it with random numbers. Extract the
# values of the four corner cells.
r <- raster(nrow = 9, ncol = 9, res = 0.5, xmn = 0, xmx = 4.5,
ymn = 0, ymx = 4.5, vals = rnorm(81))
r[c(2, 11, 81 - 7, 81)]
## [1] 0.84233407 1.25149683 -0.05961189 1.57170644
r[c(1, nrow(r)), c(1, ncol(r))]
## [1] -1.779550 -1.898186 -1.003249 1.571706
#15. What is the most common class of our example raster grain (hint: modal())?
grainSize <- c("clay", "silt", "sand")
grain <- raster(nrow = 6, ncol = 6, res = 0.5,
xmn = -1.5, xmx = 1.5, ymn = -1.5, ymx = 1.5,
vals = factor(sample(grainSize, 36, replace = TRUE),
levels = grainSize))
cellStats(grain, modal) %>%
factorValues(grain, .)
## VALUE
## 1 sand
factorValues(grain, modal(values(grain))) #sand
## VALUE
## 1 sand
#16. Plot the histogram and the boxplot of the
# data(dem, package = "spDataLarge") raster.
data(dem, package = "spDataLarge")
par(mfrow = c(1, 2))
hist(dem)
boxplot(dem)

Chapter 4 Questions (Spatial Data Operations)
#1. It was established in this section that Canterbury was the region of
# New Zealand containing most of 100 highest points in the country.
# How many of these high points does Canterbury Region contain?
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(nz) + qtm(nz_height)
canterbury <- nz %>% filter(Name == "Canterbury")
canterburyHeight = nz_height[canterbury, ]
nrow(canterburyHeight) # It has 70 high points.
## [1] 70
#2. Which region has the second highest number of nz_height points in, and how
# many does it have?
nz_heightCount = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_heightCount$elevation)
nz_height_combined %>%
st_set_geometry(NULL) %>%
dplyr::select(Name, count) %>%
arrange(desc(count)) %>%
slice(2) # The West Coast has the second highest number of high points, 22.
## Name count
## 1 West Coast 22
#3. Generalizing the question to all regions: how many of New Zealand’s 16
# regions contain points which belong to the top 100 highest points in the
# country? Which regions?
nz_heightCount = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_heightCount$elevation)
nz_height_combined %>%
st_set_geometry(NULL) %>%
dplyr::select(Name, count) %>%
arrange(desc(count)) %>%
na.omit()
## Name count
## 1 Canterbury 70
## 2 West Coast 22
## 3 Waikato 3
## 4 Manawatu-Wanganui 2
## 5 Otago 2
## 6 Southland 1
## 7 Marlborough 1
#4. Use data(dem, package = "spDataLarge"), and reclassify the elevation in
# three classes: low, medium and high. Secondly, attach the NDVI raster
# (data(ndvi, package = "spDataLarge")) and compute the mean NDVI and the
# mean elevation for each altitudinal class.
library(classInt)
data(ndvi, package = "spDataLarge")
summary(dem)
## dem
## Min. 238
## 1st Qu. 366
## Median 478
## 3rd Qu. 748
## Max. 1094
## NA's 0
brk = classIntervals(values(dem), n = 3)$brk
reclass <- matrix(c(brk[1] - 1, brk[2], 1, brk[2], brk[3], 2, brk[3],
brk[4], 3), ncol = 3, byrow = TRUE)
# reclassifying the altitudinal raster
reclass_alt <- reclassify(dem, rcl = reclass)
# computing the means for each class
zonal(stack(dem, ndvi), reclass_alt, fun = "mean")
## zone dem ndvi
## [1,] 1 329.7571 -0.3473349
## [2,] 2 492.8862 -0.1311101
## [3,] 3 846.9908 -0.2944226