## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
## Warning: package 'sp' was built under R version 4.0.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:raster':
## 
##     extract

Chapter 3 Questions (Attribute Data Operations)

#1. Create a new object called us_states_name that contains only the NAME
  # column from the us_states object. What is the class of the new object & 
  # what makes it geographic?

us_states_name <- us_states %>% dplyr::select(NAME)
class(us_states_name) 
## [1] "sf"         "data.frame"
# It is geographic because it has geometry and attributes.

#2. Select columns from the us_states object which contain population data. 
  # Obtain the same result using a different command.
popData1 <- us_states %>% dplyr::select(total_pop_10, total_pop_15)
popData2 <- us_states %>% dplyr::select(contains("total_pop"))
popData3 <- us_states %>% dplyr::select(starts_with("total_pop"))

#3. Find all states with the following characteristics.
  #a) Belong to the Midwest region.
us_states %>% filter(REGION == "Midwest")
## Simple feature collection with 12 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -104.0577 ymin: 35.99568 xmax: -80.51869 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
##    GEOID         NAME  REGION            AREA total_pop_10 total_pop_15
## 1     18      Indiana Midwest  93648.4 [km^2]      6417398      6568645
## 2     20       Kansas Midwest 213037.1 [km^2]      2809329      2892987
## 3     27    Minnesota Midwest 218566.5 [km^2]      5241914      5419171
## 4     29     Missouri Midwest 180716.3 [km^2]      5922314      6045448
## 5     38 North Dakota Midwest 183177.8 [km^2]       659858       721640
## 6     46 South Dakota Midwest 199766.8 [km^2]       799462       843190
## 7     17     Illinois Midwest 145993.1 [km^2]     12745359     12873761
## 8     19         Iowa Midwest 145743.7 [km^2]      3016267      3093526
## 9     26     Michigan Midwest 151119.0 [km^2]      9952687      9900571
## 10    31     Nebraska Midwest 200272.3 [km^2]      1799125      1869365
##                          geometry
## 1  MULTIPOLYGON (((-87.52404 4...
## 2  MULTIPOLYGON (((-102.0517 4...
## 3  MULTIPOLYGON (((-97.22904 4...
## 4  MULTIPOLYGON (((-95.76565 4...
## 5  MULTIPOLYGON (((-104.0487 4...
## 6  MULTIPOLYGON (((-104.0577 4...
## 7  MULTIPOLYGON (((-91.41942 4...
## 8  MULTIPOLYGON (((-96.45326 4...
## 9  MULTIPOLYGON (((-85.63002 4...
## 10 MULTIPOLYGON (((-104.0531 4...
  #b) Belong to the West region, have an area below 250,000 km^2^ *and* in 2015 
   # a population greater than 5,000,000 residents.
us_states %>% filter(REGION == "West", AREA < units::set_units(250000, km^2),
                     total_pop_15 > 5000000)
## Simple feature collection with 1 feature and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
##   GEOID       NAME REGION          AREA total_pop_10 total_pop_15
## 1    53 Washington   West 175436 [km^2]      6561297      6985464
##                         geometry
## 1 MULTIPOLYGON (((-122.7699 4...
  #c) Belong to the South region, had an area larger than 150,000 km^2^ or a 
    # total population in 2015 larger than 7,000,000 residents.
us_states %>% filter(REGION == "South", AREA > units::set_units(150000, km^2), 
                     total_pop_15 > 7000000)
## Simple feature collection with 3 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -106.6359 ymin: 24.55868 xmax: -80.03136 ymax: 36.50044
## geographic CRS: NAD83
##   GEOID    NAME REGION            AREA total_pop_10 total_pop_15
## 1    12 Florida  South 151052.0 [km^2]     18511620     19645772
## 2    13 Georgia  South 152725.2 [km^2]      9468815     10006693
## 3    48   Texas  South 687714.3 [km^2]     24311891     26538614
##                         geometry
## 1 MULTIPOLYGON (((-81.81169 2...
## 2 MULTIPOLYGON (((-85.60516 3...
## 3 MULTIPOLYGON (((-103.0024 3...
#4. What was the total population in 2015 in the us_states dataset? 
  # What was the minimum and maximum total population in 2015?
us_states %>% summarize(total_pop = sum(total_pop_15, na.rm = TRUE),
                        min_pop = min(total_pop_15),
                        max_pop = max(total_pop_15))
## Simple feature collection with 1 feature and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
##   total_pop min_pop  max_pop                       geometry
## 1 314375347  579679 38421464 MULTIPOLYGON (((-81.81169 2...
# The total population in 2015 was 314375347. 
# The minimum population was 579679, and the maximum was 38421464.

#5. How many states are there in each region?
us_states %>% group_by(REGION) %>%
  summarize(num_of_states = n()) # There are 9 states in the Northeast region, 
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
##   REGION   num_of_states                                                geometry
##   <fct>            <int>                                      <MULTIPOLYGON [°]>
## 1 Norteast             9 (((-75.55945 39.62981, -75.50974 39.68611, -75.41506 3~
## 2 Midwest             12 (((-87.80048 42.49192, -87.83477 42.30152, -87.80007 4~
## 3 South               17 (((-81.81169 24.56874, -81.74565 24.65988, -81.44351 2~
## 4 West                11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32.85~
# 12 in the Midwest, 17 in the South, and 11 in the West.

#6. What was the minimum and maximum total population in 2015 in each region? 
  # What was the total population in 2015 in each region?
us_states %>%
  group_by(REGION) %>%
  summarize(min_pop = min(total_pop_15, na.rm = TRUE),
            max_pop = max(total_pop_15),
            tot_pop = sum(total_pop_15))
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 4 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 5
##   REGION  min_pop  max_pop tot_pop                                      geometry
##   <fct>     <dbl>    <dbl>   <dbl>                            <MULTIPOLYGON [°]>
## 1 Nortea~  626604 19673174  5.60e7 (((-75.55945 39.62981, -75.50974 39.68611, -~
## 2 Midwest  721640 12873761  6.75e7 (((-87.80048 42.49192, -87.83477 42.30152, -~
## 3 South    647484 26538614  1.19e8 (((-81.81169 24.56874, -81.74565 24.65988, -~
## 4 West     579679 38421464  7.23e7 (((-118.6055 33.031, -118.37 32.83927, -118.~
#7. Add variables from us_states_df to us_states, and create a new object 
  # called us_states_stats. What function did you use and why? Which variable 
  # is the key in both datasets? What is the class of the new object
us_states_stats <-  us_states %>%
  left_join(us_states_df, by = c("NAME" = "state"))
class(us_states_stats) # The new object is of class sf and data.frame.
## [1] "sf"         "data.frame"
# I used left_join because it returns only the 49 states that are in the 
# us_states_stats dataset on the left, rather than all 51 that are included in 
# us_states. The geometry variable is key in both datasets.

#8. us_states_df has two more rows than us_states. How can you find them?
us_states_df %>%
  anti_join(st_drop_geometry(us_states), by = c("state" = "NAME"))
## # A tibble: 2 x 5
##   state  median_income_10 median_income_15 poverty_level_10 poverty_level_15
##   <chr>             <dbl>            <dbl>            <dbl>            <dbl>
## 1 Alaska            29509            31455            64245            72957
## 2 Hawaii            29945            31051           124627           153944
#9. What was the population density in 2015 in each state? What was the 
  # population density in 2010 in each state?
us_States2 <-  us_states %>%
  mutate(popDen15 = total_pop_15/AREA,
         popDens10 = total_pop_10/AREA)
# changing units to better evaluate
(us_States2$popDen15 > units::set_units(30, 1/km^2))
##  [1]  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE
## [13]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE
## [25] FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE
## [37] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
## [49] FALSE
#10. How much has population density changed between 2010 and 2015 in each 
  # state? Calculate the change in percentages and map them.
us_popDen_change <-  us_States2 %>%
  mutate(popDen_diff_10_15 = popDen15 - popDens10,
         popDen_diff_10_15p = (popDen_diff_10_15/popDen15) * 100)
plot(us_popDen_change["popDen_diff_10_15p"])

#11. Change the columns names in us_states to lowercase. 
us_states %>%
  setNames(tolower(colnames(.)))
#12. Using us_states and us_states_df create a new object called us_states_sel.
  # The new object should have only two variables - median_income_15 and 
  # geometry. Change the name of the median_income_15 column to Income.
us_states_sel <-  us_states %>%
  left_join(us_states_df, by = c("NAME" = "state")) %>%
  dplyr::select(Income = median_income_15)

#13.Calculate the change in median income between 2010 and 2015 for each state.
us_incomeChange <-  us_states %>%
  left_join(us_states_df, by = c("NAME" = "state")) %>%
  mutate(incomeChange = median_income_15 - median_income_10) 

us_incomeChange_reg <-  us_incomeChange %>%
  group_by(REGION) %>%
  summarize(min_incomeChange = min(incomeChange),
            mean_incomeChange = mean(incomeChange),
            max_incomeChange = max(incomeChange))
## `summarise()` ungrouping output (override with `.groups` argument)
us_incomeChange_reg %>%
  filter(mean_incomeChange == max(mean_incomeChange)) %>%
  pull(REGION) %>%
  as.character()
## [1] "Midwest"
#14. Create a raster from scratch with nine rows and columns and a resolution 
  # of 0.5 decimal degrees (WGS84). Fill it with random numbers. Extract the 
  # values of the four corner cells.
r <-  raster(nrow = 9, ncol = 9, res = 0.5, xmn = 0, xmx = 4.5,
           ymn = 0, ymx = 4.5, vals = rnorm(81))
r[c(2, 11, 81 - 7, 81)]
## [1]  0.84233407  1.25149683 -0.05961189  1.57170644
r[c(1, nrow(r)), c(1, ncol(r))]
## [1] -1.779550 -1.898186 -1.003249  1.571706
#15. What is the most common class of our example raster grain (hint: modal())?
grainSize <-  c("clay", "silt", "sand")
grain <-  raster(nrow = 6, ncol = 6, res = 0.5, 
               xmn = -1.5, xmx = 1.5, ymn = -1.5, ymx = 1.5,
               vals = factor(sample(grainSize, 36, replace = TRUE), 
                             levels = grainSize))
cellStats(grain, modal) %>%
  factorValues(grain, .)
##   VALUE
## 1  sand
factorValues(grain, modal(values(grain))) #sand
##   VALUE
## 1  sand
#16. Plot the histogram and the boxplot of the 
  # data(dem, package = "spDataLarge") raster.
data(dem, package = "spDataLarge")
par(mfrow = c(1, 2))
hist(dem)
boxplot(dem)

Chapter 4 Questions (Spatial Data Operations)

#1. It was established in this section that Canterbury was the region of 
  # New Zealand containing most of 100 highest points in the country. 
  # How many of these high points does Canterbury Region contain?
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(nz) + qtm(nz_height)
canterbury <-  nz %>% filter(Name == "Canterbury")
canterburyHeight = nz_height[canterbury, ]
nrow(canterburyHeight) # It has 70 high points.
## [1] 70
#2. Which region has the second highest number of nz_height points in, and how 
  # many does it have?
nz_heightCount = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_heightCount$elevation)
nz_height_combined %>% 
  st_set_geometry(NULL) %>% 
  dplyr::select(Name, count) %>% 
  arrange(desc(count)) %>% 
  slice(2) # The West Coast has the second highest number of high points, 22.
##         Name count
## 1 West Coast    22
#3. Generalizing the question to all regions: how many of New Zealand’s 16 
  # regions contain points which belong to the top 100 highest points in the 
  # country? Which regions?
nz_heightCount = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_heightCount$elevation)
nz_height_combined %>% 
  st_set_geometry(NULL) %>% 
  dplyr::select(Name, count) %>% 
  arrange(desc(count)) %>% 
  na.omit()
##                Name count
## 1        Canterbury    70
## 2        West Coast    22
## 3           Waikato     3
## 4 Manawatu-Wanganui     2
## 5             Otago     2
## 6         Southland     1
## 7       Marlborough     1
#4. Use data(dem, package = "spDataLarge"), and reclassify the elevation in 
  # three classes: low, medium and high. Secondly, attach the NDVI raster 
  # (data(ndvi, package = "spDataLarge")) and compute the mean NDVI and the 
  # mean elevation for each altitudinal class.
library(classInt)
data(ndvi, package = "spDataLarge")
summary(dem)
##          dem
## Min.     238
## 1st Qu.  366
## Median   478
## 3rd Qu.  748
## Max.    1094
## NA's       0
brk = classIntervals(values(dem), n = 3)$brk
reclass <-  matrix(c(brk[1] - 1, brk[2], 1, brk[2], brk[3], 2, brk[3], 
                     brk[4], 3), ncol = 3, byrow = TRUE)
# reclassifying the altitudinal raster
reclass_alt <-  reclassify(dem, rcl = reclass)
# computing the means for each class
zonal(stack(dem, ndvi), reclass_alt, fun = "mean")
##      zone      dem       ndvi
## [1,]    1 329.7571 -0.3473349
## [2,]    2 492.8862 -0.1311101
## [3,]    3 846.9908 -0.2944226