R Markdown
library(sf)
## Warning: package 'sf' was built under R version 3.6.3
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(raster)
## Warning: package 'raster' was built under R version 3.6.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:raster':
##
## extract
library(spData)
## Warning: package 'spData' was built under R version 3.6.3
library(spData)
data(us_states)
data(us_states_df)
# 1. what is the class of the new object and what makes it geographic?
us_states_name = us_states %>% dplyr::select(NAME)
class(us_states_name)
## [1] "sf" "data.frame"
attributes(us_states_name)
## $names
## [1] "NAME" "geometry"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
##
## $class
## [1] "sf" "data.frame"
##
## $sf_column
## [1] "geometry"
##
## $agr
## NAME
## <NA>
## Levels: constant aggregate identity
# Classes are sf and data.frame
# sf class with crs makes it geographic
# 2. Select columns from the us_states object which contain population data. #Obtain the same result using a different command
us_states %>% dplyr::select(total_pop_10, total_pop_15)
## Simple feature collection with 49 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
## total_pop_10 total_pop_15 geometry
## 1 4712651 4830620 MULTIPOLYGON (((-88.20006 3...
## 2 6246816 6641928 MULTIPOLYGON (((-114.7196 3...
## 3 4887061 5278906 MULTIPOLYGON (((-109.0501 4...
## 4 3545837 3593222 MULTIPOLYGON (((-73.48731 4...
## 5 18511620 19645772 MULTIPOLYGON (((-81.81169 2...
## 6 9468815 10006693 MULTIPOLYGON (((-85.60516 3...
## 7 1526797 1616547 MULTIPOLYGON (((-116.916 45...
## 8 6417398 6568645 MULTIPOLYGON (((-87.52404 4...
## 9 2809329 2892987 MULTIPOLYGON (((-102.0517 4...
## 10 4429940 4625253 MULTIPOLYGON (((-92.01783 2...
# 3. find all states w the following characteristics
#Belong to the Midwest region.
us_states %>%
filter(REGION == "Midwest")
## Simple feature collection with 12 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -104.0577 ymin: 35.99568 xmax: -80.51869 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 18 Indiana Midwest 93648.4 [km^2] 6417398 6568645
## 2 20 Kansas Midwest 213037.1 [km^2] 2809329 2892987
## 3 27 Minnesota Midwest 218566.5 [km^2] 5241914 5419171
## 4 29 Missouri Midwest 180716.3 [km^2] 5922314 6045448
## 5 38 North Dakota Midwest 183177.8 [km^2] 659858 721640
## 6 46 South Dakota Midwest 199766.8 [km^2] 799462 843190
## 7 17 Illinois Midwest 145993.1 [km^2] 12745359 12873761
## 8 19 Iowa Midwest 145743.7 [km^2] 3016267 3093526
## 9 26 Michigan Midwest 151119.0 [km^2] 9952687 9900571
## 10 31 Nebraska Midwest 200272.3 [km^2] 1799125 1869365
## geometry
## 1 MULTIPOLYGON (((-87.52404 4...
## 2 MULTIPOLYGON (((-102.0517 4...
## 3 MULTIPOLYGON (((-97.22904 4...
## 4 MULTIPOLYGON (((-95.76565 4...
## 5 MULTIPOLYGON (((-104.0487 4...
## 6 MULTIPOLYGON (((-104.0577 4...
## 7 MULTIPOLYGON (((-91.41942 4...
## 8 MULTIPOLYGON (((-96.45326 4...
## 9 MULTIPOLYGON (((-85.63002 4...
## 10 MULTIPOLYGON (((-104.0531 4...
#Belong to the West region, have an area below 250,000 km^2^ *and* in 2015 a population greater than 5,000,000 residents (hint: you may need to use the function `units::set_units()` or `as.numeric()`).
us_states %>% filter(REGION == "West", AREA < units::set_units(250000, km^2),total_pop_15 > 5000000)
## Simple feature collection with 1 feature and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 53 Washington West 175436 [km^2] 6561297 6985464
## geometry
## 1 MULTIPOLYGON (((-122.7699 4...
us_states %>% filter(REGION == "West", as.numeric(AREA) < 250000,total_pop_15 > 5000000)
## Simple feature collection with 1 feature and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 53 Washington West 175436 [km^2] 6561297 6985464
## geometry
## 1 MULTIPOLYGON (((-122.7699 4...
#Belong to the South region, had an area larger than 150,000 km^2^ or a total population in 2015 larger than 7,000,000 residents.
us_states %>% filter(REGION == "South", AREA > units::set_units(150000, km^2), total_pop_15 > 7000000)
## Simple feature collection with 3 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -106.6359 ymin: 24.55868 xmax: -80.03136 ymax: 36.50044
## geographic CRS: NAD83
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 12 Florida South 151052.0 [km^2] 18511620 19645772
## 2 13 Georgia South 152725.2 [km^2] 9468815 10006693
## 3 48 Texas South 687714.3 [km^2] 24311891 26538614
## geometry
## 1 MULTIPOLYGON (((-81.81169 2...
## 2 MULTIPOLYGON (((-85.60516 3...
## 3 MULTIPOLYGON (((-103.0024 3...
us_states %>% filter(REGION == "South", as.numeric(AREA) > 150000, total_pop_15 > 7000000)
## Simple feature collection with 3 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -106.6359 ymin: 24.55868 xmax: -80.03136 ymax: 36.50044
## geographic CRS: NAD83
## GEOID NAME REGION AREA total_pop_10 total_pop_15
## 1 12 Florida South 151052.0 [km^2] 18511620 19645772
## 2 13 Georgia South 152725.2 [km^2] 9468815 10006693
## 3 48 Texas South 687714.3 [km^2] 24311891 26538614
## geometry
## 1 MULTIPOLYGON (((-81.81169 2...
## 2 MULTIPOLYGON (((-85.60516 3...
## 3 MULTIPOLYGON (((-103.0024 3...
# 4. what was total pop in 2015 in the us_states dataset? what was the min and max total pop in 2015
us_states %>% summarize(total_pop = sum(total_pop_15),
min_pop = min(total_pop_15),
max_pop = max(total_pop_15))
## Simple feature collection with 1 feature and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## total_pop min_pop max_pop geometry
## 1 314375347 579679 38421464 MULTIPOLYGON (((-81.81169 2...
# how many states in each region
us_states %>%
group_by(REGION) %>%
summarize(nr_of_states = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
## REGION nr_of_states geometry
## * <fct> <int> <MULTIPOLYGON [°]>
## 1 Norteast 9 (((-75.55945 39.62981, -75.50974 39.68611, -75.41506 39~
## 2 Midwest 12 (((-87.80048 42.49192, -87.83477 42.30152, -87.80007 42~
## 3 South 17 (((-81.81169 24.56874, -81.74565 24.65988, -81.44351 24~
## 4 West 11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32.851~
# Northeast = 9, Midwest = 12, South = 17, West = 11
# 6. What was the minimum and maximum total population in 2015 in each region? #What was the total population in 2015 in each region?
us_states %>%
group_by(REGION) %>%
summarize(min_pop = min(total_pop_15),
max_pop = max(total_pop_15),
tot_pop = sum(total_pop_15))
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 5
## REGION min_pop max_pop tot_pop geometry
## * <fct> <dbl> <dbl> <dbl> <MULTIPOLYGON [°]>
## 1 Nortea~ 626604 19673174 5.60e7 (((-75.55945 39.62981, -75.50974 39.68611, -~
## 2 Midwest 721640 12873761 6.75e7 (((-87.80048 42.49192, -87.83477 42.30152, -~
## 3 South 647484 26538614 1.19e8 (((-81.81169 24.56874, -81.74565 24.65988, -~
## 4 West 579679 38421464 7.23e7 (((-118.6055 33.031, -118.37 32.83927, -118.~
# Northeast:Min=626604,Max=1.97e7; Midwest:Min=721640,Max=1.29e7; South:Min=647484,Max=2.65e7; West:Min=579679,Max=3.84e7
#Northeast=5.60e7;Midwest=6.75e7;South=1.19e8;West=7.23e7
# 7. Add variables from us_states_df to us_states, and create a new object called us_states_stats. #What function did you use and why? Which variable is the key in both datasets? #What is the class of the new object?
us_states_stats = us_states %>%
left_join(us_states_df, by = c("NAME" = "state"))
class(us_states_stats)
## [1] "sf" "data.frame"
# left_join because it combines key variables. it is "name"
# 8. us_states_df has two more rows than us_states. How can you find them? #(hint: try to use the dplyr::anti_join() function)
us_states_df %>%
anti_join(st_drop_geometry(us_states), by = c("state" = "NAME"))
## # A tibble: 2 x 5
## state median_income_10 median_income_15 poverty_level_10 poverty_level_15
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alaska 29509 31455 64245 72957
## 2 Hawaii 29945 31051 124627 153944
# 9. What was the population density in 2015 in each state? #What was the population density in 2010 in each state?
us_states2 = us_states %>%
mutate(pop_dens_15 = total_pop_15/AREA,
pop_dens_10 = total_pop_10/AREA)
# 10. How much has population density changed between 2010 and 2015 in each state? #Calculate the change in percentages and map them.
us_popdens_change = us_states2 %>%
mutate(pop_dens_diff_10_15 = pop_dens_15 - pop_dens_10,
pop_dens_diff_10_15p = (pop_dens_diff_10_15/pop_dens_15) * 100)
plot(us_popdens_change["pop_dens_diff_10_15p"])

# 11. Change the columns names in us_states to lowercase. #(Hint: helper functions - tolower() and colnames() may help).
us_states %>%
setNames(tolower(colnames(.)))
## Simple feature collection with 49 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
## geoid name region area total_pop_10 total_pop_15
## 1 01 Alabama South 133709.27 [km^2] 4712651 4830620
## 2 04 Arizona West 295281.25 [km^2] 6246816 6641928
## 3 08 Colorado West 269573.06 [km^2] 4887061 5278906
## 4 09 Connecticut Norteast 12976.59 [km^2] 3545837 3593222
## 5 12 Florida South 151052.01 [km^2] 18511620 19645772
## 6 13 Georgia South 152725.21 [km^2] 9468815 10006693
## 7 16 Idaho West 216512.66 [km^2] 1526797 1616547
## 8 18 Indiana Midwest 93648.40 [km^2] 6417398 6568645
## 9 20 Kansas Midwest 213037.08 [km^2] 2809329 2892987
## 10 22 Louisiana South 122345.76 [km^2] 4429940 4625253
## geometry
## 1 MULTIPOLYGON (((-88.20006 3...
## 2 MULTIPOLYGON (((-114.7196 3...
## 3 MULTIPOLYGON (((-109.0501 4...
## 4 MULTIPOLYGON (((-73.48731 4...
## 5 MULTIPOLYGON (((-81.81169 2...
## 6 MULTIPOLYGON (((-85.60516 3...
## 7 MULTIPOLYGON (((-116.916 45...
## 8 MULTIPOLYGON (((-87.52404 4...
## 9 MULTIPOLYGON (((-102.0517 4...
## 10 MULTIPOLYGON (((-92.01783 2...
# 12. Using us_states and us_states_df create a new object called us_states_sel. #The new object should have only two variables - median_income_15 and geometry. #Change the name of the median_income_15 column to Income.
us_states_sel = us_states %>%
left_join(us_states_df, by = c("NAME" = "state")) %>%
dplyr::select(Income = median_income_15)
# 13. Calculate the change in median income between 2010 and 2015 for each state. #Bonus: what was the minimum, average and maximum median income in 2015 for each region? #What is the region with the largest increase of the median income?
us_income_change = us_states %>%
left_join(us_states_df, by = c("NAME" = "state")) %>%
mutate(income_change = median_income_15 - median_income_10)
us_income_change_reg = us_income_change %>%
group_by(REGION) %>%
summarize(min_income_change = min(income_change),
mean_income_change = mean(income_change),
max_income_change = max(income_change))
## `summarise()` ungrouping output (override with `.groups` argument)
# 14. Create a raster from scratch with nine rows and columns and a resolution of 0.5 decimal degrees (WGS84). #Fill it with random numbers. Extract the values of the four corner cells.
r = raster(nrow = 9, ncol = 9, res = 0.5, xmn = 0, xmx = 4.5,
ymn = 0, ymx = 4.5, vals = rnorm(81))
r[c(1, 9, 81 - 9, 81)]
## [1] 1.6187955 0.3095989 -2.0557295 0.2341100
r[c(1, nrow(r)), c(1, ncol(r))]
## [1] 1.6187955 0.3095989 0.2469161 0.2341100
# 15. What is the most common class of our example raster grain (hint: modal())?
grain_size = c("clay", "silt", "sand")
grain = raster(nrow = 6, ncol = 6, res = 0.5,
xmn = -1.5, xmx = 1.5, ymn = -1.5, ymx = 1.5,
vals = factor(sample(grain_size, 36, replace = TRUE),
levels = grain_size))
cellStats(grain, modal) %>%
factorValues(grain, .)
## VALUE
## 1 sand
factorValues(grain, modal(values(grain)))
## VALUE
## 1 sand
# sand
# 16.Plot the histogram and the boxplot of the data(dem, package = “spDataLarge”) raster.
install.packages("spDataLarge", repos = "https://nowosad.github.io/drat/", type = "source")
## Installing package into 'C:/Users/rache/Documents/R/win-library/3.6'
## (as 'lib' is unspecified)
data(dem, package = "spDataLarge")
par(mfrow = c(1, 2))
hist(dem)
boxplot(dem)

library(sf)
library(raster)
library(dplyr)
library(spData)
# Chapter 4
#1. It was established in Section 4.2 that Canterbury was the region of New Zealand containing most of the 100 highest points in the country. #How many of these high points does the Canterbury region contain?
library(tmap)
## Warning: package 'tmap' was built under R version 3.6.3
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(nz) + qtm(nz_height)
canterbury = nz %>% filter(Name == "Canterbury")
canterbury_height = nz_height[canterbury, ]
nrow(canterbury_height)
## [1] 70
# 70
# 2. Which region has the second highest number of nz_height points in, and how many does it have?
nz_height_count = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_height_count$elevation)
nz_height_combined %>%
st_set_geometry(NULL) %>%
dplyr::select(Name, count) %>%
arrange(desc(count)) %>%
slice(2)
## Name count
## 1 West Coast 22
# west coast; 22
# 3.Generalizing the question to all regions: #how many of New Zealand’s 16 regions contain points which belong to the top 100 highest points in the country? #Which regions? #Bonus: create a table listing these regions in order of the number of points and their name.
nz_height_count = aggregate(nz_height, nz, length)
# 4. Use data(dem, package = “spDataLarge”), and reclassify the elevation in three classes: low, medium and high. #Secondly, attach the NDVI raster (data(ndvi, package = “spDataLarge”)) and compute the mean NDVI and the mean elevation for each altitudinal class.
data(dem, package = "spDataLarge")
data(ndvi, package = "spDataLarge")
summary(dem)
## dem
## Min. 238
## 1st Qu. 366
## Median 478
## 3rd Qu. 748
## Max. 1094
## NA's 0
#keep getting errors for classInt package