rm(list = ls()) # Clears the global environment for a fresh start
cat('\f') # Cleans the console
my_libtrary <- c("sf", "dplyr", "spData", "raster", "stringr", "tidyr", "tmap", "classInt")
lapply(my_libtrary, library, character.only = TRUE)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'spData' was built under R version 4.0.3
## Loading required package: sp
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:raster':
##
## extract
For these exercises we used the us_states and us_states_df datasets from the spData package.
data("us_states")
data("us_states_df")
The class of the new object is us_states which automatically added while performing the selection makes the new object geographic.
us_states_name = us_states %>% dplyr::select(NAME)
class(us_states_name)
## [1] "sf" "data.frame"
# Method 1
pop_1 = us_states[5:6]
# Method 1
pop_2 = us_states %>% dplyr::select(total_pop_10, total_pop_15)
# Method 3
pop_3 = us_states %>% dplyr::select(contains("pop"))
Belong to the Midwest region:
us_states_midwest = us_states %>% filter(REGION == "Midwest")
Belong to the West region, have an area below 250,000 km2 and in 2015 a population greater than 5,000,000 residents.
us_states_west = us_states %>%
filter(REGION == "West", AREA < units::set_units(250000, km^2),total_pop_15 > 5000000)
Belong to the South region, had an area larger than 150,000 km2 or a total population in 2015 larger than 7,000,000 residents.
us_states_south = us_states %>%
filter(REGION == "South", AREA > units::set_units(150000, km^2), total_pop_15 > 7000000)
us_states %>% summarize(total_pop = sum(total_pop_15),
min_pop = min(total_pop_15),
max_pop = max(total_pop_15))
## Simple feature collection with 1 feature and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## total_pop min_pop max_pop geometry
## 1 314375347 579679 38421464 MULTIPOLYGON (((-81.81169 2...
THe total popiulation in 2015 is 314,375,347, the minimum population is 579,679, and the maximum population is 38,421,464.
us_states %>%
group_by(REGION) %>%
summarize(nr_of_states = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
## REGION nr_of_states geometry
## <fct> <int> <MULTIPOLYGON [°]>
## 1 Norteast 9 (((-75.55945 39.62981, -75.50974 39.68611, -75.41506 39~
## 2 Midwest 12 (((-87.80048 42.49192, -87.83477 42.30152, -87.80007 42~
## 3 South 17 (((-81.81169 24.56874, -81.74565 24.65988, -81.44351 24~
## 4 West 11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32.851~
Number of states per region: Region: #States: Northeast 9
Midwest 12
South 17
West 11
The output shows the minimum and maximum total population, and total population in 2015 in each region.
us_states %>%
group_by(REGION) %>%
summarize(min_pop = min(total_pop_15),
max_pop = max(total_pop_15),
tot_pop = sum(total_pop_15))
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 5
## REGION min_pop max_pop tot_pop geometry
## <fct> <dbl> <dbl> <dbl> <MULTIPOLYGON [°]>
## 1 Nortea~ 626604 19673174 5.60e7 (((-75.55945 39.62981, -75.50974 39.68611, -~
## 2 Midwest 721640 12873761 6.75e7 (((-87.80048 42.49192, -87.83477 42.30152, -~
## 3 South 647484 26538614 1.19e8 (((-81.81169 24.56874, -81.74565 24.65988, -~
## 4 West 579679 38421464 7.23e7 (((-118.6055 33.031, -118.37 32.83927, -118.~
We used the left_join function as it reserves the ,font color = “blue”>us_states dataset. The key variables in both datasets are NAME, and state. The class of the new object is <font color = "blue>sf dataframe.
us_states_stats = us_states %>%
left_join(us_states_df, by = c("NAME" = "state"))
class(us_states_stats)
## [1] "sf" "data.frame"
We can use the dplyr::anti_join function to find the answer.
us_states_df %>%
anti_join(st_drop_geometry(us_states), by = c("state" = "NAME"))
## # A tibble: 2 x 5
## state median_income_10 median_income_15 poverty_level_10 poverty_level_15
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alaska 29509 31455 64245 72957
## 2 Hawaii 29945 31051 124627 153944
The following will calculate the population density for each state in the year 2010 and 2015.
us_states2 = us_states %>%
mutate(pop_dens_15 = total_pop_15/AREA,
pop_dens_10 = total_pop_10/AREA)
The following will calculate the difference between the population density of year 2010 and 2010. It also computes the changes in percentages and mapped them using plot function.
us_popdens_change = us_states2 %>%
mutate(pop_dens_diff_10_15 = pop_dens_15 - pop_dens_10,
pop_dens_diff_10_15p = (pop_dens_diff_10_15/pop_dens_15) * 100)
plot(us_popdens_change["pop_dens_diff_10_15p"])
The following code will change the column names.
us_states %>%
setNames(tolower(colnames(.)))
## Simple feature collection with 49 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
## geoid name region area total_pop_10 total_pop_15
## 1 01 Alabama South 133709.27 [km^2] 4712651 4830620
## 2 04 Arizona West 295281.25 [km^2] 6246816 6641928
## 3 08 Colorado West 269573.06 [km^2] 4887061 5278906
## 4 09 Connecticut Norteast 12976.59 [km^2] 3545837 3593222
## 5 12 Florida South 151052.01 [km^2] 18511620 19645772
## 6 13 Georgia South 152725.21 [km^2] 9468815 10006693
## 7 16 Idaho West 216512.66 [km^2] 1526797 1616547
## 8 18 Indiana Midwest 93648.40 [km^2] 6417398 6568645
## 9 20 Kansas Midwest 213037.08 [km^2] 2809329 2892987
## 10 22 Louisiana South 122345.76 [km^2] 4429940 4625253
## geometry
## 1 MULTIPOLYGON (((-88.20006 3...
## 2 MULTIPOLYGON (((-114.7196 3...
## 3 MULTIPOLYGON (((-109.0501 4...
## 4 MULTIPOLYGON (((-73.48731 4...
## 5 MULTIPOLYGON (((-81.81169 2...
## 6 MULTIPOLYGON (((-85.60516 3...
## 7 MULTIPOLYGON (((-116.916 45...
## 8 MULTIPOLYGON (((-87.52404 4...
## 9 MULTIPOLYGON (((-102.0517 4...
## 10 MULTIPOLYGON (((-92.01783 2...
us_states_sel = us_states %>%
left_join(us_states_df, by = c("NAME" = "state")) %>%
dplyr::select(Income = median_income_15)
# Income change per state
us_income_change = us_states %>%
left_join(us_states_df, by = c("NAME" = "state")) %>%
mutate(income_change = median_income_15 - median_income_10)
# Minimum, average, and maximum income change per region
us_income_change_reg = us_income_change %>%
group_by(REGION) %>%
summarize(min_income_change = min(income_change),
mean_income_change = mean(income_change),
max_income_change = max(income_change))
## `summarise()` ungrouping output (override with `.groups` argument)
# selecting region with largest increase of the median income
us_income_change_reg %>%
filter(mean_income_change == max(mean_income_change)) %>%
pull(REGION) %>%
as.character()
## [1] "Midwest"
The region with largest increase of the median income is the “Midwest”.
r = raster(nrow = 9, ncol = 9, res = 0.5, xmn = 0, xmx = 4.5,
ymn = 0, ymx = 4.5, vals = rnorm(81))
r[c(1, 9, 81 - 9, 81)]
## [1] 0.37201451 0.09911746 0.48155547 0.66006495
r[c(1, nrow(r)), c(1, ncol(r))]
## [1] 0.37201451 0.09911746 -0.76723987 0.66006495
grain_size = c("clay", "silt", "sand")
grain = raster(nrow = 6, ncol = 6, res = 0.5,
xmn = -1.5, xmx = 1.5, ymn = -1.5, ymx = 1.5,
vals = factor(sample(grain_size, 36, replace = TRUE),
levels = grain_size))
cellStats(grain, modal) %>%
factorValues(grain, .)
## VALUE
## 1 sand
factorValues(grain, modal(values(grain)))
## VALUE
## 1 sand
The most common class of the example raster is Clay.
Plot histogram and boxplot of the data(dem, package = "spDataLarge) raster.
data(dem, package = "spDataLarge")
par(mfrow = c(1, 2))
hist(dem)
boxplot(dem)
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(nz) + qtm(nz_height)
canterbury = nz %>% filter(Name == "Canterbury")
canterbury_height = nz_height[canterbury, ]
nrow(canterbury_height)
## [1] 70
The number of high points contained by the Canterbury region is 70.
nz_height_count = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_height_count$elevation)
nz_height_combined %>%
st_set_geometry(NULL) %>%
dplyr::select(Name, count) %>%
arrange(desc(count)) %>%
slice(2)
## Name count
## 1 West Coast 22
The second highest number of nz_height points is in West Coast and the number of points is 22.
nz_height_count = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_height_count$elevation)
tbl_region = nz_height_combined %>%
st_set_geometry(NULL) %>%
dplyr::select(Name, count) %>%
arrange(desc(count)) %>%
na.omit()
Only 7 of the 16 regions have top 100 highest points within the country. The regions are listed in the output above and listed as table in order of points and their names in tbl_region object.
data(dem, package = "spDataLarge")
data(ndvi, package = "spDataLarge")
summary(dem)
## dem
## Min. 238
## 1st Qu. 366
## Median 478
## 3rd Qu. 748
## Max. 1094
## NA's 0
brk = classIntervals(values(dem), n = 3)$brk
rcl = matrix(c(brk[1] - 1, brk[2], 1, brk[2], brk[3], 2, brk[3], brk[4], 3),
ncol = 3, byrow = TRUE)
recl = reclassify(dem, rcl = rcl)
zonal(stack(dem, ndvi), recl, fun = "mean")
## zone dem ndvi
## [1,] 1 329.7571 -0.3473349
## [2,] 2 492.8862 -0.1311101
## [3,] 3 846.9908 -0.2944226
The above reclassifies the elevation in three classes. The class 1 = Low, Class 2 = Medium, and class 3 = High. The output also shows the mean NDVI, and the mean elevation of each class.