R Markdown

library(sf)
## Warning: package 'sf' was built under R version 3.6.3
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(raster)
## Warning: package 'raster' was built under R version 3.6.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:raster':
## 
##     extract
library(spData)
## Warning: package 'spData' was built under R version 3.6.3
library(spData)
data(us_states)
data(us_states_df)

# 1. what is the class of the new object and what makes it geographic?

us_states_name = us_states %>% dplyr::select(NAME)
class(us_states_name)
## [1] "sf"         "data.frame"
attributes(us_states_name)
## $names
## [1] "NAME"     "geometry"
## 
## $row.names
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## 
## $class
## [1] "sf"         "data.frame"
## 
## $sf_column
## [1] "geometry"
## 
## $agr
## NAME 
## <NA> 
## Levels: constant aggregate identity
# Classes are sf and data.frame
# sf class with crs makes it geographic

# 2. Select columns from the us_states object which contain population data. #Obtain the same result using a different command 

us_states %>% dplyr::select(total_pop_10, total_pop_15)
## Simple feature collection with 49 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
##    total_pop_10 total_pop_15                       geometry
## 1       4712651      4830620 MULTIPOLYGON (((-88.20006 3...
## 2       6246816      6641928 MULTIPOLYGON (((-114.7196 3...
## 3       4887061      5278906 MULTIPOLYGON (((-109.0501 4...
## 4       3545837      3593222 MULTIPOLYGON (((-73.48731 4...
## 5      18511620     19645772 MULTIPOLYGON (((-81.81169 2...
## 6       9468815     10006693 MULTIPOLYGON (((-85.60516 3...
## 7       1526797      1616547 MULTIPOLYGON (((-116.916 45...
## 8       6417398      6568645 MULTIPOLYGON (((-87.52404 4...
## 9       2809329      2892987 MULTIPOLYGON (((-102.0517 4...
## 10      4429940      4625253 MULTIPOLYGON (((-92.01783 2...
# 3. find all states w the following characteristics

#Belong to the Midwest region.
us_states %>% 
  filter(REGION == "Midwest")
## Simple feature collection with 12 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -104.0577 ymin: 35.99568 xmax: -80.51869 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
##    GEOID         NAME  REGION            AREA total_pop_10 total_pop_15
## 1     18      Indiana Midwest  93648.4 [km^2]      6417398      6568645
## 2     20       Kansas Midwest 213037.1 [km^2]      2809329      2892987
## 3     27    Minnesota Midwest 218566.5 [km^2]      5241914      5419171
## 4     29     Missouri Midwest 180716.3 [km^2]      5922314      6045448
## 5     38 North Dakota Midwest 183177.8 [km^2]       659858       721640
## 6     46 South Dakota Midwest 199766.8 [km^2]       799462       843190
## 7     17     Illinois Midwest 145993.1 [km^2]     12745359     12873761
## 8     19         Iowa Midwest 145743.7 [km^2]      3016267      3093526
## 9     26     Michigan Midwest 151119.0 [km^2]      9952687      9900571
## 10    31     Nebraska Midwest 200272.3 [km^2]      1799125      1869365
##                          geometry
## 1  MULTIPOLYGON (((-87.52404 4...
## 2  MULTIPOLYGON (((-102.0517 4...
## 3  MULTIPOLYGON (((-97.22904 4...
## 4  MULTIPOLYGON (((-95.76565 4...
## 5  MULTIPOLYGON (((-104.0487 4...
## 6  MULTIPOLYGON (((-104.0577 4...
## 7  MULTIPOLYGON (((-91.41942 4...
## 8  MULTIPOLYGON (((-96.45326 4...
## 9  MULTIPOLYGON (((-85.63002 4...
## 10 MULTIPOLYGON (((-104.0531 4...
#Belong to the West region, have an area below 250,000 km^2^ *and* in 2015 a population greater than 5,000,000 residents (hint: you may need to use the function `units::set_units()` or `as.numeric()`).
us_states %>% filter(REGION == "West", AREA < units::set_units(250000, km^2),total_pop_15 > 5000000)
## Simple feature collection with 1 feature and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
##   GEOID       NAME REGION          AREA total_pop_10 total_pop_15
## 1    53 Washington   West 175436 [km^2]      6561297      6985464
##                         geometry
## 1 MULTIPOLYGON (((-122.7699 4...
us_states %>% filter(REGION == "West", as.numeric(AREA) < 250000,total_pop_15 > 5000000)
## Simple feature collection with 1 feature and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 45.54774 xmax: -116.916 ymax: 49.00236
## geographic CRS: NAD83
##   GEOID       NAME REGION          AREA total_pop_10 total_pop_15
## 1    53 Washington   West 175436 [km^2]      6561297      6985464
##                         geometry
## 1 MULTIPOLYGON (((-122.7699 4...
#Belong to the South region, had an area larger than 150,000 km^2^ or a total population in 2015 larger than 7,000,000 residents.
us_states %>% filter(REGION == "South", AREA > units::set_units(150000, km^2), total_pop_15 > 7000000)
## Simple feature collection with 3 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -106.6359 ymin: 24.55868 xmax: -80.03136 ymax: 36.50044
## geographic CRS: NAD83
##   GEOID    NAME REGION            AREA total_pop_10 total_pop_15
## 1    12 Florida  South 151052.0 [km^2]     18511620     19645772
## 2    13 Georgia  South 152725.2 [km^2]      9468815     10006693
## 3    48   Texas  South 687714.3 [km^2]     24311891     26538614
##                         geometry
## 1 MULTIPOLYGON (((-81.81169 2...
## 2 MULTIPOLYGON (((-85.60516 3...
## 3 MULTIPOLYGON (((-103.0024 3...
us_states %>% filter(REGION == "South", as.numeric(AREA) > 150000, total_pop_15 > 7000000)
## Simple feature collection with 3 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -106.6359 ymin: 24.55868 xmax: -80.03136 ymax: 36.50044
## geographic CRS: NAD83
##   GEOID    NAME REGION            AREA total_pop_10 total_pop_15
## 1    12 Florida  South 151052.0 [km^2]     18511620     19645772
## 2    13 Georgia  South 152725.2 [km^2]      9468815     10006693
## 3    48   Texas  South 687714.3 [km^2]     24311891     26538614
##                         geometry
## 1 MULTIPOLYGON (((-81.81169 2...
## 2 MULTIPOLYGON (((-85.60516 3...
## 3 MULTIPOLYGON (((-103.0024 3...
# 4. what was total pop in 2015 in the us_states dataset? what was the min and max total pop in 2015

us_states %>% summarize(total_pop =   sum(total_pop_15), 
                      min_pop = min(total_pop_15),
                      max_pop = max(total_pop_15))
## Simple feature collection with 1 feature and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
##   total_pop min_pop  max_pop                       geometry
## 1 314375347  579679 38421464 MULTIPOLYGON (((-81.81169 2...
# how many states in each region 

us_states %>%
  group_by(REGION) %>%
  summarize(nr_of_states = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 3
##   REGION   nr_of_states                                                 geometry
## * <fct>           <int>                                       <MULTIPOLYGON [°]>
## 1 Norteast            9 (((-75.55945 39.62981, -75.50974 39.68611, -75.41506 39~
## 2 Midwest            12 (((-87.80048 42.49192, -87.83477 42.30152, -87.80007 42~
## 3 South              17 (((-81.81169 24.56874, -81.74565 24.65988, -81.44351 24~
## 4 West               11 (((-118.6055 33.031, -118.37 32.83927, -118.4963 32.851~
# Northeast = 9, Midwest = 12, South = 17, West = 11

# 6. What was the minimum and maximum total population in 2015 in each region? #What was the total population in 2015 in each region?

us_states %>%
  group_by(REGION) %>%
  summarize(min_pop = min(total_pop_15),
            max_pop = max(total_pop_15),
            tot_pop = sum(total_pop_15))
## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 4 features and 4 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## # A tibble: 4 x 5
##   REGION  min_pop  max_pop tot_pop                                      geometry
## * <fct>     <dbl>    <dbl>   <dbl>                            <MULTIPOLYGON [°]>
## 1 Nortea~  626604 19673174  5.60e7 (((-75.55945 39.62981, -75.50974 39.68611, -~
## 2 Midwest  721640 12873761  6.75e7 (((-87.80048 42.49192, -87.83477 42.30152, -~
## 3 South    647484 26538614  1.19e8 (((-81.81169 24.56874, -81.74565 24.65988, -~
## 4 West     579679 38421464  7.23e7 (((-118.6055 33.031, -118.37 32.83927, -118.~
# Northeast:Min=626604,Max=1.97e7; Midwest:Min=721640,Max=1.29e7; South:Min=647484,Max=2.65e7; West:Min=579679,Max=3.84e7
#Northeast=5.60e7;Midwest=6.75e7;South=1.19e8;West=7.23e7


# 7.  Add variables from us_states_df to us_states, and create a new object called us_states_stats. #What function did you use and why? Which variable is the key in both datasets? #What is the class of the new object?

us_states_stats = us_states %>%
  left_join(us_states_df, by = c("NAME" = "state"))
class(us_states_stats)
## [1] "sf"         "data.frame"
# left_join because it combines key variables.  it is "name"

# 8. us_states_df has two more rows than us_states. How can you find them? #(hint: try to use the dplyr::anti_join() function)

us_states_df %>%
  anti_join(st_drop_geometry(us_states), by = c("state" = "NAME"))
## # A tibble: 2 x 5
##   state  median_income_10 median_income_15 poverty_level_10 poverty_level_15
##   <chr>             <dbl>            <dbl>            <dbl>            <dbl>
## 1 Alaska            29509            31455            64245            72957
## 2 Hawaii            29945            31051           124627           153944
# 9. What was the population density in 2015 in each state? #What was the population density in 2010 in each state?

us_states2 = us_states %>%
  mutate(pop_dens_15 = total_pop_15/AREA,
         pop_dens_10 = total_pop_10/AREA)

# 10. How much has population density changed between 2010 and 2015 in each state? #Calculate the change in percentages and map them.

us_popdens_change = us_states2 %>%
  mutate(pop_dens_diff_10_15 = pop_dens_15 - pop_dens_10,
         pop_dens_diff_10_15p = (pop_dens_diff_10_15/pop_dens_15) * 100)
plot(us_popdens_change["pop_dens_diff_10_15p"])

# 11. Change the columns names in us_states to lowercase. #(Hint: helper functions - tolower() and colnames() may help).

us_states %>%
  setNames(tolower(colnames(.)))
## Simple feature collection with 49 features and 6 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7042 ymin: 24.55868 xmax: -66.9824 ymax: 49.38436
## geographic CRS: NAD83
## First 10 features:
##    geoid        name   region             area total_pop_10 total_pop_15
## 1     01     Alabama    South 133709.27 [km^2]      4712651      4830620
## 2     04     Arizona     West 295281.25 [km^2]      6246816      6641928
## 3     08    Colorado     West 269573.06 [km^2]      4887061      5278906
## 4     09 Connecticut Norteast  12976.59 [km^2]      3545837      3593222
## 5     12     Florida    South 151052.01 [km^2]     18511620     19645772
## 6     13     Georgia    South 152725.21 [km^2]      9468815     10006693
## 7     16       Idaho     West 216512.66 [km^2]      1526797      1616547
## 8     18     Indiana  Midwest  93648.40 [km^2]      6417398      6568645
## 9     20      Kansas  Midwest 213037.08 [km^2]      2809329      2892987
## 10    22   Louisiana    South 122345.76 [km^2]      4429940      4625253
##                          geometry
## 1  MULTIPOLYGON (((-88.20006 3...
## 2  MULTIPOLYGON (((-114.7196 3...
## 3  MULTIPOLYGON (((-109.0501 4...
## 4  MULTIPOLYGON (((-73.48731 4...
## 5  MULTIPOLYGON (((-81.81169 2...
## 6  MULTIPOLYGON (((-85.60516 3...
## 7  MULTIPOLYGON (((-116.916 45...
## 8  MULTIPOLYGON (((-87.52404 4...
## 9  MULTIPOLYGON (((-102.0517 4...
## 10 MULTIPOLYGON (((-92.01783 2...
# 12. Using us_states and us_states_df create a new object called us_states_sel. #The new object should have only two variables - median_income_15 and geometry. #Change the name of the median_income_15 column to Income.

us_states_sel = us_states %>%
  left_join(us_states_df, by = c("NAME" = "state")) %>%
  dplyr::select(Income = median_income_15)

# 13. Calculate the change in median income between 2010 and 2015 for each state. #Bonus: what was the minimum, average and maximum median income in 2015 for each region? #What is the region with the largest increase of the median income?

us_income_change = us_states %>%
  left_join(us_states_df, by = c("NAME" = "state")) %>%
  mutate(income_change = median_income_15 - median_income_10) 
us_income_change_reg = us_income_change %>%
  group_by(REGION) %>%
  summarize(min_income_change = min(income_change),
            mean_income_change = mean(income_change),
            max_income_change = max(income_change))
## `summarise()` ungrouping output (override with `.groups` argument)
# 14. Create a raster from scratch with nine rows and columns and a resolution of 0.5 decimal degrees (WGS84). #Fill it with random numbers. Extract the values of the four corner cells.

r = raster(nrow = 9, ncol = 9, res = 0.5, xmn = 0, xmx = 4.5,
           ymn = 0, ymx = 4.5, vals = rnorm(81))
r[c(1, 9, 81 - 9, 81)]
## [1]  1.6187955  0.3095989 -2.0557295  0.2341100
r[c(1, nrow(r)), c(1, ncol(r))]
## [1] 1.6187955 0.3095989 0.2469161 0.2341100
# 15. What is the most common class of our example raster grain (hint: modal())?

grain_size = c("clay", "silt", "sand")
grain = raster(nrow = 6, ncol = 6, res = 0.5, 
               xmn = -1.5, xmx = 1.5, ymn = -1.5, ymx = 1.5,
               vals = factor(sample(grain_size, 36, replace = TRUE), 
                             levels = grain_size))
cellStats(grain, modal) %>%
  factorValues(grain, .)
##   VALUE
## 1  sand
factorValues(grain, modal(values(grain)))
##   VALUE
## 1  sand
# sand

# 16.Plot the histogram and the boxplot of the data(dem, package = “spDataLarge”) raster.

install.packages("spDataLarge", repos = "https://nowosad.github.io/drat/", type = "source")
## Installing package into 'C:/Users/rache/Documents/R/win-library/3.6'
## (as 'lib' is unspecified)
data(dem, package = "spDataLarge")
par(mfrow = c(1, 2))
hist(dem)
boxplot(dem)

library(sf)
library(raster)
library(dplyr)
library(spData)

# Chapter 4

#1. It was established in Section 4.2 that Canterbury was the region of New Zealand containing most of the 100 highest points in the country. #How many of these high points does the Canterbury region contain?

library(tmap)
## Warning: package 'tmap' was built under R version 3.6.3
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(nz) + qtm(nz_height)
canterbury = nz %>% filter(Name == "Canterbury")
canterbury_height = nz_height[canterbury, ]
nrow(canterbury_height)
## [1] 70
# 70

# 2. Which region has the second highest number of nz_height points in, and how many does it have?

nz_height_count = aggregate(nz_height, nz, length)
nz_height_combined = cbind(nz, count = nz_height_count$elevation)
nz_height_combined %>% 
  st_set_geometry(NULL) %>% 
  dplyr::select(Name, count) %>% 
  arrange(desc(count)) %>% 
  slice(2)
##         Name count
## 1 West Coast    22
# west coast; 22

# 3.Generalizing the question to all regions: #how many of New Zealand’s 16 regions contain points which belong to the top 100 highest points in the country? #Which regions? #Bonus: create a table listing these regions in order of the number of points and their name.

nz_height_count = aggregate(nz_height, nz, length)


# 4. Use data(dem, package = “spDataLarge”), and reclassify the elevation in three classes: low, medium and high. #Secondly, attach the NDVI raster (data(ndvi, package = “spDataLarge”)) and compute the mean NDVI and the mean elevation for each altitudinal class.


data(dem, package = "spDataLarge")
data(ndvi, package = "spDataLarge")
summary(dem)
##          dem
## Min.     238
## 1st Qu.  366
## Median   478
## 3rd Qu.  748
## Max.    1094
## NA's       0
#keep getting errors for classInt package