Metadata

Purpose

What is the purpose of your function? What problem does it solve? When would you use it?

For my summer REU, one of our projects included analyzing many datasets about Ukraine. This function draws a map of Ukraine's oblasts shaded by a chosen variable, using one consistent set of ggplot2 defaults so they do not have to be re-specified for every dataset.

Dependencies/Setup

Load any packages or other pre-processing steps that are necessary to make your function work in the chunk below. The chunk should be as parsimonious as possible.

library(sf)
library(tidyverse)
library(haven)
library(mosaic)
library(RColorBrewer)

Arguments

List the arguments your function takes. Specify the data type, default value (if applicable), and any other assumptions you are making about the value of each argument.

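  • data: an sf data frame (e.g. a shapefile read with st_read()) with one row per oblast; no default
  • fill_var: a character string giving the name of the column in data used to fill the regions; no default
  • title: a character string used as the title of the map; defaults to NULL (no title)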

Result

What does your function return?

  • ukraine_map returns a plot of Ukraine split up into regions based on the input variable fill_var.

Code

Paste your code in the chunk below.

Alternatively, if your code is already in a package, you may also show the code from source using the double-colon operator and omitting the parentheses (e.g., base::mean). This will only work if the package is installed, so be sure to list the correct dependencies above.

#' Draw a filled map of Ukraine with shared ggplot2 defaults.
#'
#' Wraps the ggplot() + geom_sf() boilerplate so the same settings (no panel
#' grid lines, a titled fill legend) do not have to be repeated for every
#' data set.
#'
#' @param data An sf data frame with one row per region to draw.
#' @param fill_var Either a single character string naming the column of
#'   `data` to map to the fill colour (preferred), or a vector of fill values
#'   with one element per row of `data` (accepted so that existing calls of
#'   the form `ukraine_map(d, d$col)` keep working).
#' @param title Title of the map; defaults to `NULL` (no title).
#' @param fill_label Title of the fill legend; defaults to `"Proportion"`.
#' @return A ggplot object.
ukraine_map <- function(data, fill_var, title = NULL,
                        fill_label = "Proportion") {
  if (is.character(fill_var) && length(fill_var) == 1L) {
    if (!fill_var %in% names(data)) {
      stop("Column `", fill_var, "` not found in `data`.", call. = FALSE)
    }
    # `.data[[name]]` is the supported replacement for the deprecated
    # aes_string(); it also copes with non-syntactic column names.
    fill_mapping <- aes(fill = .data[[fill_var]])
  } else {
    if (!length(fill_var) %in% c(1L, nrow(data))) {
      stop(
        "`fill_var` must be a column name or supply one value per row of ",
        "`data`.",
        call. = FALSE
      )
    }
    # Inline the supplied values so a column that happens to be called
    # `fill_var` in `data` cannot shadow them.
    fill_mapping <- aes(fill = !!fill_var)
  }

  ggplot(data = data, mapping = fill_mapping) +
    geom_sf() +
    labs(title = title, fill = fill_label) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    )
}

Examples

Give an example of typical usage of your function. Describe in words what is being illustrated.

This plots a map of the proportion of Ukrainians concerned about air pollution.

# CLEANING THE DATA
# Survey responses: the usia1997 data, stored as an SPSS .sav file
usia97 <- read_sav("USIA1997_ENG.sav")

# Map geometry: the Ukraine shapefile
ukroblast <- st_read("gadm36_UKR_1.shp")
## Reading layer `gadm36_UKR_1' from data source `/Users/abarylsky/Documents/Alina's old laptop/Documents/Fall 2019/SDS 390/div-ii-project-abarylsky/gadm36_UKR_1.shp' using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 13 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 22.14045 ymin: 44.38597 xmax: 40.21807 ymax: 52.37503
## geographic CRS: WGS 84
# environmental concerns, oblast
### proportion concerned about air pollution

# Oblast names indexed by the survey's numeric oblast code (code 1 is
# "Crimea", ..., code 27 is "Sevastopol'"). Spellings follow the NAME_1
# values that the region classification compares against, so the two data
# sets can be matched by name.
oblast_names <- c(
  "Crimea", "Kiev City", "Kiev", "Vinnytsya", "Volyn", "Dnipropetrovs'k",
  "Donets'k", "Zhytomyr", "Transcarpathia", "Zaporizhzhya",
  "Ivano-Frankivs'k", "Kirovohrad", "Luhans'k", "L'viv", "Mykolayiv",
  "Odessa", "Poltava", "Rivne", "Sumy", "Ternopil'", "Kharkiv", "Kherson",
  "Khmel'nyts'kyy", "Cherkasy", "Chernihiv", "Chernivtsi", "Sevastopol'"
)

environmentraw <-
  table(usia97$V175, usia97$V107) %>%
  addmargins() %>%
  as.data.frame.matrix() %>%
  # share of each row's total that falls in column `2` (a proportion, 0-1)
  mutate(air_concern = `2` / Sum) %>%
  rownames_to_column()

environment <-
  environmentraw %>%
  # translate the oblast code held in `rowname` into the oblast name
  mutate(
    Oblast = oblast_names[match(rowname, as.character(seq_along(oblast_names)))]
  ) %>%
  # drop rows that are not an oblast code, i.e. the "Sum" margin row
  dplyr::filter(!is.na(Oblast)) %>%
  select(Oblast, air_concern) %>%
  arrange(Oblast)

# bin data, add to shape file
environment2 <- environment %>%
  mutate(bin = derivedFactor("< 0.12" = air_concern < 0.12,
                             "0.12-0.17" = air_concern >= 0.12 & air_concern < 0.17,
                             "0.17-0.25" = air_concern >= 0.17 & air_concern < 0.25,
                             "> 0.25" = air_concern >= 0.25))
# Attach by oblast name, not by row position: the shapefile's row order is not
# guaranteed to equal the alphabetical order of `environment2`, and an oblast
# missing from the survey simply becomes NA instead of shifting every row.
ukroblast$air_concern <-
  environment2$bin[match(as.character(ukroblast$NAME_1), environment2$Oblast)]
remove(environment2)
# PLOT USING NEW FUNCTION
# fill_var is passed as a column name (a character string), matching the
# documented argument type, rather than as the column's values.
pollution_map <- ukraine_map(
  ukroblast,
  "air_concern",
  title = "Proportion who are concerned about air pollution"
)

pollution_map

This plots a map of Ukraine split into 8 regions defined by Barrington (2008).

# CLEANING THE DATA
# Assign every oblast (by its NAME_1 value) to one of Barrington's 8 regions.
# Rules are listed in the order that fixes the factor's level order.
ukroblast$region8 <- with(
  ukroblast,
  derivedFactor(
    East = NAME_1 %in% c("Donets'k", "Luhans'k"),
    Eastcentral = NAME_1 %in% c("Kharkiv", "Zaporizhzhya", "Dnipropetrovs'k"),
    Krym = NAME_1 %in% c("Crimea", "Sevastopol'"),
    South = NAME_1 %in% c("Kherson", "Mykolayiv", "Odessa"),
    Northcentral = NAME_1 %in% c(
      "Chernihiv", "Sumy", "Poltava", "Cherkasy", "Kirovohrad", "Kiev",
      "Kiev City"
    ),
    Westcentral = NAME_1 %in% c(
      "Khmel'nyts'kyy", "Zhytomyr", "Vinnytsya", "Rivne", "Volyn"
    ),
    West = NAME_1 %in% c("Ivano-Frankivs'k", "L'viv", "Ternopil'"),
    Southwest = NAME_1 %in% c("Chernivtsi", "Transcarpathia")
  )
)
# PLOT USING NEW FUNCTION
# fill_var is passed as a column name (a character string), matching the
# documented argument type, rather than as the column's values.
eight_regions <- ukraine_map(
  ukroblast,
  "region8",
  title =  "Barrington's 8-Region Classification"
) +
  # ukraine_map() titles the legend "Proportion" by default; region8 is a
  # categorical variable, so relabel the legend here.
  labs(fill = "Region")

eight_regions