Libraries

Read Business Master - April 2020

# Load the April 2020 Business Master extract from "bm2004.csv".
# Every column is read as character (colClasses = "character").
#
# read.table() by default treats both ' and " as quote characters and # as a
# comment marker. In a comma-separated file, a value containing an apostrophe
# or a "#" can therefore swallow or truncate fields. Quoting is restricted to
# double quotes and comment parsing is disabled so such values are read
# verbatim; these are the same settings the read.csv()/read.csv2() wrappers
# apply.
bm202004 <- read.table(file = "bm2004.csv",
                       sep = ",",
                       header = TRUE,
                       quote = "\"",
                       comment.char = "",
                       colClasses = "character")

Filter Business Master to PA VSOs (STATE == "PA", NTEECC == "W30")

# Keep only the Business Master rows whose STATE is "PA" and whose NTEECC
# code is "W30".
pa_vso_bm202004 <- filter(bm202004, STATE == "PA" & NTEECC == "W30")

Read Core 2018 Full - CO (all Other 501(c) organizations)

# 2018 Core "full 990" file for CO organizations, every column kept as
# character. read.csv() already uses a comma separator and header = TRUE.
core2018co_full <- read.csv(
  "core/coreco.core2018co_full990.csv",
  colClasses = "character"
)

Read Core 2018 - CO (all Other 501(c) organizations)

# 2018 Core file for CO organizations, every column kept as character.
# read.csv() already uses a comma separator and header = TRUE.
core2018co <- read.csv(
  "core/coreco.core2018co.csv",
  colClasses = "character"
)

Read Core 2018 Full - PC (Public Charity)

# 2018 Core "full 990" file for PC organizations, every column kept as
# character. read.csv() already uses a comma separator and header = TRUE.
core2018pc_full <- read.csv(
  "core/coreco.core2018pc_full990.csv",
  colClasses = "character"
)

Read Core 2018 - PC (Public Charity)

# 2018 Core file for PC organizations, every column kept as character.
# read.csv() already uses a comma separator and header = TRUE.
core2018pc <- read.csv(
  "core/coreco.core2018pc.csv",
  colClasses = "character"
)

Compare overlap in core files with BM VSOs based on STATE==“PA”, NTEECC==“W30”

# Inner-join the PA / W30 Business Master subset (left side) to each 2018
# Core file on EIN, so each result holds only the organizations present in
# both tables.
co_full_990_pa_w30 <- pa_vso_bm202004 %>%
  inner_join(core2018co_full, by = "EIN")

co_general_pa_w30 <- pa_vso_bm202004 %>%
  inner_join(core2018co, by = "EIN")

pc_full_pa_w30 <- pa_vso_bm202004 %>%
  inner_join(core2018pc_full, by = "EIN")

pc_general_pa_w30 <- pa_vso_bm202004 %>%
  inner_join(core2018pc, by = "EIN")

Merge Core Files Data (Overlap, Priority?)

# Merge and match name
#all_core_merge <- all_core_files_list %>% reduce(full_join, by='EIN')

#all_core_merge <- co_general_pa_w30 %>%
#                    full_join(co_full_990_pa_w30, by='EIN') %>%
#                    full_join(pc_general_pa_w30, by='EIN') %>%
#                    full_join(pc_full_pa_w30, by='EIN')

# Stack the four joined tables row-wise into a single data frame
# (CO general, CO full 990, PC general, PC full 990 - in that order).
all_core_bind <- bind_rows(list(
  co_general_pa_w30,
  co_full_990_pa_w30,
  pc_general_pa_w30,
  pc_full_pa_w30
))

Unique Orgs (by EIN)

# Show one row per EIN, keeping every other column (.keep_all = TRUE).
# The result is only displayed; all_core_bind itself is left unchanged.
# Ordering step left disabled: arrange(desc(EIN))
distinct(all_core_bind, EIN, .keep_all = TRUE)

Map

library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(urbnmapr)
library(urbnthemes)
## Setting Mac/Linux options...
## 
## Attaching package: 'urbnthemes'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_bar, geom_col, scale_color_discrete, scale_color_gradientn,
##     scale_color_ordinal, scale_colour_discrete, scale_colour_gradientn,
##     scale_colour_ordinal, scale_fill_discrete, scale_fill_gradientn,
##     scale_fill_ordinal
library(mapview)

# Subset by column: keep only the identifying, location and financial fields
# used for mapping. FIPS.x is listed once (it was previously selected twice).
cores_map_data <- all_core_bind %>%
  select(EIN, NAME.x, SEC_NAME.x, CITY.x, STATE.x, ZIP,
         FIPS.x, LATITUDE, LONGITUDE, CENSUSTRACT,
         SUBSECCD.x, ASSETS, INCOME) %>%
  # Drop rows without usable coordinates for now. NA, empty, and
  # whitespace-only strings are all removed here, because none of them can
  # be converted into a point geometry in the next step.
  filter(
    !is.na(LONGITUDE), !is.na(LATITUDE),
    trimws(LONGITUDE) != "", trimws(LATITUDE) != ""
  )

# Convert the table into an sf point layer: LONGITUDE supplies x, LATITUDE
# supplies y, and the coordinates are tagged with EPSG:4326.
cores_map_data <- st_as_sf(
  cores_map_data,
  coords = c("LONGITUDE", "LATITUDE"),
  crs = 4326
)

# County polygons from urbnmapr (requested as sf), limited to Pennsylvania.
pa_counties <- filter(
  get_urbn_map("counties", sf = TRUE),
  state_name == "Pennsylvania"
)
## old-style crs object detected; please recreate object with a recent sf::st_crs()
# Static map: Pennsylvania counties as the base layer, with the geocoded
# organizations drawn on top as yellow points, styled with theme_urbn_map().
ggplot() +
  geom_sf(data = pa_counties) +
  geom_sf(
    data = cores_map_data,
    color = palette_urbn_main["yellow"],
    size = 2.0
  ) +
  theme_urbn_map()

# Display the same sf point layer (cores_map_data) with mapview().
mapview(cores_map_data)