This is my adaptation, using an Irish shapefile, of the original article by Matt Strimas-Mackey. Kudos!
Convenience function for loading packages.
# Report, for each name in `mypkg`, whether that package is installed.
#
# Args:
#   mypkg: character vector of package names.
# Returns:
#   An unnamed logical vector the same length as `mypkg`.
#
# system.file() is used instead of installed.packages(): the latter reads the
# metadata of every installed package, and its help page advises against using
# it merely to find out whether a named package is installed.
is_installed <- function(mypkg) {
  vapply(
    mypkg,
    # system.file() returns "" when the package cannot be found.
    function(pkg) !is.na(pkg) && nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
}
# Attach each package in `package_names`, installing it first when missing.
#
# Args:
#   package_names: character vector of package names.
#   repos: CRAN mirror URL passed to install.packages(). Defaults to the HTTPS
#     CRAN cloud redirector instead of the previously hard-coded plain-HTTP
#     mirror, so packages are not downloaded over an unencrypted connection.
# Returns:
#   NULL, invisibly; the function is called for its side effects.
load_or_install <- function(package_names,
                            repos = "https://cloud.r-project.org") {
  for (package_name in package_names) {
    if (!is_installed(package_name)) {
      install.packages(package_name, repos = repos)
    }
    # character.only = TRUE because the package name is held in a variable.
    library(
      package_name,
      character.only = TRUE,
      quietly = TRUE,
      verbose = FALSE
    )
  }
  invisible(NULL)
}
See prerequisites below.
# DEV VERSION OF GGPLOT2 - THIS MAY NOT ALWAYS BE NECESSARY
devtools::install_github("tidyverse/ggplot2")
## Skipping install of 'ggplot2' from a github remote, the SHA1 (08e135e0) has not changed since last install.
## Use `force = TRUE` to force installation
# DEV VERSION OF SF - THIS MAY NOT ALWAYS BE NECESSARY
# The sf sources live at r-spatial/sf; "edzer/sfr" was the repository's
# former name.
devtools::install_github("r-spatial/sf")
## Skipping install of 'sf' from a github remote, the SHA1 (a67b3a38) has not changed since last install.
## Use `force = TRUE` to force installation
# Install (when missing) and attach the packages used in the rest of this
# document.
load_or_install(c("sf", "tidyverse", "viridis", "rvest", "scales"))
## Linking to GEOS 3.5.1, GDAL 2.1.2, proj.4 4.9.2, lwgeom 2.2.1 r14555
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
Read in a locally downloaded shapefile from the data subfolder as an sf object. See DATASOURCES below for the original source of the data. Adjust the path below to reflect your environment.
# Path to the shapefile, relative to the working directory.
file1 <- "data/cso-Census2011_Admin_Counties_generalised20m.shp"
IRL_counties_SF <- st_read(file1, quiet = TRUE)
# The COUNTYNAME for Dun Laoghaire-Rathdown contains a multibyte string representing ú
# This causes problems when glimpsing the data, so the name is replaced with a
# plain-ASCII spelling.
IRL_counties_SF$COUNTYNAME <- as.character(IRL_counties_SF$COUNTYNAME)
# Find the row by the ASCII part of its name instead of a hard-coded row
# number, so the fix does not depend on the row order of the shapefile.
# useBytes = TRUE keeps the comparison byte-wise, which sidesteps the
# multibyte character.
is_dlr <- grepl(
  "Laoghaire",
  IRL_counties_SF$COUNTYNAME,
  fixed = TRUE,
  useBytes = TRUE
)
IRL_counties_SF$COUNTYNAME[is_dlr] <- "Dun Laoghaire-Rathdown"
Have a look at the sf object.
# Show the class vector of the object returned by st_read().
IRL_counties_SF %>% class()
## [1] "sf" "data.frame"
# Compact overview: one line per column with its type and first few values.
tibble::glimpse(IRL_counties_SF)
## Observations: 34
## Variables: 21
## $ NUTS1 <fctr> IE0, IE0, IE0, IE0, IE0, IE0, IE0, IE0, IE0, IE0, ...
## $ NUTS1NAME <fctr> Ireland, Ireland, Ireland, Ireland, Ireland, Irela...
## $ NUTS2 <fctr> IE02, IE02, IE02, IE02, IE02, IE02, IE01, IE01, IE...
## $ NUTS2NAME <fctr> Southern and Eastern, Southern and Eastern, Southe...
## $ NUTS3 <fctr> IE023, IE023, IE023, IE024, IE024, IE024, IE013, I...
## $ NUTS3NAME <fctr> Mid-West, Mid-West, Mid-West, South-East (IE), Sou...
## $ COUNTY <fctr> 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32...
## $ COUNTYNAME <chr> "Limerick City", "Limerick County", "North Tipperar...
## $ GEOGID <fctr> C20, C21, C22, C23, C24, C25, C26, C27, C28, C29, ...
## $ MALE2011 <dbl> 27947, 67868, 35340, 44244, 22921, 33543, 36514, 88...
## $ FEMALE2011 <dbl> 29159, 66835, 34982, 44188, 23811, 33520, 39015, 86...
## $ TOTAL2011 <dbl> 57106, 134703, 70322, 88432, 46732, 67063, 75529, 1...
## $ PPOCC2011 <dbl> 22589, 47555, 25720, 32770, 18344, 24174, 28248, 61...
## $ UNOCC2011 <dbl> 4092, 7920, 5070, 5414, 3997, 5849, 5407, 16496, 57...
## $ VACANT2011 <dbl> 3273, 6841, 4527, 4817, 3300, 5297, 3755, 15066, 55...
## $ HS2011 <dbl> 26681, 55475, 30790, 38184, 22341, 30023, 33655, 77...
## $ PCVAC20111 <dbl> 12.3, 12.3, 14.7, 12.6, 14.8, 17.6, 11.2, 19.4, 30....
## $ LAND_AREA <dbl> 28.39559, 2654.65653, 1992.19506, 2255.69472, 38.34...
## $ TOTAL_AREA <dbl> 30.77455, 2725.34665, 2046.39394, 2258.87052, 41.63...
## $ CREATEDATE <fctr> 27-03-2012, 27-03-2012, 27-03-2012, 27-03-2012, 27...
## $ geometry <simple_feature> MULTIPOLYGON(((155832.1608 ..., MULTIPOL...
# Print the same data as a tibble for a truncated, typed preview.
IRL_counties_SF %>% as_tibble()
## # A tibble: 34 × 21
## NUTS1 NUTS1NAME NUTS2 NUTS2NAME NUTS3
## <fctr> <fctr> <fctr> <fctr> <fctr>
## 1 IE0 Ireland IE02 Southern and Eastern IE023
## 2 IE0 Ireland IE02 Southern and Eastern IE023
## 3 IE0 Ireland IE02 Southern and Eastern IE023
## 4 IE0 Ireland IE02 Southern and Eastern IE024
## 5 IE0 Ireland IE02 Southern and Eastern IE024
## 6 IE0 Ireland IE02 Southern and Eastern IE024
## 7 IE0 Ireland IE01 Border,Midland and Western IE013
## 8 IE0 Ireland IE01 Border,Midland and Western IE013
## 9 IE0 Ireland IE01 Border,Midland and Western IE011
## 10 IE0 Ireland IE01 Border,Midland and Western IE013
## # ... with 24 more rows, and 16 more variables: NUTS3NAME <fctr>,
## # COUNTY <fctr>, COUNTYNAME <chr>, GEOGID <fctr>, MALE2011 <dbl>,
## # FEMALE2011 <dbl>, TOTAL2011 <dbl>, PPOCC2011 <dbl>, UNOCC2011 <dbl>,
## # VACANT2011 <dbl>, HS2011 <dbl>, PCVAC20111 <dbl>, LAND_AREA <dbl>,
## # TOTAL_AREA <dbl>, CREATEDATE <fctr>, geometry <simple_feature>
According to the original article, “The geometry list-column of an sf object is an object of class sfc and an additional class corresponding to the geometry type, in this case sfc_MULTIPOLYGON. It can be accessed with st_geometry(). Additional information about the features, such as the coordinate reference system, is stored as attributes:”
# Extract the geometry list-column, keep it for later use, and print it.
IRL_counties_SF_geom <- st_geometry(IRL_counties_SF)
print(IRL_counties_SF_geom)
## Geometry set for 34 features
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 17491.14 ymin: 19589.93 xmax: 334558.6 ymax: 466919.3
## epsg (SRID): NA
## proj4string: +proj=tmerc +lat_0=53.5 +lon_0=-8 +k=1.000035 +x_0=200000 +y_0=250000 +datum=ire65 +units=m +no_defs
## First 5 geometries:
## MULTIPOLYGON(((155832.1608 160028.7937, 155915....
## MULTIPOLYGON(((167907.4892 167002.3166, 167957....
## MULTIPOLYGON(((195253.807 212366.609300001, 195...
## MULTIPOLYGON(((225302.1704 164757.5625, 226080....
## MULTIPOLYGON(((260669.0198 113118.989399999, 26...
# Class vector of the geometry column.
class(st_geometry(IRL_counties_SF))
## [1] "sfc_MULTIPOLYGON" "sfc"
# List the attributes stored alongside the geometries.
IRL_counties_SF_geom %>% attributes()
## $n_empty
## [1] 0
##
## $crs
## $epsg
## [1] NA
##
## $proj4string
## [1] "+proj=tmerc +lat_0=53.5 +lon_0=-8 +k=1.000035 +x_0=200000 +y_0=250000 +datum=ire65 +units=m +no_defs"
##
## attr(,"class")
## [1] "crs"
##
## $class
## [1] "sfc_MULTIPOLYGON" "sfc"
##
## $precision
## [1] 0
##
## $bbox
## xmin ymin xmax ymax
## 17491.14 19589.93 334558.59 466919.31
Again from the article, “Finally, individual simple features are sfg objects with additional classes corresponding to the specific type of feature. Here classes XY and MULTIPOLYGON specify that this is a 2-dimensional MULTIPOLYGON geometry.”
# Class vector of the first individual geometry in the column.
class(IRL_counties_SF_geom[[1]])
## [1] "XY" "MULTIPOLYGON" "sfg"
Let’s do a simple plot using the development version of ggplot2.
# Map of the counties filled by their TOTAL2011 value, with the legend titled
# "Population" and its labels formatted by comma().
ggplot(IRL_counties_SF) +
  geom_sf(aes(fill = TOTAL2011)) +
  ggtitle("Total Population 2011") +
  # `labels` is written out in full; the shorter `label` only worked through
  # partial argument matching.
  scale_fill_continuous("Population", labels = comma) +
  theme_bw()
In my experience, SF was a tad fiddly to get working on Ubuntu Linux so note that you may need to do the following on Ubuntu beforehand. YMMV.