Data

mdsr(Modern Data Science with R) 패키지와 공간 데이터 분석 전용 sp 패키지 설치. sp는 공간 객체에 대한 class 정의 포함.

# install.packages("mdsr", repos = "https://cran.rstudio.com")
library(mdsr) 
# install.packages("sp", repos = "https://cran.rstudio.com")
library(sp) 
head(CholeraDeaths)
##            coordinates Id Count
## 1 (529308.7, 181031.4)  0     3
## 2 (529312.2, 181025.2)  0     2
## 3 (529314.4, 181020.3)  0     1
## 4 (529317.4, 181014.3)  0     1
## 5 (529320.7, 181007.9)  0     4
## 6   (529336.7, 181006)  0     2
str(CholeraDeaths)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  250 obs. of  2 variables:
##   .. ..$ Id   : int [1:250] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ Count: int [1:250] 3 2 1 1 4 2 2 2 3 2 ...
##   ..@ coords.nrs : num(0) 
##   ..@ coords     : num [1:250, 1:2] 529309 529312 529314 529317 529321 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "coords.x1" "coords.x2"
##   ..@ bbox       : num [1:2, 1:2] 529160 180858 529656 181306
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "coords.x1" "coords.x2"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs"

John Snow의 콜레라 데이터 지도 (사망자)(1854년, 127명 사망)

knitr::include_graphics("../pics/Snow-cholera-map-1.jpg", dpi = 96)

산점도

plot(CholeraDeaths) 

공간 데이터 분석 전용 라이브러리를 담고 있는 rgdal 설치. 다운로드한 압축 파일을 data 폴더에 압축을 풀어 둠. CholeraDeathsPumps의 두 공간정보 데이터 읽어들임. shapefiles 는 벡터 형식의 공간자료 저장

  • GDAL : Geospatial Data Abstraction Library
  • OGR : OpenGIS Simple Features Reference Implementation
  • dsn : data source name
  • SHP : shape file format
  • KML : Keyhole Markup Language
  • ESRI : Environmental Systems Research Institute, ArcView 개발

list.files()는 현재 워킹 폴더의 내용물 확인.

ogrListLayerslayer 열거용. ogrInfo는 특정 layer의 지리 정보 확인용

Cholera_DeathsPumps의 두 layer 확인 후 readOGR()로 읽어들임.

str로 특정 layer내의 데이터 구조 확인. str로 data 요소의 데이터 구조 파악.

download.file()의 용법 익히기. dsn 설정에서 “../data/SnowGIS_SHp/”로 설정하면 linux나 MacOS에서는 작동하지만 Windows에서는 “Cannot open data source” 에러가 나오게 됨. folder를 의미하는 마지막 “/” 를 쓰지 않아야 함.

# install.packages("rgdal", repos = "https://cran.rstudio.com")
library(rgdal) 
## rgdal: version: 1.3-6, (SVN revision 773)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
##  Linking to sp version: 1.3-1
## 
## Attaching package: 'rgdal'
## The following object is masked from 'package:mosaic':
## 
##     project
# download.file("http://rtwilson.com/downloads/SnowGIS_SHP.zip", "../data/SnowGIS_SHP.zip")
dsn <- "../data/SnowGIS_SHP"  
#> dsn의 파일 리스트
list.files(dsn) 
##  [1] "Cholera_Deaths.dbf"          "Cholera_Deaths.prj"         
##  [3] "Cholera_Deaths.sbn"          "Cholera_Deaths.sbx"         
##  [5] "Cholera_Deaths.shp"          "Cholera_Deaths.shx"         
##  [7] "Icon\r"                      "OSMap_Grayscale.tfw"        
##  [9] "OSMap_Grayscale.tif"         "OSMap_Grayscale.tif.aux.xml"
## [11] "OSMap_Grayscale.tif.ovr"     "OSMap.tfw"                  
## [13] "OSMap.tif"                   "Pumps.dbf"                  
## [15] "Pumps.prj"                   "Pumps.sbx"                  
## [17] "Pumps.shp"                   "Pumps.shx"                  
## [19] "README.txt"                  "SnowMap.tfw"                
## [21] "SnowMap.tif"                 "SnowMap.tif.aux.xml"        
## [23] "SnowMap.tif.ovr"
ogrListLayers(dsn)
## [1] "Cholera_Deaths" "Pumps"         
## attr(,"driver")
## [1] "ESRI Shapefile"
## attr(,"nlayers")
## [1] 2
ogrInfo(dsn, layer = "Cholera_Deaths")
## Source: "/Users/coop2711/Google 드라이브/Works/Class/Data_Science/R.WD/Maps/data/SnowGIS_SHP", layer: "Cholera_Deaths"
## Driver: ESRI Shapefile; number of rows: 250 
## Feature type: wkbPoint with 2 dimensions
## Extent: (529160.3 180857.9) - (529655.9 181306.2)
## CRS: +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs  
## LDID: 87 
## Number of fields: 2 
##    name type length typeName
## 1    Id    0      6  Integer
## 2 Count    0      4  Integer
# readOGR(dsn, "Pumps")
CholeraDeaths <- readOGR(dsn, layer = "Cholera_Deaths")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/coop2711/Google 드라이브/Works/Class/Data_Science/R.WD/Maps/data/SnowGIS_SHP", layer: "Cholera_Deaths"
## with 250 features
## It has 2 fields
Pumps <- readOGR(dsn, layer = "Pumps")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/coop2711/Google 드라이브/Works/Class/Data_Science/R.WD/Maps/data/SnowGIS_SHP", layer: "Pumps"
## with 8 features
## It has 1 fields
summary(CholeraDeaths)
## Object of class SpatialPointsDataFrame
## Coordinates:
##                min      max
## coords.x1 529160.3 529655.9
## coords.x2 180857.9 181306.2
## Is projected: TRUE 
## proj4string :
## [+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000
## +y_0=-100000 +ellps=airy +units=m +no_defs]
## Number of points: 250
## Data attributes:
##        Id        Count       
##  Min.   :0   Min.   : 1.000  
##  1st Qu.:0   1st Qu.: 1.000  
##  Median :0   Median : 1.000  
##  Mean   :0   Mean   : 1.956  
##  3rd Qu.:0   3rd Qu.: 2.000  
##  Max.   :0   Max.   :15.000
summary(Pumps)
## Object of class SpatialPointsDataFrame
## Coordinates:
##                min      max
## coords.x1 529183.7 529748.9
## coords.x2 180660.5 181193.7
## Is projected: TRUE 
## proj4string :
## [+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000
## +y_0=-100000 +ellps=airy +units=m +no_defs]
## Number of points: 8
## Data attributes:
##        Id   
##  Min.   :0  
##  1st Qu.:0  
##  Median :0  
##  Mean   :0  
##  3rd Qu.:0  
##  Max.   :0
str(CholeraDeaths@data)
## 'data.frame':    250 obs. of  2 variables:
##  $ Id   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Count: int  3 2 1 1 4 2 2 2 3 2 ...
str(Pumps@data)
## 'data.frame':    8 obs. of  1 variable:
##  $ Id: int  0 0 0 0 0 0 0 0

ggplot

id와 Count의 산점도에서 출발.

spplot

# install.packages("mapproj", repos = "https://cran.rstudio.com")
library(mapproj)
## Loading required package: maps
spplot(CholeraDeaths) 

geom_point, coord_quickmap

head(coordinates(CholeraDeaths), n = 20)
##       coords.x1 coords.x2
##  [1,]  529308.7  181031.4
##  [2,]  529312.2  181025.2
##  [3,]  529314.4  181020.3
##  [4,]  529317.4  181014.3
##  [5,]  529320.7  181007.9
##  [6,]  529336.7  181006.0
##  [7,]  529290.1  181024.4
##  [8,]  529301.0  181021.2
##  [9,]  529285.0  181020.2
## [10,]  529288.4  181031.8
## [11,]  529280.6  181026.6
## [12,]  529264.7  181035.2
## [13,]  529274.9  181008.0
## [14,]  529278.3  181003.0
## [15,]  529281.4  180997.1
## [16,]  529259.6  181010.3
## [17,]  529256.2  181001.8
## [18,]  529309.8  181004.7
## [19,]  529314.1  180997.4
## [20,]  529318.0  180991.3
data.frame(CholeraDeaths)[1:10, ]
##    Id Count coords.x1 coords.x2 optional
## 1   0     3  529308.7  181031.4     TRUE
## 2   0     2  529312.2  181025.2     TRUE
## 3   0     1  529314.4  181020.3     TRUE
## 4   0     1  529317.4  181014.3     TRUE
## 5   0     4  529320.7  181007.9     TRUE
## 6   0     2  529336.7  181006.0     TRUE
## 7   0     2  529290.1  181024.4     TRUE
## 8   0     2  529301.0  181021.2     TRUE
## 9   0     3  529285.0  181020.2     TRUE
## 10  0     2  529288.4  181031.8     TRUE
cholera_coords <- as.data.frame(coordinates(CholeraDeaths))
(m0 <- ggplot(data = cholera_coords) + 
  geom_point(aes(x = coords.x1,  ## figure 14.2 를 그리기 위한 전 단계
                 y = coords.x2))) 

(m1 <- m0 +  
    coord_quickmap())  ## Figure 14.2 

(m2 <- m0 + 
    coord_map()) ## Not working

get_map, plot

# install.packages("ggmap", repos = "https://cran.rstudio.com")
library(ggmap)
## Google Maps API Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it: see citation("ggmap") for details.
# John_Snow_code <- geocode("John Snow, London, England", source = "dsk")
# m <- get_map(John_Snow_code, 
#              zoom = 17, 
#              source = "google",
#              maptype = "roadmap")
bbox <- c(-0.140, 51.509, -0.133, 51.518)
m <- get_map(bbox, 
#             zoom = 17, 
             source = "google",
             maptype = "roadmap")
## Warning: bounding box given to google - spatial extent only approximate.
## converting bounding box to center/zoom specification. (experimental)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=51.5135,-0.1365&zoom=17&size=640x640&scale=2&maptype=roadmap&language=en-EN
class(m)
## [1] "ggmap"  "raster"
plot(m)

ggmap

ggmap(m)

Add Deaths Points

ggmap(m) +
  geom_point(data = as.data.frame(CholeraDeaths), 
             aes(x = coords.x1,
                 y = coords.x2,
                 size = Count))
## Warning: Removed 250 rows containing missing values (geom_point).

#> All the points are out of bounds

“bbox”

head(as.data.frame(CholeraDeaths))
##   Id Count coords.x1 coords.x2
## 1  0     3  529308.7  181031.4
## 2  0     2  529312.2  181025.2
## 3  0     1  529314.4  181020.3
## 4  0     1  529317.4  181014.3
## 5  0     4  529320.7  181007.9
## 6  0     2  529336.7  181006.0
str(m)
##  'ggmap' chr [1:1280, 1:1280] "#E9E9E6" "#FEFEFE" "#FEFEFE" "#FEFEFE" ...
##  - attr(*, "source")= chr "google"
##  - attr(*, "maptype")= chr "roadmap"
##  - attr(*, "zoom")= num 17
##  - attr(*, "bb")='data.frame':   1 obs. of  4 variables:
##   ..$ ll.lat: num 51.5
##   ..$ ll.lon: num -0.14
##   ..$ ur.lat: num 51.5
##   ..$ ur.lon: num -0.133
attr(m, "bb") # coordinates are at different units, bounding box. 
##     ll.lat     ll.lon   ur.lat     ur.lon
## 1 51.51136 -0.1399279 51.51563 -0.1330614

Terms

proj4string, CRS

proj4string(CholeraDeaths) %>% 
  strwrap() 
## [1] "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000"
## [2] "+y_0=-100000 +ellps=airy +units=m +no_defs"
CRS("+init=epsg:4326") 
## CRS arguments:
##  +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0
CRS("+init=epsg:3857")
## CRS arguments:
##  +init=epsg:3857 +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0
## +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null
## +no_defs
CRS("+init=epsg:27700")
## CRS arguments:
##  +init=epsg:27700 +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717
## +x_0=400000 +y_0=-100000 +datum=OSGB36 +units=m +no_defs
## +ellps=airy
## +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894
cholera_latlong <- CholeraDeaths %>% 
  spTransform(CRS("+init=epsg:4326"))
help("spTransform-methods", package = "rgdal")
bbox(cholera_latlong)
##                  min        max
## coords.x1 -0.1384685 -0.1313274
## coords.x2 51.5113460 51.5153252
ggmap(m) +
  geom_point(data = as.data.frame(cholera_latlong),
             aes(x = coords.x1, 
                 y = coords.x2,
                 size = Count))
## Warning: Removed 39 rows containing missing values (geom_point).

Snow data on the map

proj4string, showEPSG

CholeraDeaths %>% 
  proj4string() %>% 
  showEPSG()
## [1] "OGRERR_UNSUPPORTED_SRS"
proj4string(CholeraDeaths) <- CRS("+init=epsg:27700")
## Warning in `proj4string<-`(`*tmp*`, value = new("CRS", projargs = "+init=epsg:27700 +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +datum=OSGB36 +units=m +no_defs +ellps=airy +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894")): A new CRS was assigned to an object with an existing CRS:
## +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs
## without reprojecting.
## For reprojection, use function spTransform

spTransform

cholera_latlong <- CholeraDeaths %>%
  spTransform(CRS("+init=epsg:4326"))

Places of Deaths, Points by Size

snow <- ggmap(m) +
  geom_point(data = as.data.frame(cholera_latlong),
             aes(x = coords.x1,
                 y = coords.x2,
                 size = Count))
snow
## Warning: Removed 3 rows containing missing values (geom_point).

Pumps added

pumps <- readOGR(dsn, 
                 layer = "Pumps")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/coop2711/Google 드라이브/Works/Class/Data_Science/R.WD/Maps/data/SnowGIS_SHP", layer: "Pumps"
## with 8 features
## It has 1 fields
proj4string(pumps) <- CRS("+init=epsg:27700")
## Warning in `proj4string<-`(`*tmp*`, value = new("CRS", projargs = "+init=epsg:27700 +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +datum=OSGB36 +units=m +no_defs +ellps=airy +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894")): A new CRS was assigned to an object with an existing CRS:
## +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs
## without reprojecting.
## For reprojection, use function spTransform
pumps_latlong <- pumps %>% 
  spTransform(CRS("+init=epsg:4326"))
snow + 
  geom_point(data = as.data.frame(pumps_latlong),
             aes(x = coords.x1,
                 y = coords.x2,
                 size = 4, 
                 colour = "red")) +
#  scale_colour_manual(guide = NULL)
  guides(colour = "none", shape = "none")
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

Save