shp 파일을 불러와서 시각화하고 행정자료와 결합하기

shp 파일 불러오기

library(rgdal)
## 필요한 패키지를 로딩중입니다: sp
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-3, (SVN revision 1187)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.5.2, released 2022/09/02
## Path to GDAL shared files: C:/Users/manne/AppData/Local/R/win-library/4.2/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: C:/Users/manne/AppData/Local/R/win-library/4.2/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.5-1
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library(ggplot2)
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Load the district shapefile, passing CP949 as the encoding for its text.
## NOTE(review): rgdal announces its own retirement when it is attached, so
## this call should eventually move to sf/terra.
map <- readOGR(dsn = "C:\\r_project\\mapping\\sig.shp", encoding = "CP949")
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: GDAL support is provided by the sf and terra packages among others
## Warning: GDAL support is provided by the sf and terra packages among others

## Warning: GDAL support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\r_project\mapping\sig.shp", layer: "sig"
## with 250 features
## It has 3 fields
## Warning: GDAL support is provided by the sf and terra packages among others
## Warning: GDAL support is provided by the sf and terra packages among others

## Warning: GDAL support is provided by the sf and terra packages among others
## Flatten the spatial object into a plain data frame of polygon vertices
## (long, lat, order, hole, piece, id, group) that ggplot2 can draw.
df_map <- ggplot2::fortify(map)
## Regions defined for each Polygons
head(df_map)
##       long     lat order  hole piece id group
## 1 956615.5 1953567     1 FALSE     1  0   0.1
## 2 956621.6 1953565     2 FALSE     1  0   0.1
## 3 956626.2 1953564     3 FALSE     1  0   0.1
## 4 956638.8 1953562     4 FALSE     1  0   0.1
## 5 956659.1 1953559     5 FALSE     1  0   0.1
## 6 956661.5 1953558     6 FALSE     1  0   0.1
## Draw the nationwide district outlines: white fill, black borders.
ggplot(df_map, aes(long, lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")

## Lock the aspect ratio so the map is not stretched.
## The coordinates are projected values (long around 956615, lat around
## 1953567 in the fortify() output), not degrees. coord_quickmap() approximates
## a projection from longitude/latitude and therefore does not apply here;
## coord_fixed() keeps one unit on x equal to one unit on y.
ggplot(data = df_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_fixed()

서울만 추출해서 그려보자.

## Pull the attribute table (one row per district) out of the spatial object.
df_map_info <- map@data

## Give each district an id that lines up with the id column of df_map.
## NOTE(review): assumes fortify() numbers the polygons 0, 1, 2, ... in the
## same order as the attribute rows — confirm.
## seq_len() is used instead of 1:nrow() so that an empty attribute table
## yields an empty id vector rather than c(0, -1).
df_map_info$id <- seq_len(nrow(df_map_info)) - 1

## The first two digits of SIG_CD are the province-level code
## (11 is the value used for Seoul further down).
df_map_info$sido <- as.numeric(substr(df_map_info$SIG_CD, start = 1, stop = 2))

head(df_map_info)
##   SIG_CD    SIG_ENG_NM SIG_KOR_NM id sido
## 0  11110     Jongno-gu     종로구  0   11
## 1  11140       Jung-gu       중구  1   11
## 2  11170    Yongsan-gu     용산구  2   11
## 3  11200  Seongdong-gu     성동구  3   11
## 4  11215   Gwangjin-gu     광진구  4   11
## 5  11230 Dongdaemun-gu   동대문구  5   11
head(df_map)
##       long     lat order  hole piece id group
## 1 956615.5 1953567     1 FALSE     1  0   0.1
## 2 956621.6 1953565     2 FALSE     1  0   0.1
## 3 956626.2 1953564     3 FALSE     1  0   0.1
## 4 956638.8 1953562     4 FALSE     1  0   0.1
## 5 956659.1 1953559     5 FALSE     1  0   0.1
## 6 956661.5 1953558     6 FALSE     1  0   0.1
## Collect the ids of the districts whose province code is 11 (Seoul).
id_sido <- df_map_info$id[df_map_info$sido == 11]

id_sido
##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## Keep only the Seoul rows in both the vertex table and the attribute table.
df_map2 <- subset(df_map, id %in% id_sido)
df_map2_info <- subset(df_map_info, id %in% id_sido)

summary(df_map2_info)
##     SIG_CD           SIG_ENG_NM         SIG_KOR_NM              id    
##  Length:25          Length:25          Length:25          Min.   : 0  
##  Class :character   Class :character   Class :character   1st Qu.: 6  
##  Mode  :character   Mode  :character   Mode  :character   Median :12  
##                                                           Mean   :12  
##                                                           3rd Qu.:18  
##                                                           Max.   :24  
##       sido   
##  Min.   :11  
##  1st Qu.:11  
##  Median :11  
##  Mean   :11  
##  3rd Qu.:11  
##  Max.   :11
head(df_map2_info)
##   SIG_CD    SIG_ENG_NM SIG_KOR_NM id sido
## 0  11110     Jongno-gu     종로구  0   11
## 1  11140       Jung-gu       중구  1   11
## 2  11170    Yongsan-gu     용산구  2   11
## 3  11200  Seongdong-gu     성동구  3   11
## 4  11215   Gwangjin-gu     광진구  4   11
## 5  11230 Dongdaemun-gu   동대문구  5   11
head(df_map2)
##       long     lat order  hole piece id group
## 1 956615.5 1953567     1 FALSE     1  0   0.1
## 2 956621.6 1953565     2 FALSE     1  0   0.1
## 3 956626.2 1953564     3 FALSE     1  0   0.1
## 4 956638.8 1953562     4 FALSE     1  0   0.1
## 5 956659.1 1953559     5 FALSE     1  0   0.1
## 6 956661.5 1953558     6 FALSE     1  0   0.1
## Outline map of the Seoul districts only.
ggplot(df_map2, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  theme(legend.position = "none")

### 서울 행정자료와 결합해 보자

서울 행정자료 불러오기 - 인구 자료

## Read the resident-population/household export (EUC-KR encoded CSV).
seoul0 <- read.csv(
  file = "C:\\r_project\\dv\\201812_202212_주민등록인구및세대현황_연간.csv",
  fileEncoding = "euc-kr"
)

## Drop the first data row.
## NOTE(review): presumably the city-wide total, which would leave the 25
## district rows reported by dim() — confirm against the CSV.
seoul0 <- seoul0[-1, ]

library(reshape2)

dim(seoul0)
## [1] 25 31
## Reshape to long form: one row per (district label, original column).
seoul1 <- melt(seoul0, id.vars = "행정구역")
names(seoul1) <- c("id", "index0", "value")

## Split the original column name into a year (characters 2-5) and a measure
## label (characters 8-20), then discard the raw column name.
## NOTE(review): assumes column names shaped like "X2018..." — confirm.
seoul1$year <- as.numeric(substr(as.character(seoul1$index0), 2, 5))
seoul1$type <- substr(as.character(seoul1$index0), 8, 20)
seoul1$index0 <- NULL

## Remove commas from the values before converting them to numbers.
seoul1$value <- as.numeric(gsub(",", "", seoul1$value))

## Back to wide form: one row per district and year, one column per measure.
seoul2 <- dcast(seoul1, id + year ~ type)
## Split every district label on spaces. As used below, the second token is
## taken as the district name and the third as the token carrying the code.
index0 <- seoul2$id
index1 <- strsplit(index0, " ")

## Extract the tokens with vapply() rather than growing vectors via append()
## inside a 1:length() loop: this is linear, always returns character vectors,
## and yields empty vectors (not an error) when there are no labels.
name <- vapply(index1, function(tokens) tokens[2], character(1))
code <- vapply(index1, function(tokens) tokens[3], character(1))

## Keep characters 2-6 of the third token as the district code.
## NOTE(review): presumably skips an opening bracket and keeps the 5-digit
## code that matches SIG_CD — confirm against the raw labels.
code <- substr(code, 2, 6)

## Combine the parsed name/code with columns 2-8 of the wide table.
seoul3 <- data.frame(name, code, seoul2[, 2:8])

## NOTE(review): the names are assigned by position, so they rely on the
## column order produced by dcast() — confirm the mapping.
colnames(seoul3) <- c(
  "gu", "code", "year",
  "gender_ratio", "men", "pop_household",
  "household", "women", "total"
)

## Men per woman, recomputed from the two count columns.
seoul3$gender_ratio2 <- seoul3$men / seoul3$women

서울 인구를 구별로 시각화하여 나타내보자.

## Mean of `total` per district (gu, code) across the rows of seoul3.
## NOTE(review): with two grouping columns the result stays grouped by gu, as
## the summarise() message says; `.groups = "drop"` would return it ungrouped.
seoul_map_vis1 <- summarize(group_by(seoul3, gu, code), pop = mean(total))
## `summarise()` has grouped output by 'gu'. You can override using the `.groups`
## argument.
seoul_map_vis1
## # A tibble: 25 × 3
## # Groups:   gu [25]
##    gu     code      pop
##    <chr>  <chr>   <dbl>
##  1 강남구 11680 537782.
##  2 강동구 11740 449268.
##  3 강북구 11305 306803 
##  4 강서구 11500 582482.
##  5 관악구 11620 493912.
##  6 광진구 11215 346201.
##  7 구로구 11530 401528.
##  8 금천구 11545 231783.
##  9 노원구 11350 522877.
## 10 도봉구 11320 325418.
## # … with 15 more rows
## Rename code to SIG_CD so it matches the key column of the map attributes.
names(seoul_map_vis1)[names(seoul_map_vis1) == "code"] <- "SIG_CD"

head(seoul_map_vis1)
## # A tibble: 6 × 3
## # Groups:   gu [6]
##   gu     SIG_CD     pop
##   <chr>  <chr>    <dbl>
## 1 강남구 11680  537782.
## 2 강동구 11740  449268.
## 3 강북구 11305  306803 
## 4 강서구 11500  582482.
## 5 관악구 11620  493912.
## 6 광진구 11215  346201.
head(df_map2_info)
##   SIG_CD    SIG_ENG_NM SIG_KOR_NM id sido
## 0  11110     Jongno-gu     종로구  0   11
## 1  11140       Jung-gu       중구  1   11
## 2  11170    Yongsan-gu     용산구  2   11
## 3  11200  Seongdong-gu     성동구  3   11
## 4  11215   Gwangjin-gu     광진구  4   11
## 5  11230 Dongdaemun-gu   동대문구  5   11
## Attach the per-district mean population to the Seoul attribute table.
df_map2_info <- merge(x = df_map2_info, y = seoul_map_vis1, by = "SIG_CD")

## Only the polygon id and the value are needed to colour the map.
df_map2_temp <- df_map2_info[c("id", "pop")]

head(df_map2_temp)
##   id      pop
## 1  0 147960.2
## 2  1 124014.4
## 3  2 225862.4
## 4  3 293931.2
## 5  4 346200.6
## 6  5 342225.4
## Attach the population value to every polygon vertex.
df_map2_pop <- merge(df_map2, df_map2_temp, by = "id")
## merge() does not promise to keep the incoming row order within a key, so
## restore the vertex sequence recorded in `order`; geom_polygon() connects
## the points in row order and would otherwise draw torn outlines.
df_map2_pop <- df_map2_pop[order(df_map2_pop$order), ]


## Choropleth of population by district with the default fill scale.
ggplot(df_map2_pop, aes(long, lat, group = group, fill = pop)) +
  geom_polygon(colour = "black")

## Same map with a light-to-dark purple gradient.
ggplot(df_map2_pop, aes(long, lat, group = group, fill = pop)) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(low = "#dadaeb", high = "#3f007d")

## 서울 자치구 성비를 시각화하여 나타내보자.

## 서울시 자치구 성비의 시각화

head(seoul3, 0)
##  [1] gu            code          year          gender_ratio  men          
##  [6] pop_household household     women         total         gender_ratio2
## <0 행> <또는 row.names의 길이가 0입니다>
## Keep the 2022 rows and the three columns needed for the gender-ratio map.
seoul_map_vis2 <- subset(seoul3, year == 2022,
                         select = c(code, gu, gender_ratio2))
head(seoul_map_vis2)
##     code     gu gender_ratio2
## 5  11680 강남구     0.9168279
## 10 11740 강동구     0.9590157
## 15 11305 강북구     0.9435712
## 20 11500 강서구     0.9230985
## 25 11620 관악구     1.0113387
## 30 11215 광진구     0.9294696
## Rename the first column (code) to SIG_CD to match the map attribute key.
names(seoul_map_vis2)[1] <- "SIG_CD"

head(seoul_map_vis2)
##    SIG_CD     gu gender_ratio2
## 5   11680 강남구     0.9168279
## 10  11740 강동구     0.9590157
## 15  11305 강북구     0.9435712
## 20  11500 강서구     0.9230985
## 25  11620 관악구     1.0113387
## 30  11215 광진구     0.9294696
head(df_map2_info)
##   SIG_CD    SIG_ENG_NM SIG_KOR_NM id sido       gu      pop
## 1  11110     Jongno-gu     종로구  0   11   종로구 147960.2
## 2  11140       Jung-gu       중구  1   11     중구 124014.4
## 3  11170    Yongsan-gu     용산구  2   11   용산구 225862.4
## 4  11200  Seongdong-gu     성동구  3   11   성동구 293931.2
## 5  11215   Gwangjin-gu     광진구  4   11   광진구 346200.6
## 6  11230 Dongdaemun-gu   동대문구  5   11 동대문구 342225.4
head(df_map2_info)
##   SIG_CD    SIG_ENG_NM SIG_KOR_NM id sido       gu      pop
## 1  11110     Jongno-gu     종로구  0   11   종로구 147960.2
## 2  11140       Jung-gu       중구  1   11     중구 124014.4
## 3  11170    Yongsan-gu     용산구  2   11   용산구 225862.4
## 4  11200  Seongdong-gu     성동구  3   11   성동구 293931.2
## 5  11215   Gwangjin-gu     광진구  4   11   광진구 346200.6
## 6  11230 Dongdaemun-gu   동대문구  5   11 동대문구 342225.4
## Join the 2022 gender ratio onto the Seoul attribute table by district code.
df_map3_info <- merge(x = df_map2_info, y = seoul_map_vis2, by = "SIG_CD")

## Keep just the polygon id and the ratio.
df_map3_temp <- df_map3_info[c("id", "gender_ratio2")]

head(df_map3_temp)
##   id gender_ratio2
## 1  0     0.9371232
## 2  1     0.9464880
## 3  2     0.9253630
## 4  3     0.9464282
## 5  4     0.9294696
## 6  5     0.9699571
## Attach the gender ratio to every polygon vertex.
df_map2_gender_ratio <- merge(df_map2, df_map3_temp, by = "id")
## merge() does not promise to keep the incoming row order within a key, so
## restore the vertex sequence recorded in `order`; geom_polygon() connects
## the points in row order and would otherwise draw torn outlines.
df_map2_gender_ratio <- df_map2_gender_ratio[order(df_map2_gender_ratio$order), ]

head(df_map2_gender_ratio)
##   id     long     lat order  hole piece group gender_ratio2
## 1  0 956615.5 1953567     1 FALSE     1   0.1     0.9371232
## 2  0 956621.6 1953565     2 FALSE     1   0.1     0.9371232
## 3  0 956626.2 1953564     3 FALSE     1   0.1     0.9371232
## 4  0 956638.8 1953562     4 FALSE     1   0.1     0.9371232
## 5  0 956659.1 1953559     5 FALSE     1   0.1     0.9371232
## 6  0 956661.5 1953558     6 FALSE     1   0.1     0.9371232
## Choropleth of the 2022 men-to-women ratio by district.
ggplot(df_map2_gender_ratio,
       aes(long, lat, group = group, fill = gender_ratio2)) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(low = "#dadaeb", high = "#3f007d")

## Population choropleth again, for side-by-side comparison.
ggplot(df_map2_pop, aes(long, lat, group = group, fill = pop)) +
  geom_polygon(colour = "black") +
  scale_fill_gradient(low = "#dadaeb", high = "#3f007d")