#Load all the packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggiraph)
#Find the place: Find a shape file of the place you plan to make.I got it from here:https://gadm.org/download_country.html. This is a shape file of China. You can also choose the country you are interested in.
CN<-read_sf("chn_adm_ocha_2020_shp","chn_admbnda_adm1_ocha_2020")
head(CN)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 97.56054 ymin: 21.14672 xmax: 122.517 ymax: 39.5806
## Geodetic CRS: WGS 84
## # A tibble: 6 × 7
## ADM1_EN ADM1_ZH ADM1_PCODE ADM0_EN ADM0_ZH ADM0_PCODE
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Shaanxi Province 陕西省 CN061 China 中国 CN
## 2 Shanghai Municipality 上海市 CN031 China 中国 CN
## 3 Chongqing Municipality 重庆市 CN050 China 中国 CN
## 4 Zhejiang Province 浙江省 CN033 China 中国 CN
## 5 Jiangxi Province 江西省 CN036 China 中国 CN
## 6 Yunnan Province 云南省 CN053 China 中国 CN
## # ℹ 1 more variable: geometry <MULTIPOLYGON [°]>
#Add population: I then found a data set of Chinese's population by province in this website:https://www.statista.com/statistics/279013/population-in-china-by-region/. I imported the xlsx file here.You can also choose other geographical data like crime rate or tourism.
excel_file_path <- "/Users/winnieren/Desktop/Fall_2023/Chinese_pop.xlsx"
pop <- read_excel(excel_file_path)
View(pop)
#Combining the data: Before I merged these two data set, I wanted to make sure that they are matching. Thus, below is the way I checked that.
# Check for non-matching and non-unique values
CN %>%
filter(!ADM1_EN %in% CN$ADM1_EN) # Check for non-matching values
## Simple feature collection with 0 features and 6 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 × 7
## # ℹ 7 variables: ADM1_EN <chr>, ADM1_ZH <chr>, ADM1_PCODE <chr>, ADM0_EN <chr>,
## # ADM0_ZH <chr>, ADM0_PCODE <chr>, geometry <GEOMETRY [°]>
# Identify duplicate values in both data frames
CN %>%
count(ADM1_EN) %>%
filter(n > 1) # Check for duplicates
## Simple feature collection with 0 features and 2 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 × 3
## # ℹ 3 variables: ADM1_EN <chr>, n <int>, geometry <GEOMETRY [°]>
pop %>%
count(ADM1_EN) %>%
filter(n > 1) # Check for duplicates
## # A tibble: 0 × 2
## # ℹ 2 variables: ADM1_EN <chr>, n <int>
# Handle non-matching or non-unique values before merging
# ...
# Merge the data frames
map_data <- merge(CN,pop, by.x = "ADM1_EN", by.y = "ADM1_EN")
View(map_data)
#Setting up the density classes
map_data$DesClass<-cut(map_data$Pop, breaks =c(0,20,40,60,80,100,Inf),
labels=c('<20', '20-40','40-60','60-80','80-100','>100'))
#plotting:I used ggplot and geom_sf_interactive to make the interactive map. You can also try leaflet or geom_polygon.
p <-ggplot()+
geom_sf_interactive(data=map_data,aes(fill=DesClass,tooltip = paste("Province: ", ADM1_EN, "<br>Population: ", Pop)),color='transparent')+
geom_sf_interactive(fill='transparent',color="white",data=map_data)+
scale_fill_viridis_d(name='Million Inhab/km2',
guide=guide_legend(
direction="horizontal",
title.position = 'top',
title.hjust = .5,
label.hjust = .5,
label.position = 'bottom'
))+
labs(title="China's demographics",
subtitle = "Population density",
caption=c('Source:https://www.statista.com/statistics/279013/population-in-china-by-region/'))+
theme_void()+
theme(title=element_text(face='bold'),
legend.position = 'bottom')
girafe(ggobj = p)