if (!require("leaflet")) install.packages("leaflet", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("rgdal")) install.packages("rgdal", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("rtweet")) install.packages("rtweet", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("htmlwidgets")) install.packages("htmlwidgets", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)

require("htmlwidgets")
require("leaflet")
require("rgdal")
require("rtweet")

1. Hong Kong Population Distribution

First, we build a map to show the population distribution in Hong Kong.

# Read the Hong Kong district polygons from the shapefile. With
# GDAL1_integer64_policy = TRUE the Integer64 field is read as a double
# (see the reader's message below).
districts <- readOGR(
  dsn = "./Hong_Kong_18_Districts/Hong_Kong_18_Districts.shp",
  layer = "Hong_Kong_18_Districts",
  GDAL1_integer64_policy = TRUE
)
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\kwfu.JMSC\Documents\JMSC\Social Media analysis\Hong_Kong_18_Districts\Hong_Kong_18_Districts.shp", layer: "Hong_Kong_18_Districts"
## with 18 features
## It has 5 fields
## Integer64 fields read as doubles:  OBJECTID
# Read the per-district population table and display it.
pop <- read.csv(file = "district_pop.csv")

print(pop)
##             district population percentage
## 1  CENTRAL & WESTERN     243266   3.316336
## 2           WAN CHAI     180123   2.455536
## 3            EASTERN     555034   7.566530
## 4           SOUTHERN     274994   3.748870
## 5      YAU TSIM MONG     342970   4.675556
## 6       SHAM SHUI PO     405869   5.533030
## 7       KOWLOON CITY     418732   5.708386
## 8       WONG TAI SIN     425235   5.797038
## 9          KWUN TONG     648541   8.841269
## 10        KWAI TSING     520572   7.096725
## 11         TSUEN WAN     318916   4.347639
## 12          TUEN MUN     489299   6.670394
## 13         YUEN LONG     614178   8.372813
## 14             NORTH     315270   4.297935
## 15            TAI PO     303926   4.143287
## 16           SHA TIN     659794   8.994676
## 17          SAI KUNG     461864   6.296385
## 18           ISLANDS     156801   2.137598
## 19                      7335384 100.000000
# Attach each district's population percentage to the polygons by matching
# the shapefile's ENAME field against the `district` column of `pop`.
districts$pop <- pop$percentage[
  match(as.character(districts$ENAME), pop$district)
]

# Green colour scale over the observed percentages.
pal <- colorNumeric(palette = "Greens", domain = districts$pop)

# Base tiles + one polygon per district (popup shows the district name)
# + a legend labelled in percent.
m <- leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = districts,
    weight = 1,
    popup = ~ENAME,
    color = ~pal(districts$pop)
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = districts$pop,
    title = "Population %",
    labFormat = labelFormat(suffix = "%"),
    opacity = 0.75
  )
m

2. Wage Distribution in China

Next, we create a map displaying the provincial distribution of the average annual wage of employees in China.

# Read the province-level polygons of China from the shapefile. With
# GDAL1_integer64_policy = TRUE the Integer64 fields are read as doubles
# (see the reader's message below).
chn_adm <- readOGR(
  dsn = "./CHN_adm/CHN_adm1.shp",
  layer = "CHN_adm1",
  GDAL1_integer64_policy = TRUE
)
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\kwfu.JMSC\Documents\JMSC\Social Media analysis\CHN_adm\CHN_adm1.shp", layer: "CHN_adm1"
## with 31 features
## It has 9 fields
## Integer64 fields read as doubles:  ID_0 ID_1
# Read the per-province wage table and display it.
wage <- read.csv(file = "chm_wage.csv")

print(wage)
##          Province   wage
## 1         Beijing 131700
## 2         Tianjin  94534
## 3           Hebei  63036
## 4          Shanxi  60061
## 5      Nei Mongol  66679
## 6        Liaoning  61153
## 7           Jilin  61451
## 8    Heilongjiang  56067
## 9        Shanghai 129795
## 10        Jiangsu  78267
## 11       Zhejiang  80750
## 12          Anhui  65150
## 13         Fujian  67420
## 14        Jiangxi  61429
## 15       Shandong  68081
## 16          Henan  55495
## 17          Hubei  65912
## 18          Hunan  63690
## 19      Guangdong  79183
## 20        Guangxi  63821
## 21         Hainan  67727
## 22      Chongqing  70889
## 23        Sichuan  69419
## 24        Guizhou  71795
## 25         Yunnan  69106
## 26         Xizang 108817
## 27        Shaanxi  65181
## 28          Gansu  63374
## 29        Qinghai  75701
## 30    Ningxia Hui  70298
## 31 Xinjiang Uygur  67932
# Attach each province's wage to the polygons by matching the shapefile's
# NAME_1 field against the `Province` column of `wage`.
chn_adm$wage <- wage$wage[
  match(as.character(chn_adm$NAME_1), wage$Province)
]

# Green colour scale over the observed wages.
pal <- colorNumeric(palette = "Greens", domain = chn_adm$wage)

# Base tiles + one polygon per province (popup shows the province name)
# + a legend with a "$" prefix on its labels.
m <- leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = chn_adm,
    weight = 1,
    popup = ~NAME_1,
    color = ~pal(chn_adm$wage)
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = chn_adm$wage,
    title = "Average Annual Wage",
    labFormat = labelFormat(prefix = "$"),
    opacity = 0.75
  )
m

An HTML page is exported so that the map can be embedded in a website via an iframe.

# Write the current map widget `m` to chinamap.html; selfcontained = FALSE
# keeps its supporting resources as separate files instead of inlining them.
saveWidget(
  widget = m,
  file = "chinamap.html",
  selfcontained = FALSE
)

3. Twitter User Location

Finally, we use the Twitter API to search for tweets by user location, i.e. within a 10,000-mile radius of us.

# The live API query is kept commented out; a previously saved result is
# loaded from disk instead.
#tweet <- search_tweets(" ",n=10000,geocode = "22.39,114.14,10000mi")
tweet <- readRDS(file = "tweet.rds")

## Extract the coordinates as a numeric matrix with one row per tweet and
## two columns (taken as lat, lng, in that order). vapply() pins every entry
## to exactly two numbers -- an empty entry becomes NA, NA -- whereas
## sapply() would silently return a list on ragged input and break the
## column indexing below.
t_geo <- t(vapply(
  tweet$geo_coords,
  function(xy) as.numeric(xy)[1:2],
  numeric(2)
))

## Keep only the tweets that actually carry a coordinate
with_coord <- !is.na(t_geo[, 1])

## One row per geotagged tweet: position plus the popup text.
## NOTE(review): leaflet renders popup text as HTML and tweet text is
## untrusted input; consider escaping it (e.g. htmltools::htmlEscape())
## before building the popup -- left unchanged here.
lat_lon <- data.frame(
  lat = t_geo[with_coord, 1],
  lng = t_geo[with_coord, 2],
  popup = paste0(
    'At  ', tweet$created_at[with_coord],
    ',', tweet$screen_name[with_coord],
    ' said, " ', tweet$text[with_coord], '"'
  )
)

## Show the two coordinate columns
lat_lon[, c("lat", "lng")]
##         lat      lng
## 1  11.95969 121.9267
## 2  14.57218 121.1879
## 3  14.23936 121.1518
## 4  14.85194 120.2593
## 5  14.56311 121.0689
## 6  16.23996 120.6262
## 7  14.60796 121.0803
## 8  14.57167 121.0236
## 9  14.54195 121.0556
## 10 17.32980 120.4455
## 11 14.64369 121.0312
## 12 22.28413 114.1482
## 13 14.53889 121.0577
## 14 11.98162 121.9163
## 15 13.11422 123.6468
## 16 14.73678 121.0553
## 17 15.47550 120.5963
## Build the map: default tiles plus one marker per geotagged tweet,
## each with its popup text
m <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    data = lat_lon,
    lng = ~lng,
    lat = ~lat,
    popup = ~popup
  )
m