rm(list=ls())
library(data.table)
library(ggplot2)
library(rgdal)
library(rgeos)
library(scales)
library(tidyverse)
library(dplyr)
library(fst)
library(lfe)
library(stargazer)
library(gganimate)
library(jsonlite)
library(lubridate)
sci <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/facebook/gadm1_nuts2_gadm1_nuts2_aug2020.tsv")
sci[,user_country:=substr(user_loc,1,3)]
sci[,fr_country:=substr(fr_loc,1,3)]
sci <- sci[user_country=="LKA" & fr_country=="LKA"]
sci[,user_loc:=as.numeric(substr(user_loc,4,6))]
sci[,fr_loc:=as.numeric(substr(fr_loc,4,6))]
sci_wide <- dcast(sci[,c("user_loc","fr_loc","scaled_sci")],user_loc~fr_loc)
lka_shp <- readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/facebook/lka_shp","gadm36_LKA_1")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\facebook\lka_shp", layer: "gadm36_LKA_1"
## with 25 features
## It has 10 fields
lka_shp.data <- data.table(lka_shp@data)
lka_shp.data[,id:=substr(GID_1,5,10)]
lka_shp.data[,id:=as.numeric(substr(id,1,nchar(id)-2))]
lka_shp.data[,NAME_1:=as.character(NAME_1)]
lka_shp.data <- data.table(lka_shp.data)
lka_shp <- fortify(lka_shp,region="GID_1")
lka_shp <- data.table(lka_shp)
lka_shp[,id:=substr(id,5,10)]
lka_shp[,id:=as.numeric(substr(id,1,nchar(id)-2))]
gampaha <- sci[user_loc==7,c("fr_loc","scaled_sci")]
gampaha <- gampaha %>% mutate(SCId=ntile(scaled_sci,10))
gampaha <- data.table(gampaha)
i=7 # Gampaha
temp <- merge(lka_shp,gampaha,by.x="id",by.y="fr_loc")
Used Facebook’s Social Connected Index (SCI) [https://data.humdata.org/dataset/social-connectedness-index] to rank the connectedness to Gampaha district. SCI was rescaled to be between 0 and 10, 10 being the highest connectedness. In the map below darker colors indicate higher connectedness to Gampaha district.
ggplot()+ geom_polygon(data=temp, aes(x=long,y=lat,group=group,fill=SCId),color="black")+
scale_fill_gradientn(colors=c("ivory1","honeydew1","darkseagreen3","darkturquoise","dodgerblue4"))+theme_minimal()+labs(x="",y="",fill ="Connectedness")+theme_minimal()+theme(legend.position = "bottom",axis.text = element_blank())
The figure below shows the correlation between the connectedness to Gampaha district (horizontal axis) and the number of COVID-19 cases (vertical axis). The figure suggests a positive correlation between social connectedness to Gampaha district and spred of COVID-19.
covidcases <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/Exploring/slcovid10192020.csv")
covidcases <- data.table(covidcases)
covidcases <- merge(covidcases,lka_shp.data[,c("NAME_1","id")],by.x="district",by.y="NAME_1",all.x=T)
covidcases[is.na(covidcases)] <-0
covidcases <- merge(covidcases,gampaha,by.x="id",by.y="fr_loc")
covidcases <- covidcases %>% mutate(SCId=ntile(scaled_sci,10))
covidcases <- data.table(covidcases)
covidcases[,SCId:=round(SCId,0)]
ggplot(covidcases[district != "Gampaha"],aes(x=round(SCId,0),y=log(1+cases)))+geom_text(aes(label=district),size=2.5,position=position_jitter(width=0.75,height=0.5))+stat_smooth(method = "lm", formula = y ~ x + I(x^2), size = 1)+scale_y_continuous(breaks = 0:5,
labels = round(c(exp(0:5)), 0))+theme_minimal()+labs(x="Connectedness to Gampaha district",y="COVID-19 cases as of 19 Oct 2020")
We cannot replicate the correlation figure above using more granular geographical regions since the data on COVID-19 cases are only available at district level (https://covidsl.com/). However, we can use social connectedness at more granular level to predict the potential hotspots.
sci2 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/facebook/gadm1_nuts3_counties_gadm1_nuts3_counties_Aug2020.tsv")
sci2[,user_country:=substr(user_loc,1,3)]
sci2[,fr_country:=substr(fr_loc,1,3)]
sci2 <- sci2[user_country=="LKA" & fr_country=="LKA"]
sci2 <- sci2[user_loc=="LKA7_108"]
sci2 <- sci2 %>% mutate(SCId=ntile(scaled_sci,100))
sci2 <- data.table(sci2)
lka_shp2 <- readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/facebook/lka_shp","gadm36_LKA_2")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\facebook\lka_shp", layer: "gadm36_LKA_2"
## with 323 features
## It has 13 fields
lka_shp2.data <- data.table(lka_shp2@data)
lka_shp2.data[,GID_1:=as.character(GID_1)]
lka_shp2.data[,rn:=.I]
lka_shp2.data[,loc:=paste0("LKA",substr(GID_1,5,(nchar(GID_1)-2)),"_",rn)]
lka_shp2.data[,GID_2:=as.character(GID_2)]
lka_shp2.data[,NAME_2:=as.character(NAME_2)]
lka_shp2.data <- lka_shp2.data[,c("GID_2","loc","NAME_2")]
lka_shp2 <- fortify(lka_shp2,region="GID_2")
lka_shp2 <- data.table(lka_shp2)
lka_shp2 <- merge(lka_shp2,lka_shp2.data,by.x="id",by.y="GID_2")
lka_shp2 <- merge(lka_shp2,sci2,by.x="loc",by.y="fr_loc")
The figure below provides the connectedness to Minuwangoda. The dark red indicates Minuwangoda.
ggplot()+geom_polygon(data=lka_shp2[loc!="LKR7_108"], aes(x=long,y=lat,group=group,fill=SCId/10),color="black")+
geom_polygon(data=lka_shp2[id=="LKA.7.10_1"], aes(x=long,y=lat,group=group),fill="darkred",color="black")+
scale_fill_gradientn(colors=c("ivory1","honeydew1","darkseagreen3","darkturquoise","dodgerblue4"))+theme_minimal()+labs(x="",y="",fill ="Connectedness")+theme_minimal()+theme(legend.position = "bottom",axis.text = element_blank())