library(dplyr)
library(tigris)
options(tigris_class = "sf")
library(tidycensus)
library(data.table)
library(tidyquant)
library(ggplot2)

Spatial data

cos<-counties(state="TX", cb=T)
cos$fips<-as.numeric(cos$GEOID)

Population estimates data

coests<-get_estimates(geography="county", year=2018, product = "population", output = "wide")
head(coests)

COVID data

To get these, I cloned the git repository from Johns Hopkins to my local machine

dir<-"/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series/"

list.files(dir)
## [1] "README.md"                               
## [2] "time_series_covid19_confirmed_global.csv"
## [3] "time_series_covid19_confirmed_US.csv"    
## [4] "time_series_covid19_deaths_global.csv"   
## [5] "time_series_covid19_deaths_US.csv"       
## [6] "time_series_covid19_recovered_global.csv"
files<-list.files(dir, pattern = ".csv", full.names = T)
files
## [1] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_confirmed_global.csv"
## [2] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_confirmed_US.csv"    
## [3] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_deaths_global.csv"   
## [4] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_deaths_US.csv"       
## [5] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_recovered_global.csv"
dat<-read.csv(files[2]) #cases
dat2<-read.csv(files[4]) #deaths

#clean case data
test<-dat%>%
  filter(Province_State %in% unique(fips_codes$state_name)[1:51])%>%
  mutate(cofips = formatC(FIPS, width=5, format="d", flag="0"))%>%
  mutate(st=substr(cofips, 1,2))%>%
  #filter(st=="48")%>%
  select(names(.)[c(-1:-5, -8:-11)])%>%
  setDT(.)%>%
  melt(., id.vars = c("cofips"), measure.vars = names(.)[3:79])%>%
  arrange(cofips, variable)

test$date<-seq.Date(from=as.Date("2020/1/22"), to=as.Date("2020/4/7"), by="day")

#clean death data
test2<-dat2%>%
  filter(Province_State %in% unique(fips_codes$state_name)[1:51])%>%
  mutate(cofips = formatC(FIPS, width=5, format="d", flag="0"))%>%
  mutate(st=substr(cofips, 1,2))%>%
  #filter(st=="48")%>%
  filter(is.na(cofips)==F)%>%
  select(names(.)[c(-1:-5, -8:-12)])%>%
  setDT(.)%>%
  melt(., id.vars = c("cofips"), measure.vars = names(.)[3:79])%>%
  arrange(cofips, variable)

test2$date<-seq.Date(from=as.Date("2020/1/22"), to=as.Date("2020/4/7"), by="day")
test2$deaths<-test2$value

names(test)<-c("cofips", "day", "value", "date")
names(test2)<-c("cofips", "day", "value", "date", "deaths")

test$deaths<-test2$deaths

Area resource file

load(url("https://github.com/coreysparks/data/blob/master/arf2018.Rdata?raw=true"))
arfs<-arf2018%>%
  select(f0002013, f00011, f00012,  f1198417, f0978718 )%>%
  mutate(cofips=paste(f00011, f00012, sep=""),  hpsa=f0978718)#%>%
  #filter(f00011=="48")

Merge data

test2<-merge(test, arfs, by.x="cofips", by.y="cofips")

test2<-merge(test2, coests, by.x="cofips", by.y="GEOID")
head(test2)

Plotting - US data

test2%>%
  filter(date>"2020/3/1")%>%
  mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
                                     .$f0002013=="02" ~ "metro 250,000 to 1 million population",
                                     .$f0002013=="03" ~ "metro <250,000 population",
                                     .$f0002013=="04" ~ "Urban population of 20,000+, metro adjacent",
                                     .$f0002013=="05" ~ "Urban population of 20,000+, not adjacent",
                                     .$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
                                     .$f0002013=="07" ~ "Urban population of 2,500-19,999, not adjacent",
                                     .$f0002013=="08" ~ "Rural or <2,500 urban population, metro adjacent",
                                     .$f0002013=="09" ~ "Rural or <2,500 urban population, not adjacent")))%>%
  group_by(Place, date)%>%
  dplyr::summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
  mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
  #mutate(ch=lag(ncases,2))%>%
  #filter(f0002013!="01")%>%
  ggplot(aes(y=cfr, x=date))+#geom_point(aes(group=Place, color=Place))+ylim(c(0, 5))+
  geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
  labs(title = "US COVID-19 Case Fatality Ratio",
       subtitle = "March 1 - April 7, 2020. 4 day moving average",
       caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
       x = "Date",
       y = "Fatalities per 100 Cases",
       color = "RUCC Code") + 
  theme_minimal()+ scale_color_brewer(type = "qual", palette = "Paired")+  theme(legend.position="bottom")#+scale_y_continuous(trans='log2')

ggsave(filename="~/covidcfr.png", height=10, width=12)
test2%>%
  filter(date>"2020/3/1")%>%
  #filter(substr(cofips,1,2)=="48")%>%
  mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
                                     .$f0002013=="02" ~ "metro 250,000 to 1 million population",
                                     .$f0002013=="03" ~ "metro < 250,000 population",
                                     .$f0002013=="04" ~ "Urban population of 20,000 +, metro adjacent",
                                     .$f0002013=="05" ~ "Urban population of 20,000 +, metro adjacent",
                                     .$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
                                     .$f0002013=="07" ~ "Urban population of 2,500-19,999,metro adjacent",
                                     .$f0002013=="08" ~ "Rural or < 2,500 urban population, metro adjacent",
                                     .$f0002013=="09" ~ "Rural or < 2,500 urban population, not adjacent")))%>%
  group_by(Place, date)%>%
  summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
  mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
  #mutate(ch=lag(ncases,2))%>%
  #filter(f0002013!="01")%>%
  ggplot(aes(y=rate, x=date))+#geom_line(aes(group=Place, color=stringr::str_wrap(Place,width = 15)), lwd=1.2)+
  geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
  labs(title = "US COVID-19 Incidence Rates",
       subtitle = "March 1 - April 7, 2020: 4 day moving average",
       caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
       x = "Date",
       y = "Rate per 100,000 population",
       color = "RUCC Code") + 
  theme_minimal()+scale_color_brewer(type = "qual", palette = "Paired")+theme(legend.position="bottom")#+scale_y_continuous(trans='log2')

ggsave(filename="~/covidrate.png", height=10, width=12)

Plotting - Texas data

test2%>%
  filter(date>"2020/3/1")%>%
  filter(substr(cofips,1,2)=="48")%>%
  mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
                                     .$f0002013=="02" ~ "metro 250,000 to 1 million population",
                                     .$f0002013=="03" ~ "metro <250,000 population",
                                     .$f0002013=="04" ~ "Urban population of 20,000+, metro adjacent",
                                     .$f0002013=="05" ~ "Urban population of 20,000+, not adjacent",
                                     .$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
                                     .$f0002013=="07" ~ "Urban population of 2,500-19,999, not adjacent",
                                     .$f0002013=="08" ~ "Rural or <2,500 urban population, metro adjacent",
                                     .$f0002013=="09" ~ "Rural or <2,500 urban population, not adjacent")))%>%
  group_by(Place, date)%>%
  dplyr::summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
  mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
  #mutate(ch=lag(ncases,2))%>%
  ggplot(aes(y=cfr, x=date))+#geom_point(aes(group=Place, color=Place))+ylim(c(0, 5))+
  geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
  labs(title = "Texas COVID-19 Case Fatality Ratio",
       subtitle = "March 1 - April 7, 2020. 4 day moving average",
       caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
       x = "Date",
       y = "Fatalities per 100 Cases",
       color = "RUCC Code") + 
  theme_minimal()+ scale_color_brewer(type = "qual", palette = "Paired")+  theme(legend.position="bottom")#+scale_y_continuous(trans='log2')

ggsave(filename="~/txcovidcfr.png", height=10, width=12)
test2%>%
  filter(date>"2020/3/1")%>%
  filter(substr(cofips,1,2)=="48")%>%
  mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
                                     .$f0002013=="02" ~ "metro 250,000 to 1 million population",
                                     .$f0002013=="03" ~ "metro < 250,000 population",
                                     .$f0002013=="04" ~ "Urban population of 20,000 +, metro adjacent",
                                     .$f0002013=="05" ~ "Urban population of 20,000 +, metro adjacent",
                                     .$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
                                     .$f0002013=="07" ~ "Urban population of 2,500-19,999,metro adjacent",
                                     .$f0002013=="08" ~ "Rural or < 2,500 urban population, metro adjacent",
                                     .$f0002013=="09" ~ "Rural or < 2,500 urban population, not adjacent")))%>%
  group_by(Place, date)%>%
  summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
  mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
  #mutate(ch=lag(ncases,2))%>%
  #filter(f0002013!="01")%>%
  ggplot(aes(y=rate, x=date))+#geom_line(aes(group=Place, color=stringr::str_wrap(Place,width = 15)), lwd=1.2)+
  geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
  labs(title = "TX COVID-19 Incidence Rates",
       subtitle = "March 1 - April 7, 2020 - 4 day moving average",
       caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
       x = "Date",
       y = "Rate per 100,000 population",
       color = "RUCC Code") + 
  theme_minimal()+scale_color_brewer(type = "qual", palette = "Paired")+theme(legend.position="bottom")#+scale_y_continuous(trans='log2')

ggsave(filename="~/txcovidrate.png", height=10, width=12)

Maps of Texas cases

cos$fips<-as.character(cos$fips)

geodat<-geo_join(cos, test,"fips","cofips" )

library(INLA)
inla.pardiso()
w#quantile(mdat$confirmed, na.rm=T,p=seq(0,1,length.out = 7))
library(ggplot2)
require(scales)
options(scipen=10000)
brks<- quantile(geodat$value, na.rm=T,p=seq(0,1,length.out = 7))
brks

library(scales)
brks<-c(0,5,10,15,50,100,2000)
labs<-scales::comma(x = brks)
mdat%>%
  mutate(cases=cut(confirmed, breaks =brks, lables=labs))%>%
  ggplot()+geom_sf(aes(fill=cases))+
  scale_fill_brewer()