library(dplyr)
library(tigris)
options(tigris_class = "sf")
library(tidycensus)
library(data.table)
library(tidyquant)
library(ggplot2)
cos<-counties(state="TX", cb=T)
cos$fips<-as.numeric(cos$GEOID)
coests<-get_estimates(geography="county", year=2018, product = "population", output = "wide")
head(coests)
To get these, I cloned the git repository from Johns Hopkins to my local machine
dir<-"/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series/"
list.files(dir)
## [1] "README.md"
## [2] "time_series_covid19_confirmed_global.csv"
## [3] "time_series_covid19_confirmed_US.csv"
## [4] "time_series_covid19_deaths_global.csv"
## [5] "time_series_covid19_deaths_US.csv"
## [6] "time_series_covid19_recovered_global.csv"
files<-list.files(dir, pattern = ".csv", full.names = T)
files
## [1] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_confirmed_global.csv"
## [2] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_confirmed_US.csv"
## [3] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_deaths_global.csv"
## [4] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_deaths_US.csv"
## [5] "/home/corey/git_area/COVID-19/csse_covid_19_data/csse_covid_19_time_series//time_series_covid19_recovered_global.csv"
dat<-read.csv(files[2]) #cases
dat2<-read.csv(files[4]) #deaths
#clean case data
test<-dat%>%
filter(Province_State %in% unique(fips_codes$state_name)[1:51])%>%
mutate(cofips = formatC(FIPS, width=5, format="d", flag="0"))%>%
mutate(st=substr(cofips, 1,2))%>%
#filter(st=="48")%>%
select(names(.)[c(-1:-5, -8:-11)])%>%
setDT(.)%>%
melt(., id.vars = c("cofips"), measure.vars = names(.)[3:79])%>%
arrange(cofips, variable)
test$date<-seq.Date(from=as.Date("2020/1/22"), to=as.Date("2020/4/7"), by="day")
#clean death data
test2<-dat2%>%
filter(Province_State %in% unique(fips_codes$state_name)[1:51])%>%
mutate(cofips = formatC(FIPS, width=5, format="d", flag="0"))%>%
mutate(st=substr(cofips, 1,2))%>%
#filter(st=="48")%>%
filter(is.na(cofips)==F)%>%
select(names(.)[c(-1:-5, -8:-12)])%>%
setDT(.)%>%
melt(., id.vars = c("cofips"), measure.vars = names(.)[3:79])%>%
arrange(cofips, variable)
test2$date<-seq.Date(from=as.Date("2020/1/22"), to=as.Date("2020/4/7"), by="day")
test2$deaths<-test2$value
names(test)<-c("cofips", "day", "value", "date")
names(test2)<-c("cofips", "day", "value", "date", "deaths")
test$deaths<-test2$deaths
load(url("https://github.com/coreysparks/data/blob/master/arf2018.Rdata?raw=true"))
arfs<-arf2018%>%
select(f0002013, f00011, f00012, f1198417, f0978718 )%>%
mutate(cofips=paste(f00011, f00012, sep=""), hpsa=f0978718)#%>%
#filter(f00011=="48")
test2<-merge(test, arfs, by.x="cofips", by.y="cofips")
test2<-merge(test2, coests, by.x="cofips", by.y="GEOID")
head(test2)
test2%>%
filter(date>"2020/3/1")%>%
mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
.$f0002013=="02" ~ "metro 250,000 to 1 million population",
.$f0002013=="03" ~ "metro <250,000 population",
.$f0002013=="04" ~ "Urban population of 20,000+, metro adjacent",
.$f0002013=="05" ~ "Urban population of 20,000+, not adjacent",
.$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
.$f0002013=="07" ~ "Urban population of 2,500-19,999, not adjacent",
.$f0002013=="08" ~ "Rural or <2,500 urban population, metro adjacent",
.$f0002013=="09" ~ "Rural or <2,500 urban population, not adjacent")))%>%
group_by(Place, date)%>%
dplyr::summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
#mutate(ch=lag(ncases,2))%>%
#filter(f0002013!="01")%>%
ggplot(aes(y=cfr, x=date))+#geom_point(aes(group=Place, color=Place))+ylim(c(0, 5))+
geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
labs(title = "US COVID-19 Case Fatality Ratio",
subtitle = "March 1 - April 7, 2020. 4 day moving average",
caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
x = "Date",
y = "Fatalities per 100 Cases",
color = "RUCC Code") +
theme_minimal()+ scale_color_brewer(type = "qual", palette = "Paired")+ theme(legend.position="bottom")#+scale_y_continuous(trans='log2')
ggsave(filename="~/covidcfr.png", height=10, width=12)
test2%>%
filter(date>"2020/3/1")%>%
#filter(substr(cofips,1,2)=="48")%>%
mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
.$f0002013=="02" ~ "metro 250,000 to 1 million population",
.$f0002013=="03" ~ "metro < 250,000 population",
.$f0002013=="04" ~ "Urban population of 20,000 +, metro adjacent",
.$f0002013=="05" ~ "Urban population of 20,000 +, metro adjacent",
.$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
.$f0002013=="07" ~ "Urban population of 2,500-19,999,metro adjacent",
.$f0002013=="08" ~ "Rural or < 2,500 urban population, metro adjacent",
.$f0002013=="09" ~ "Rural or < 2,500 urban population, not adjacent")))%>%
group_by(Place, date)%>%
summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
#mutate(ch=lag(ncases,2))%>%
#filter(f0002013!="01")%>%
ggplot(aes(y=rate, x=date))+#geom_line(aes(group=Place, color=stringr::str_wrap(Place,width = 15)), lwd=1.2)+
geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
labs(title = "US COVID-19 Incidence Rates",
subtitle = "March 1 - April 7, 2020: 4 day moving average",
caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
x = "Date",
y = "Rate per 100,000 population",
color = "RUCC Code") +
theme_minimal()+scale_color_brewer(type = "qual", palette = "Paired")+theme(legend.position="bottom")#+scale_y_continuous(trans='log2')
ggsave(filename="~/covidrate.png", height=10, width=12)
test2%>%
filter(date>"2020/3/1")%>%
filter(substr(cofips,1,2)=="48")%>%
mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
.$f0002013=="02" ~ "metro 250,000 to 1 million population",
.$f0002013=="03" ~ "metro <250,000 population",
.$f0002013=="04" ~ "Urban population of 20,000+, metro adjacent",
.$f0002013=="05" ~ "Urban population of 20,000+, not adjacent",
.$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
.$f0002013=="07" ~ "Urban population of 2,500-19,999, not adjacent",
.$f0002013=="08" ~ "Rural or <2,500 urban population, metro adjacent",
.$f0002013=="09" ~ "Rural or <2,500 urban population, not adjacent")))%>%
group_by(Place, date)%>%
dplyr::summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
#mutate(ch=lag(ncases,2))%>%
ggplot(aes(y=cfr, x=date))+#geom_point(aes(group=Place, color=Place))+ylim(c(0, 5))+
geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
labs(title = "Texas COVID-19 Case Fatality Ratio",
subtitle = "March 1 - April 7, 2020. 4 day moving average",
caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
x = "Date",
y = "Fatalities per 100 Cases",
color = "RUCC Code") +
theme_minimal()+ scale_color_brewer(type = "qual", palette = "Paired")+ theme(legend.position="bottom")#+scale_y_continuous(trans='log2')
ggsave(filename="~/txcovidcfr.png", height=10, width=12)
test2%>%
filter(date>"2020/3/1")%>%
filter(substr(cofips,1,2)=="48")%>%
mutate(Place = as.factor(case_when(.$f0002013=="01" ~ "metro >1 million population",
.$f0002013=="02" ~ "metro 250,000 to 1 million population",
.$f0002013=="03" ~ "metro < 250,000 population",
.$f0002013=="04" ~ "Urban population of 20,000 +, metro adjacent",
.$f0002013=="05" ~ "Urban population of 20,000 +, metro adjacent",
.$f0002013=="06" ~ "Urban population of 2,500-19,999, metro adjacent",
.$f0002013=="07" ~ "Urban population of 2,500-19,999,metro adjacent",
.$f0002013=="08" ~ "Rural or < 2,500 urban population, metro adjacent",
.$f0002013=="09" ~ "Rural or < 2,500 urban population, not adjacent")))%>%
group_by(Place, date)%>%
summarise(ncases=sum(value), ndeaths=sum(deaths), poptot=sum(POP))%>%
mutate(rate = 100000*(ncases/poptot), cfr= 100*(ndeaths/ncases) )%>%
#mutate(ch=lag(ncases,2))%>%
#filter(f0002013!="01")%>%
ggplot(aes(y=rate, x=date))+#geom_line(aes(group=Place, color=stringr::str_wrap(Place,width = 15)), lwd=1.2)+
geom_ma(aes(group=Place, color=stringr::str_wrap(Place,width = 15)),n=4,lty=1, lwd=1.1)+
labs(title = "TX COVID-19 Incidence Rates",
subtitle = "March 1 - April 7, 2020 - 4 day moving average",
caption = "Source: https://github.com/CSSEGISandData/COVID-19 \n Calculations by Corey S. Sparks, Ph.D.",
x = "Date",
y = "Rate per 100,000 population",
color = "RUCC Code") +
theme_minimal()+scale_color_brewer(type = "qual", palette = "Paired")+theme(legend.position="bottom")#+scale_y_continuous(trans='log2')
ggsave(filename="~/txcovidrate.png", height=10, width=12)
cos$fips<-as.character(cos$fips)
geodat<-geo_join(cos, test,"fips","cofips" )
library(INLA)
inla.pardiso()
w#quantile(mdat$confirmed, na.rm=T,p=seq(0,1,length.out = 7))
library(ggplot2)
require(scales)
options(scipen=10000)
brks<- quantile(geodat$value, na.rm=T,p=seq(0,1,length.out = 7))
brks
library(scales)
brks<-c(0,5,10,15,50,100,2000)
labs<-scales::comma(x = brks)
mdat%>%
mutate(cases=cut(confirmed, breaks =brks, lables=labs))%>%
ggplot()+geom_sf(aes(fill=cases))+
scale_fill_brewer()