2021/12/27

In the evening

Dinner

  • Eat spaghetti
  • Drink wine

A scatterplot.

A scatterplot.

Going to sleep

  • Get in bed
  • Count sheep

Exploring COVID-19 data

{nCov2019}

安裝package

#remotes::install_github("yulab-smu/nCov2019", dependencies = TRUE)

Statistic query

Querying the data takes a single command:

# Load nCov2019 and run its query(); the result is stored in `res`, which
# every later chunk reads from.
library("nCov2019")
res <- query()
## last update: 2021-12-20 
## Gloabl total  275036482  cases; and  5371168  deaths
## Gloabl total affect country or areas: 224
## Gloabl total recovered cases: 101413
## last update: 2021-12-20 
## Total Candidates Programs : 51 
## Total Candidates Programs : 84

Data types

# List the named components of the query result.
names(res)
## [1] "latest"       "global"       "historical"   "vaccine"      "therapeutics"
# Pull out the "latest" component; printing it reports its last update date.
x<- res$latest
print(x)
## last update: 2021-12-20

Global

head(x["Global"]) # "Global" selects all countries; head() shows the first six rows
##   country    cases deaths recovered   active todayCases todayDeaths
## 1      UK 11361387 147218   9783355  1430814      82886          45
## 2     USA 51765714 827323  40539875 10398516      66751         109
## 3  France  8625849 121493   7506909   997447      48473          75
## 4  Russia 10214790 297203   8984921   932666      27967        1023
## 5   Italy  5389155 135641   4891239   362275      24259          97
## 6 Germany  6812736 108963   5753300   950473      24190         127
##   todayRecovered population     tests    updated
## 1          41501   68408203 384287490 2021-12-20
## 2          24730  333844797 786478329 2021-12-20
## 3           8194   65485052 178020146 2021-12-20
## 4          32655  146026119 235100000 2021-12-20
## 5           9403   60331591 130368736 2021-12-20
## 6          29200   84176291  87852398 2021-12-20

x[c("USA","India")] # return only the rows for USA and India
##    country    cases deaths recovered   active todayCases todayDeaths
## 2      USA 51765714 827323  40539875 10398516      66751         109
## 14   India 34746838 477554  34178940    90344       6563         132
##    todayRecovered population     tests    updated
## 2           24730  333844797 786478329 2021-12-20
## 14           7469 1399865149 664109365 2021-12-20

s

x[c("USA","India")] # return only the rows for USA and India
##    country    cases deaths recovered   active todayCases todayDeaths
## 2      USA 51765714 827323  40539875 10398516      66751         109
## 14   India 34746838 477554  34178940    90344       6563         132
##    todayRecovered population     tests    updated
## 2           24730  333844797 786478329 2021-12-20
## 14           7469 1399865149 664109365 2021-12-20

j

# Rank all countries by confirmed cases, largest first, and show the top rows.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
df <- x["Global"]
head(df[order(df$cases, decreasing = TRUE), ])
##    country    cases deaths recovered   active todayCases todayDeaths
## 2      USA 51765714 827323  40539875 10398516      66751         109
## 14   India 34746838 477554  34178940    90344       6563         132
## 37  Brazil 22213762 617838  21414318   181606       1419          54
## 1       UK 11361387 147218   9783355  1430814      82886          45
## 4   Russia 10214790 297203   8984921   932666      27967        1023
## 7   Turkey  9171119  80415   8785600   305104      16910         171
##    todayRecovered population     tests    updated
## 2           24730  333844797 786478329 2021-12-20
## 14           7469 1399865149 664109365 2021-12-20
## 37              0  214771691  63776166 2021-12-20
## 1           41501   68408203 384287490 2021-12-20
## 4           32655  146026119 235100000 2021-12-20
## 7           22858   85667433 114375538 2021-12-20

C

# The `detail` element of the latest snapshot carries additional per-country
# columns (country metadata, per-million rates, ...).
x <- res$latest
head(x$detail)
##      updated country countryInfo._id countryInfo.iso2 countryInfo.iso3
## 1 2021-12-20      UK             826               GB              GBR
## 2 2021-12-20     USA             840               US              USA
## 3 2021-12-20  France             250               FR              FRA
## 4 2021-12-20  Russia             643               RU              RUS
## 5 2021-12-20   Italy             380               IT              ITA
## 6 2021-12-20 Germany             276               DE              DEU
##   countryInfo.lat countryInfo.long                           countryInfo.flag
## 1         54.0000          -2.0000 https://disease.sh/assets/img/flags/gb.png
## 2         38.0000         -97.0000 https://disease.sh/assets/img/flags/us.png
## 3         46.0000           2.0000 https://disease.sh/assets/img/flags/fr.png
## 4         60.0000         100.0000 https://disease.sh/assets/img/flags/ru.png
## 5         42.8333          12.8333 https://disease.sh/assets/img/flags/it.png
## 6         51.0000           9.0000 https://disease.sh/assets/img/flags/de.png
##      cases todayCases deaths todayDeaths recovered todayRecovered   active
## 1 11361387      82886 147218          45   9783355          41501  1430814
## 2 51765714      66751 827323         109  40539875          24730 10398516
## 3  8625849      48473 121493          75   7506909           8194   997447
## 4 10214790      27967 297203        1023   8984921          32655   932666
## 5  5389155      24259 135641          97   4891239           9403   362275
## 6  6812736      24190 108963         127   5753300          29200   950473
##   critical casesPerOneMillion deathsPerOneMillion     tests testsPerOneMillion
## 1      875             166082                2152 384287490            5617564
## 2    15741             155059                2478 786478329            2355820
## 3     2933             131722                1855 178020146            2718485
## 4     2300              69952                2035 235100000            1609986
## 5      966              89326                2248 130368736            2160870
## 6     4636              80934                1294  87852398            1043672
##   population     continent oneCasePerPeople oneDeathPerPeople oneTestPerPeople
## 1   68408203        Europe                6               465                0
## 2  333844797 North America                6               404                0
## 3   65485052        Europe                8               539                0
## 4  146026119        Europe               14               491                1
## 5   60331591        Europe               11               445                0
## 6   84176291        Europe               12               773                1
##   activePerOneMillion recoveredPerOneMillion criticalPerOneMillion
## 1            20915.82              143014.35                 12.79
## 2            31147.76              121433.30                 47.15
## 3            15231.67              114635.46                 44.79
## 4             6386.98               61529.55                 15.75
## 5             6004.73               81072.60                 16.01
## 6            11291.46               68348.22                 55.07

Historical data

# Historical time series: printing reports the last update date, and "Global"
# selects the per-date rows for all countries.
Z<- res$historical
print(Z)
## last update: 2021-12-19
head(Z["Global"])
##         country       date cases deaths recovered
## 1   Afghanistan 2020-01-22     0      0         0
## 197 Afghanistan 2020-01-23     0      0         0
## 393 Afghanistan 2020-01-24     0      0         0
## 589 Afghanistan 2020-01-25     0      0         0
## 785 Afghanistan 2020-01-26     0      0         0
## 981 Afghanistan 2020-01-27     0      0         0

Country

head(Z[c("China","UK","USA")])  # general form: Z[country, province]; head() shows only the first six rows, all China here
##      country       date cases deaths recovered
## 37     China 2020-01-22   548     17        28
## 233    China 2020-01-23   643     18        30
## 429    China 2020-01-24   920     26        36
## 625    China 2020-01-25  1406     42        39
## 821    China 2020-01-26  2075     56        49
## 1017   China 2020-01-27  2877     82        58

convert

Vaccine

# Vaccine candidates: summary() tabulates the number of candidates per phase.
X <- res$vaccine
summary(X)
##          phase candidates
## 1      Phase 3         10
## 2    Phase 2/3          3
## 3      Phase 2          2
## 4    Phase 1/2          9
## 5      Phase 1         13
## 6 Pre-clinical         14

All

# "all" returns the full candidate table (id, candidate, mechanism, sponsors, ...).
head(X["all"])
##    id candidate
## 1 id1    BNT162
## 2 id2 mRNA-1273
## 3 id3  Ad5-nCoV
## 4 id4   AZD1222
## 5 id5 CoronaVac
## 6 id6   Covaxin
##                                                                  mechanism
## 1                                                       mRNA-based vaccine
## 2                                                       mRNA-based vaccine
## 3                           Recombinant vaccine (adenovirus type 5 vector)
## 4 Replication-deficient viral vector vaccine (adenovirus from chimpanzees)
## 5                        Inactivated vaccine (formalin with alum adjuvant)
## 6                                                      Inactivated vaccine
##                   sponsors trialPhase
## 1         Pfizer, BioNTech    Phase 3
## 2                  Moderna    Phase 3
## 3        CanSino Biologics    Phase 3
## 4 The University of Oxford    Phase 3
## 5                  Sinovac    Phase 3
## 6           Bharat Biotech    Phase 3
##                                              institutions
## 1 Multiple study sites in Europe, North America and China
## 2  Kaiser Permanente Washington Health Research Institute
## 3                                         Tongji Hospital
## 4                          The University of Oxford,&nbsp
## 5              Sinovac Research and Development Co., Ltd.
## 6

summary

# Point X at the therapeutics data; summary() tabulates candidates per phase.
X <- res$therapeutics
summary(X)
##                                     phase candidates
## 1                                 Phase 3         13
## 2                             Phase 2/3/4          3
## 3                               Phase 2/3         28
## 4                               Phase 1/2          1
## 5                                 Phase 2         15
## 6                               Phase 3/4          2
## 7    No longer being studied for COVID-19          4
## 8                                 Various          1
## 9                                 Phase 1          4
## 10                             Phase 2b/3          2
## 11 No longer being developed for COVID-19          1
## 12                            Phase 1/2/3          1
## 13                              Phase 1/4          1
## 14                            Phase 1b/2a          1
## 15                                Phase 4          1
## 16                              Phase 2/2          1
## 17                               Phase 1b          4
## 18                              Phase 2/4          1

head

ID

# Look up one therapeutic candidate by its id; the result is a long free-text
# description (background, regulatory actions, trials, outcomes, status).
X[ID="id1"] 
## [1] "Background: Molnupiravir (Lagevrio, formerly known as MK-4482 and EIDD-2801) is an oral broad-spectrum antiviral that has shown effectiveness against infections such as influenza, chikungunya, Ebola and equine encephalitis. It has a similar mechanism of action to remdesivir and prevents replication of the virus. In animal models, molnupiravir inhibited the replication of SARS-CoV-2 and MERS in mice, SARS-CoV-2 in Syrian hamsters, and blocks transmission of SARS-CoV-2 in ferrets, according to preclinical papers.&nbsp;Regulatory actions: Australia: Provisional determination status has been granted.&nbsp;Bangladesh: Bangladesh has authorized the use of molnupiravir and is in the process of authorizing generic manufacturers to supply the drug to its citizens.Canada: Merck Canada has initiated a rolling submission. &nbsp;EU: EMA started a rolling review of molnupiravir as of 25 October and noted that CHMP will provide EU-wide recommendations for early use of the treatment prior to authorization, given increasing case numbers in the region. On 23 November, EMA received an application for marketing authorization of molnupiravir under the name Lagrevrio. The European Commission has listed molnupiravir in its portfolio of ten most promising COVID-19 therapeutics.&nbsp;UK: On 4 November, MHRA approved molnupiravir for use in patients with mild or moderate COVID-19 at high risk of developing severe disease. US: Merck and Ridgeback Bio have applied for an EUA; an advisory committee recommended FDA authorize molnupiravir for emergency use in a 13-10 vote. Trials: A Phase 1 trial of 130 participants who received molnupiravir or placebo has been completed (NCT04392219) as has a Phase 2a safety trial (NCT04405570). A Phase 2/3 trial, the MOVe-IN trial of hospitalized adults (NCT04575584) was terminated, while its outpatient version, MOVe-OUT (NCT04575597) is active, but no longer recruiting. 
A trial to assess molnupiravir's ability to reduce viral shedding of SARS-CoV-2 (NCT04405739) continues. The Phase 3 MOVe-AHEAD trial, evaluating molnupiravir as post-exposure prophylaxis for individuals living with a person who has tested positive for COVID-19, is currently recruiting (NCT04939428). A generics manufacturer producing and supplying molnupiravir for India, Hetero, is conducting Phase 3 open-label studies on patients with mild and moderate COVID-19.Outcomes: Merck announced results from the Phase 3 MOVe-OUT trial in October 2021, which showed a 50% reduction in hospitalization for non-hospitalized patients with mild or moderate COVID-19 who received the drug. Further results from MOVe-OUT, announced on 26 November, lowered the drug's effectiveness at reducing the risk of hospitalization to 30%. Hetero, a manufacturer that has entered a non-exclusive licensing agreement with Merck regarding molnupiravir, announced results from their own Phase 3 open-label study that showed taking molnupiravir resulted in early clinical improvement and improved median time to clinical improvement compared with standard of care. Early phase results, including from the Phase 1 trial published in Antimicrobial Agents and Chemotherapy, showed molnupiravir was safe and well-tolerated in humans. Results from a Phase 2a trial of a secondary outcome in 202 participants, presented at the CROI meeting, showed a significant reduction in negative SARS-CoV-2 viral culture at 5 days compared with the placebo group. &nbsp;Status: Merck and Ridgeback Biotherapeutics announced on 15 April that they will proceed with the Phase 3 portion of the MOVe-OUT trial, but would not be continuing with the MOVe-IN trial after an analysis revealed molnupiravir was not effective for hospitalized adults with COVID-19. Pending an EUA, the US government has agreed to purchase 1.7 million 5-day treatment courses of molnupiravir for $1.2 billion. 
Other countries have also taken steps to procure molnupiravir following positive Phase 3 results, such as Australia, Malaysia, South Korea, and Thailand."

Visualization

Plot

# Plot the latest snapshot using the plot method's default settings.
X <- res$latest
plot(X)

## x

# Plot the latest snapshot using the plot method's default settings.
X <- res$latest
plot(X)

Green

# Same plot call, switched to the "tests" type with the "Green" palette.
plot(X, type = "tests", palette = "Green")

## ggplot: it can also be used to intuitively compare the number of new confirmed cases per day among different countries.

library(ggplot2)
library(dplyr)

ggplot

# Daily new confirmed cases for a handful of countries, drawn on a log2 scale.
X <- res$historical
tmp <- X["global"] %>%
  group_by(country) %>%
  arrange(country, date) %>%
  # Day-over-day change of `cases` within each country; the first day of every
  # country gets 0 because the lag defaults to that country's first value.
  mutate(diff = cases - lag(cases, default = first(cases))) %>%
  filter(country %in% c("Australia", "Japan", "Italy", "Germany", "China")) %>%
  ungroup()

# log2() matches the "Log2" axis label (plain log() is the natural logarithm);
# the +1 keeps zero-increase days finite.
ggplot(tmp, aes(date, log2(diff + 1), color = country)) +
  geom_line() +
  labs(y = "Log2(daily increase cases)") +
  scale_x_date(date_labels = "%Y-%m-%d") +
  # The complete theme goes first: added after theme(), theme_minimal() would
  # replace the axis.text rotation set below.
  theme_minimal() +
  theme(axis.text = element_text(angle = 15, hjust = 1))

plot

# Plot the historical data for region "Global" on 2020-08-01, showing cases.
Y <- res$historical
plot(Y, region = "Global", date = "2020-08-01", type = "cases")

## Animations plot

Other plots

library(ggplot2)
x <- res$historical
d <- x["Japan"] # swap "Japan" for any other country name
d <- d[order(d$cases), ]

# Bar chart of the case count per date; the caption reports the most recent
# date present in the data.
ggplot(d, aes(date, cases)) +
  geom_col(fill = "firebrick") +
  theme_minimal(base_size = 14) +
  xlab(NULL) +
  ylab(NULL) +
  scale_x_date(date_labels = "%Y/%m/%d") +
  labs(caption = paste("accessed date:", max(d$date)))

library("dplyr")
library("ggrepel")

# Daily-increase trend for the first ten countries of the latest snapshot.
x <- res$latest
y <- res$historical

country_list <- x["global"]$country[1:10]

# Keep records after 2020-10-01, then derive each country's day-over-day
# change in `cases` (0 on the first retained day of every country).
recent <- subset(y[country_list], date > as.Date("2020-10-01"))
df <- recent %>%
  group_by(country) %>%
  arrange(country, date) %>%
  mutate(increase = cases - lag(cases, default = first(cases)))

# One smoothed curve per country, labelled at the most recent date.
latest_rows <- df[df$date == max(df$date), ]
ggplot(df, aes(x = date, y = increase, color = country)) +
  geom_smooth() +
  geom_label_repel(
    aes(label = paste(country, increase)),
    data = latest_rows, hjust = 1
  ) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  theme(legend.position = "none")

library('tidyr')
library('ggrepel')
library('ggplot2')
y <- res$historical
country <- "India"

# Reshape to long form: every column except date and country becomes one row
# per date, with the column name in `curve` and its value in `count`.
# pivot_longer() replaces the superseded gather().
d <- y[country]
d <- pivot_longer(d, cols = -c(date, country),
                  names_to = "curve", values_to = "count")

# One coloured line per curve, each labelled at the most recent date.
ggplot(d, aes(date, count, color = curve)) +
  geom_point() +
  geom_line() +
  labs(x = NULL, y = NULL,
       title = paste("Trend of cases, recovered and deaths in", country)) +
  scale_color_manual(values = c("#f39c12", "#dd4b39", "#00a65a")) +
  theme_bw() +
  geom_label_repel(aes(label = paste(curve, count)),
                   data = d[d$date == max(d$date), ], hjust = 1) +
  theme(legend.position = "none",
        axis.text = element_text(angle = 15, hjust = 1)) +
  scale_x_date(date_labels = "%Y-%m-%d")

library('tidyr')
library('ggrepel')
library('ggplot2')

# Worldwide history, restricted to rows whose case count is above zero.
y <- res$historical
d <- y["global"]
has_cases <- d$cases > 0
d <- d[has_cases, ]

# Number of distinct countries that remain.
length(unique(d$country))
## [1] 196
# Keep the window from seven days before the last available date (on or before
# 2020-03-19) up to that date.
d <- subset(d, date <= as.Date("2020-3-19"))
max_time <- max(d$date)
min_time <- max_time - 7
d <- d[d$date >= min_time, ]

# The final day's rows fix the vertical order: countries sorted by case count.
dd <- d[d$date == max(d$date, na.rm = TRUE), ]
country_order <- unique(dd$country[order(dd$cases)])
d$country <- factor(d$country, levels = country_order)

# Legend ticks for the log-transformed fill scale.
breaks <- c(0, 1000, 10000, 100000, 10000000)

# Heatmap: one tile per country and day, filled by case count.
ggplot(d, aes(date, country)) +
  geom_tile(aes(fill = cases), color = 'black') +
  scale_fill_viridis_c(trans = 'log', breaks = breaks, labels = breaks) +
  xlab(NULL) +
  ylab(NULL) +
  scale_x_date(date_labels = "%Y-%m-%d") +
  theme_minimal()

# library() fails loudly when a package is missing; require() only returns FALSE.
library(dplyr)

y <- res$historical
d <- y["global"]

# Snapshot date: only the rows recorded on this day are plotted.
time <- as.Date("2020-03-19")
dd <- filter(d, date == time) %>%
  arrange(desc(cases))

# Keep at most the 40 countries with the most cases. head() never pads with
# all-NA rows, which dd[1:40, ] does when fewer than 40 rows exist.
dd <- head(dd, 40)
dd$country <- factor(dd$country, levels = dd$country)

# One text angle per bar, stepping evenly around the full circle.
n_bars <- nrow(dd)
dd$angle <- seq_len(n_bars) * 360 / n_bars

# Bars above the cutoff are labelled inside the bar, all others outside the
# ring. Using > and <= gives every country exactly one label, including a
# country sitting exactly on the cutoff.
label_cutoff <- 700

library(ggplot2)
p <- ggplot(dd, aes(country, cases, fill = cases)) +
  geom_col(width = 1, color = "grey90") +
  # Constant-height overlay bars (y = 5, 3, 2); presumably decorative inner
  # rings and a white centre -- confirm against the rendered figure.
  geom_col(aes(y = I(5)), width = 1, fill = "grey90", alpha = .2) +
  geom_col(aes(y = I(3)), width = 1, fill = "grey90", alpha = .2) +
  geom_col(aes(y = I(2)), width = 1, fill = "white") +
  scale_y_log10() +
  scale_fill_gradientn(
    colors = c("darkgreen", "green", "orange", "firebrick", "red"),
    trans = "log"
  ) +
  geom_text(aes(label = paste(country, cases, sep = "\n"),
                y = cases * .8, angle = angle),
            data = function(d) d[d$cases > label_cutoff, ],
            size = 3, color = "white", fontface = "bold", vjust = 1) +
  geom_text(aes(label = paste0(cases, " cases ", country),
                y = max(cases) * 2, angle = angle + 90),
            data = function(d) d[d$cases <= label_cutoff, ],
            size = 3, vjust = 0) +
  coord_polar(direction = -1) +
  theme_void() +
  theme(legend.position = "none") +
  ggtitle("COVID19 global trend", time)
p

# library() fails loudly when a package is missing; require() only returns FALSE.
library(dplyr)
library(ggplot2)
library(shadowtext)
y <- res$historical
d <- y["global"]

# Keep only the records above one million cases and count, per country, the
# days elapsed since its first such record.
dd <- d %>%
  as_tibble %>%
  filter(cases > 1000000) %>%
  group_by(country) %>%
  mutate(days_since_1m = as.numeric(date - min(date))) %>%
  ungroup

# Axis ticks above the one-million filter threshold. The previous vector
# repeated 500000 and consisted mostly of values below the threshold, which
# can never appear on the axis.
breaks <- c(2e6, 5e6, 1e7, 2e7, 5e7)
# Plain, comma-grouped labels instead of scientific notation.
break_labels <- format(breaks, big.mark = ",", scientific = FALSE, trim = TRUE)

p <- ggplot(dd, aes(days_since_1m, cases, color = country)) +
  # A single linear fit across all countries (group = 1), drawn dashed.
  geom_smooth(method = 'lm', aes(group = 1),
              data = dd,
              color = 'grey10', linetype = 'dashed') +
  geom_line(size = 0.8) +
  geom_point(pch = 21, size = 1) +
  scale_y_log10(expand = expansion(add = c(0, 0.1)),
                breaks = breaks, labels = break_labels) +
  scale_x_continuous(expand = expansion(add = c(0, 1))) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "none",
    plot.margin = margin(3, 15, 3, 3, "mm")
  ) +
  # Clipping is off so the country labels may extend past the panel edge.
  coord_cartesian(clip = "off") +
  # Label each country at its latest point (largest days_since_1m).
  geom_shadowtext(aes(label = paste0(" ", country)), hjust = 0, vjust = 0,
                  data = . %>% group_by(country) %>% top_n(1, days_since_1m),
                  bg.color = "white") +
  labs(x = "Number of days since 1,000,000th case", y = "",
       subtitle = "Total number of cases")
print(p)