Table of Contents

1. Prepare workspace and datasets

1.1. Import packages

library(tidyverse) 
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(skimr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

1.2. Load datasets

# Load the per-country vaccination table (one row per country).
worldwide <- read_csv("Worldwide Vaccine Data.csv")
## Rows: 180 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Country
## dbl (4): Doses administered per 100 people, Total doses administered, % of p...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview column names and types before renaming.
glimpse(worldwide)
## Rows: 180
## Columns: 5
## $ Country                             <chr> "Afghanistan", "Albania", "Algeria~
## $ `Doses administered per 100 people` <dbl> 8.2, 59.0, 23.0, 8.9, 111.0, 12.0,~
## $ `Total doses administered`          <dbl> 3133227, 1674093, 9989662, 2820134~
## $ `% of population vaccinated`        <dbl> 2.0, 32.0, 14.0, 5.8, 65.0, 8.0, 7~
## $ `% of population fully vaccinated`  <dbl> NA, 26.0, 9.7, 3.1, 46.0, 4.4, 70.~
# Swap the long spreadsheet headers for short snake_case column names.
worldwide <- rename(
  worldwide,
  country = `Country`,
  doses_per_100 = `Doses administered per 100 people`,
  total_doses = `Total doses administered`,
  pct_pop_vaccinated = `% of population vaccinated`,
  pct_pop_fully_vaccinated = `% of population fully vaccinated`
)

skim_without_charts(worldwide)
Data summary
Name worldwide
Number of rows 180
Number of columns 5
_______________________
Column type frequency:
character 1
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 32 0 180 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
doses_per_100 0 1.00 67.98 52.87 0.2 18.5 61.0 112.5 202
total_doses 0 1.00 33566984.45 177673772.22 31332.0 438282.8 3393275.0 13601571.0 2190792000
pct_pop_vaccinated 2 0.99 38.04 27.22 0.1 12.0 37.5 64.0 94
pct_pop_fully_vaccinated 1 0.99 29.95 25.10 0.1 6.4 26.0 51.5 84
# Lookup table of country names with their codes and region / sub-region
# labels. It supplies the three-letter codes that the plotly map uses as
# locations.

code <- read_csv("country_code.csv")
## Rows: 249 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): name, alpha-2, alpha-3, iso_3166-2, region, sub-region, intermediat...
## dbl (4): country-code, region-code, sub-region-code, intermediate-region-code
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the lookup table's columns before trimming it down.
glimpse(code)
## Rows: 249
## Columns: 11
## $ name                       <chr> "Afghanistan", "Åland Islands", "Albania", ~
## $ `alpha-2`                  <chr> "AF", "AX", "AL", "DZ", "AS", "AD", "AO", "~
## $ `alpha-3`                  <chr> "AFG", "ALA", "ALB", "DZA", "ASM", "AND", "~
## $ `country-code`             <dbl> 4, 248, 8, 12, 16, 20, 24, 660, 10, 28, 32,~
## $ `iso_3166-2`               <chr> "ISO 3166-2:AF", "ISO 3166-2:AX", "ISO 3166~
## $ region                     <chr> "Asia", "Europe", "Europe", "Africa", "Ocea~
## $ `sub-region`               <chr> "Southern Asia", "Northern Europe", "Southe~
## $ `intermediate-region`      <chr> NA, NA, NA, NA, NA, NA, "Middle Africa", "C~
## $ `region-code`              <dbl> 142, 150, 150, 2, 9, 150, 2, 19, NA, 19, 19~
## $ `sub-region-code`          <dbl> 34, 154, 39, 15, 61, 39, 202, 419, NA, 419,~
## $ `intermediate-region-code` <dbl> NA, NA, NA, NA, NA, NA, 17, 29, NA, 29, 5, ~
# Keep only the join key, the three-letter code and the region fields,
# renaming them to snake_case in the same step.
code <- select(
  code,
  country = name,
  country_code = `alpha-3`,
  region,
  region_code = `region-code`,
  subregion = `sub-region`,
  subregion_code = `sub-region-code`
)

skim_without_charts(code)
Data summary
Name code
Number of rows 249
Number of columns 6
_______________________
Column type frequency:
character 4
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 44 0 249 0
country_code 0 1 3 3 0 249 0
region 1 1 4 8 0 5 0
subregion 1 1 9 31 0 17 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
region_code 1 1 65.95 67.35 2 9.00 19 142 150
subregion_code 1 1 179.87 138.33 15 53.75 154 202 419
# GDP table in long format (one row per country/aggregate and year); the
# 2016 slice is later joined onto the vaccination data.

gdp <- read_csv("gdp_csv.csv")
## Rows: 11507 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): Country Name, Country Code
## dbl (2): Year, Value
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the GDP table's columns.
glimpse(gdp)
## Rows: 11,507
## Columns: 4
## $ `Country Name` <chr> "Arab World", "Arab World", "Arab World", "Arab World",~
## $ `Country Code` <chr> "ARB", "ARB", "ARB", "ARB", "ARB", "ARB", "ARB", "ARB",~
## $ Year           <dbl> 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1~
## $ Value          <dbl> 25760683041, 28434203615, 31385499664, 36426909888, 433~
# Take the 2016 rows and express Value in billions, rounded to two decimals.
gdp_2016 <- gdp %>%
  filter(Year == 2016) %>%
  transmute(
    country = `Country Name`,
    country_code = `Country Code`,
    gdp_billion = round(Value / 1e9, 2)
  )

skim_without_charts(gdp_2016)
Data summary
Name gdp_2016
Number of rows 236
Number of columns 3
_______________________
Column type frequency:
character 2
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 52 0 236 0
country_code 0 1 3 3 0 236 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
gdp_billion 0 1 2646.4 8606.93 0.03 10.43 56.31 653.43 75845.11
# GDP per capita in wide format (one column per year, 1990-2019); only the
# 2016 column is carried forward.

gdp_per_capita <- read_csv("gdp_per_capita.csv")
## Rows: 260 Columns: 32
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): Country, Country Code
## dbl (29): 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, ...
## lgl  (1): 2019
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the wide per-capita table's columns.
glimpse(gdp_per_capita)
## Rows: 260
## Columns: 32
## $ Country        <chr> "Aruba", "Afghanistan", "Angola", "Albania", "Arab Worl~
## $ `Country Code` <chr> "ABW", "AFG", "AGO", "ALB", "ARB", "ARE", "ARG", "ARM",~
## $ `1990`         <dbl> 24101.1094, NA, 3089.6834, 2549.4730, 6808.2070, 72906.~
## $ `1991`         <dbl> 25870.7559, NA, 3120.3561, 1909.1140, 6872.2732, 71753.~
## $ `1992`         <dbl> 26533.3439, NA, 2908.1608, 1823.3077, 7255.3284, 71567.~
## $ `1993`         <dbl> 27430.7524, NA, 2190.7682, 2057.4497, 7458.6471, 70082.~
## $ `1994`         <dbl> 28656.5202, NA, 2195.5323, 2289.8731, 7645.6829, 72471.~
## $ `1995`         <dbl> 28648.9900, NA, 2496.1995, 2665.7649, 7774.2074, 74994.~
## $ `1996`         <dbl> 28499.0894, NA, 2794.8969, 2980.0663, 8094.1498, 76848.~
## $ `1997`         <dbl> 30215.9492, NA, 2953.3427, 2717.3621, 8397.5157, 80390.~
## $ `1998`         <dbl> 30512.6839, NA, 3027.3418, 3021.0147, 8797.6626, 77421.~
## $ `1999`         <dbl> 30728.0545, NA, 3037.7212, 3471.6526, 8938.4515, 76654.~
## $ `2000`         <dbl> 33120.0542, NA, 3097.3073, 3861.3342, 9415.6326, 82215.~
## $ `2001`         <dbl> 32117.9123, NA, 3191.2663, 4301.3528, 9584.1083, 80843.~
## $ `2002`         <dbl> 30862.2227, 839.4859, 3564.0960, 4661.3716, 9581.7971, ~
## $ `2003`         <dbl> 31387.2830, 888.1534, 3614.6073, 4994.5188, 9974.6419, ~
## $ `2004`         <dbl> 34176.4646, 885.8408, 3978.6972, 5422.7785, 10937.3161,~
## $ `2005`         <dbl> 35207.5772, 979.2740, 4555.1858, 5865.3062, 11646.4861,~
## $ `2006`         <dbl> 36362.219, 1031.643, 5048.876, 6559.783, 12442.188, 799~
## $ `2007`         <dbl> 37865.4935, 1176.1264, 5697.2513, 7276.3030, 13041.9255~
## $ `2008`         <dbl> 38515.2638, 1218.1182, 6221.4234, 8228.3742, 13739.7278~
## $ `2009`         <dbl> 34693.0868, 1454.6630, 6092.7832, 8814.8109, 13640.8468~
## $ `2010`         <dbl> 33732.8475, 1637.3780, 6230.2970, 9628.0258, 14127.7780~
## $ `2011`         <dbl> 35492.6185, 1626.7648, 6346.3951, 10207.7524, 14518.827~
## $ `2012`         <dbl> 35498.9821, 1806.7639, 6772.5283, 10526.2355, 15423.465~
## $ `2013`         <dbl> 37419.8928, 1874.7656, 6980.4230, 10571.0107, 15824.780~
## $ `2014`         <dbl> 38223.372, 1897.526, 7199.245, 11259.226, 16153.245, 66~
## $ `2015`         <dbl> 38249.0549, 1886.6930, 7096.6006, 11662.0305, 16501.792~
## $ `2016`         <dbl> 38390.2717, 1896.9925, 6756.9351, 11868.1790, 16935.383~
## $ `2017`         <dbl> 39454.6298, 1934.6368, 6650.5849, 12930.1400, 17099.889~
## $ `2018`         <dbl> NA, 1955.0062, 6452.3552, 13364.1554, 17570.1376, 75075~
## $ `2019`         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
# Pull the 2016 column out of the wide table and give all three kept columns
# snake_case names.
capita_2016 <- select(
  gdp_per_capita,
  country = Country,
  country_code = `Country Code`,
  gdp_per_capita_2016 = `2016`
)

skim_without_charts(capita_2016)
Data summary
Name capita_2016
Number of rows 260
Number of columns 3
_______________________
Column type frequency:
character 2
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 52 0 260 0
country_code 0 1 3 3 0 260 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
gdp_per_capita_2016 22 0.92 19998.61 20605.85 743.9 4969.3 13643.22 27594.41 123573.6

1.3. Join datasets

# Trial join on country name, keeping only the key and the looked-up code so
# that unmatched names show up as missing country_code values.
check_country_code <- worldwide %>%
  left_join(code, by = "country") %>%
  select(country, country_code)
skim_without_charts(check_country_code)
Data summary
Name check_country_code
Number of rows 180
Number of columns 2
_______________________
Column type frequency:
character 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1.00 4 32 0 180 0
country_code 14 0.92 3 3 0 166 0

Fourteen countries have no country code because the two datasets spell their names differently.

# Rows whose country name found no match in the code table.
missing_cc <- check_country_code[is.na(check_country_code$country_code), ]

missing_cc
## # A tibble: 14 x 2
##    country                country_code
##    <chr>                  <chr>       
##  1 Bosnia and Herzegovina <NA>        
##  2 Brunei                 <NA>        
##  3 Cape Verde             <NA>        
##  4 Dominican Rep.         <NA>        
##  5 Guinea-Bissau          <NA>        
##  6 Ivory Coast            <NA>        
##  7 Macau                  <NA>        
##  8 Mainland China         <NA>        
##  9 North Macedonia        <NA>        
## 10 Republic of the Congo  <NA>        
## 11 São Tomé and Príncipe  <NA>        
## 12 U.A.E.                 <NA>        
## 13 U.K.                   <NA>        
## 14 West Bank & Gaza       <NA>
# Harmonise the country names in `worldwide` with the spellings used in
# `code`, so the name-based join below finds a code for every row.
# Keys are the names as they appear in `worldwide`; values are the spellings
# used in `code`.
country_name_fixes <- c(
  "Bosnia and Herzegovina" = "Bosnia And Herzegovina",
  "Brunei" = "Brunei Darussalam",
  "Cape Verde" = "Cabo Verde",
  "Dominican Rep." = "Dominican Republic",
  "Guinea-Bissau" = "Guinea Bissau",
  "Ivory Coast" = "Côte D'Ivoire",
  "Macau" = "Macao",
  "Mainland China" = "China",
  "North Macedonia" = "Macedonia",
  # The Republic of the Congo is a different country from the Democratic
  # Republic of the Congo, which this entry previously pointed at (attaching
  # the wrong country code and GDP figures).
  # NOTE(review): assumes the lookup table lists it as "Congo" - confirm.
  "Republic of the Congo" = "Congo",
  "São Tomé and Príncipe" = "Sao Tome and Principe",
  "U.A.E." = "United Arab Emirates",
  "U.K." = "United Kingdom",
  "West Bank & Gaza" = "Palestine, State of",
  "South Korea" = "Korea, Republic of"
)

# Address the `country` column by name instead of by position. Names without
# an entry in the lookup come back as NA and fall through to the original
# value, so they are left unchanged.
worldwide <- worldwide %>%
  mutate(
    country = coalesce(unname(country_name_fixes[country]), country)
  )
world <- left_join(worldwide, code, by = "country")
skim_without_charts(world)
Data summary
Name world
Number of rows 180
Number of columns 10
_______________________
Column type frequency:
character 4
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 34 0 180 0
country_code 0 1 3 3 0 180 0
region 0 1 4 8 0 5 0
subregion 0 1 9 31 0 17 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
doses_per_100 0 1.00 67.98 52.87 0.2 18.5 61.0 112.5 202
total_doses 0 1.00 33566984.45 177673772.22 31332.0 438282.8 3393275.0 13601571.0 2190792000
pct_pop_vaccinated 2 0.99 38.04 27.22 0.1 12.0 37.5 64.0 94
pct_pop_fully_vaccinated 1 0.99 29.95 25.10 0.1 6.4 26.0 51.5 84
region_code 0 1.00 73.39 68.67 2.0 2.0 19.0 142.0 150
subregion_code 0 1.00 173.78 131.36 15.0 39.0 154.0 202.0 419
# Attach 2016 GDP by three-letter code. Only the key and the value are kept
# from the GDP table so its name column does not collide with world$country.
gdp_2016 <- gdp_2016[c("country_code", "gdp_billion")]
world <- world %>% left_join(gdp_2016, by = "country_code")
skim_without_charts(world)
Data summary
Name world
Number of rows 180
Number of columns 11
_______________________
Column type frequency:
character 4
numeric 7
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 34 0 180 0
country_code 0 1 3 3 0 180 0
region 0 1 4 8 0 5 0
subregion 0 1 9 31 0 17 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
doses_per_100 0 1.00 67.98 52.87 0.20 18.50 61.0 112.5 2.020000e+02
total_doses 0 1.00 33566984.45 177673772.22 31332.00 438282.75 3393275.0 13601571.0 2.190792e+09
pct_pop_vaccinated 2 0.99 38.04 27.22 0.10 12.00 37.5 64.0 9.400000e+01
pct_pop_fully_vaccinated 1 0.99 29.95 25.10 0.10 6.40 26.0 51.5 8.400000e+01
region_code 0 1.00 73.39 68.67 2.00 2.00 19.0 142.0 1.500000e+02
subregion_code 0 1.00 173.78 131.36 15.00 39.00 154.0 202.0 4.190000e+02
gdp_billion 11 0.94 438.62 1755.83 0.18 10.68 38.3 238.5 1.862447e+04
# Countries that picked up no 2016 GDP value in the join.
missing_gdp <- world[is.na(world$gdp_billion), ]
missing_gdp
## # A tibble: 11 x 11
##    country          doses_per_100 total_doses pct_pop_vaccinat~ pct_pop_fully_v~
##    <chr>                    <dbl>       <dbl>             <dbl>            <dbl>
##  1 Aruba                    146        155285              76               70  
##  2 Cuba                     168      19073986              76               42  
##  3 Curaçao                  118        186219              62               56  
##  4 Djibouti                   6.9       67229               4.2              2.7
##  5 French Polynesia         101        282180              54               47  
##  6 Libya                     22       1501622              20                2.5
##  7 New Caledonia             72        206784              44               28  
##  8 South Sudan                0.9      100621               0.7              0.3
##  9 Syria                      3.1      533949               1.7              1.5
## 10 Taiwan                    58      13856466              50                8.2
## 11 Venezuela                 39      11094206              24               15  
## # ... with 6 more variables: country_code <chr>, region <chr>,
## #   region_code <dbl>, subregion <chr>, subregion_code <dbl>, gdp_billion <dbl>
# Fill in GDP (in billions) for the countries the GDP table did not cover.
# NOTE(review): these figures were looked up by hand (the original note says
# "by Google"); confirm they are comparable with the joined 2016 values.
manual_gdp_billion <- c(
  "Aruba" = 2.96,
  "Cuba" = 91.37,
  "Curaçao" = 3.12,
  "Djibouti" = 2.60,
  "French Polynesia" = 5.49,
  "Libya" = 26.2,
  "New Caledonia" = 2.68,
  "South Sudan" = 3.50,
  "Syria" = 12.37,
  "Taiwan" = 543.08,
  "Venezuela" = 279.25
)

# Write into the column by name rather than by position (previously column
# 11), so adding or reordering columns upstream cannot redirect these values
# into a different variable.
gdp_rows <- match(names(manual_gdp_billion), world$country)
world$gdp_billion[gdp_rows] <- unname(manual_gdp_billion)
# Attach 2016 GDP per capita by three-letter code, keeping only the key and
# the value from the per-capita table.
capita_2016 <- capita_2016[c("country_code", "gdp_per_capita_2016")]
world <- world %>% left_join(capita_2016, by = "country_code")
skim_without_charts(world)
Data summary
Name world
Number of rows 180
Number of columns 12
_______________________
Column type frequency:
character 4
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 34 0 180 0
country_code 0 1 3 3 0 180 0
region 0 1 4 8 0 5 0
subregion 0 1 9 31 0 17 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
doses_per_100 0 1.00 67.98 52.87 0.20 18.50 61.00 112.50 2.020000e+02
total_doses 0 1.00 33566984.45 177673772.22 31332.00 438282.75 3393275.00 13601571.00 2.190792e+09
pct_pop_vaccinated 2 0.99 38.04 27.22 0.10 12.00 37.50 64.00 9.400000e+01
pct_pop_fully_vaccinated 1 0.99 29.95 25.10 0.10 6.40 26.00 51.50 8.400000e+01
region_code 0 1.00 73.39 68.67 2.00 2.00 19.00 142.00 1.500000e+02
subregion_code 0 1.00 173.78 131.36 15.00 39.00 154.00 202.00 4.190000e+02
gdp_billion 0 1.00 417.22 1703.59 0.18 9.34 35.09 225.69 1.862447e+04
gdp_per_capita_2016 9 0.95 20350.56 21954.26 780.91 4693.49 12693.56 29290.47 1.235736e+05
# Countries that picked up no 2016 GDP-per-capita value in the join.
missing_capita <- world[is.na(world$gdp_per_capita_2016), ]
missing_capita
## # A tibble: 9 x 12
##   country          doses_per_100 total_doses pct_pop_vaccinated pct_pop_fully_v~
##   <chr>                    <dbl>       <dbl>              <dbl>            <dbl>
## 1 Cuba                     168      19073986               76               42  
## 2 Djibouti                   6.9       67229                4.2              2.7
## 3 French Polynesia         101        282180               54               47  
## 4 New Caledonia             72        206784               44               28  
## 5 Somalia                    2.8      430762                1.6              1.2
## 6 South Sudan                0.9      100621                0.7              0.3
## 7 Syria                      3.1      533949                1.7              1.5
## 8 Taiwan                    58      13856466               50                8.2
## 9 Venezuela                 39      11094206               24               15  
## # ... with 7 more variables: country_code <chr>, region <chr>,
## #   region_code <dbl>, subregion <chr>, subregion_code <dbl>,
## #   gdp_billion <dbl>, gdp_per_capita_2016 <dbl>
# Fill in GDP per capita for the countries the per-capita table did not
# cover.
# NOTE(review): these figures were looked up by hand (the original note says
# "by Google"); confirm they use the same year and basis as the joined 2016
# column.
manual_gdp_per_capita <- c(
  "Cuba" = 8060,
  "Djibouti" = 2602,
  "Somalia" = 187,
  "French Polynesia" = 22000,
  "New Caledonia" = 32831,
  "South Sudan" = 298,
  "Syria" = 709,
  "Taiwan" = 48128,
  "Venezuela" = 9092
)

# Write into the column by name rather than by position (previously column
# 12), so adding or reordering columns upstream cannot redirect these values
# into a different variable.
capita_rows <- match(names(manual_gdp_per_capita), world$country)
world$gdp_per_capita_2016[capita_rows] <- unname(manual_gdp_per_capita)

# Final review of the dataset
skim_without_charts(world)
Data summary
Name world
Number of rows 180
Number of columns 12
_______________________
Column type frequency:
character 4
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 34 0 180 0
country_code 0 1 3 3 0 180 0
region 0 1 4 8 0 5 0
subregion 0 1 9 31 0 17 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
doses_per_100 0 1.00 67.98 52.87 0.20 18.50 61.00 112.50 2.020000e+02
total_doses 0 1.00 33566984.45 177673772.22 31332.00 438282.75 3393275.00 13601571.00 2.190792e+09
pct_pop_vaccinated 2 0.99 38.04 27.22 0.10 12.00 37.50 64.00 9.400000e+01
pct_pop_fully_vaccinated 1 0.99 29.95 25.10 0.10 6.40 26.00 51.50 8.400000e+01
region_code 0 1.00 73.39 68.67 2.00 2.00 19.00 142.00 1.500000e+02
subregion_code 0 1.00 173.78 131.36 15.00 39.00 154.00 202.00 4.190000e+02
gdp_billion 0 1.00 417.22 1703.59 0.18 9.34 35.09 225.69 1.862447e+04
gdp_per_capita_2016 0 1.00 20021.40 21744.59 187.00 4190.69 12323.67 28768.61 1.235736e+05

2. Exploratory Data Analysis

2.1. Worldwide Map of Covid-19 Vaccinations (Doses Per 100 People)

# World map coloured by doses per 100 people, located by three-letter
# country code; hovering shows the country name and its value.
plot_geo(world) %>%
  add_trace(
    z = ~doses_per_100,
    locations = ~country_code,
    text = ~paste("Country: ", country, "<br>",
                  "Doses per 100: ", doses_per_100),
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Worldwide Map of Covid-19 Vaccinations<br>Doses Per 100 People",
    geo = list(scope = "world")
  ) %>%
  colorbar(title = "Doses per 100 people", outlinewidth = 0)

2.2. High GDP Per Capita vs. Doses Per 100 People

# Scatter of doses per 100 people against GDP per capita (log x axis), one
# colour per region.
plot_ly(
  world,
  x = ~gdp_per_capita_2016,
  y = ~doses_per_100,
  color = ~region
) %>%
  add_markers(
    text = ~paste("Country: ", country, "<br>",
                  "GDP per Capita: ", gdp_per_capita_2016, "<br>",
                  "Doses per 100: ", doses_per_100),
    hoverinfo = "text"
  ) %>%
  layout(
    title = "GDP per Capita vs. Doses per 100<br>Click Region To Filter",
    xaxis = list(
      title = "GDP per Capita 2016, Log Scale",
      type = "log",
      showgrid = FALSE
    ),
    yaxis = list(title = "Doses per 100 People")
  )

2.3. Top 15 Highest GDP and Percent of Population Fully Vaccinated

# Dot plot of the share of population fully vaccinated for the 15 countries
# with the largest GDP, ordered by GDP on the y axis.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties by
# default. The title and axis labels are set explicitly so the y axis no
# longer shows the raw fct_reorder(...) expression.
world %>%
  slice_max(gdp_billion, n = 15) %>%
  plot_ly(
    x = ~pct_pop_fully_vaccinated,
    y = ~fct_reorder(country, gdp_billion)
  ) %>%
  add_markers() %>%
  layout(
    xaxis = list(title = "Percent of Population Fully Vaccinated"),
    yaxis = list(title = "Country (ordered by GDP)"),
    title = "Top 15 GDP Countries: Percent of Population Fully Vaccinated"
  )

2.4. Percent of Population Vaccinated by Region

# Box plot of the share of population vaccinated, one box per region.
plot_ly(world, x = ~region, y = ~pct_pop_vaccinated) %>%
  add_boxplot() %>%
  layout(
    title = "Percent of Population Vaccinated By Regions",
    xaxis = list(title = "Region"),
    yaxis = list(title = "Percent of Population Vaccinated")
  )
## Warning: Ignoring 2 observations
# Box plot of the share of population fully vaccinated, one box per region.
plot_ly(world, x = ~region, y = ~pct_pop_fully_vaccinated) %>%
  add_boxplot() %>%
  layout(
    title = "Percent of Population Fully Vaccinated By Regions",
    xaxis = list(title = "Region"),
    yaxis = list(title = "Percent of Population Fully Vaccinated")
  )
## Warning: Ignoring 1 observations

3. Key Observations

  • Vaccination rollout (doses per 100 people) is further along in countries with higher GDP per capita than in those with lower GDP per capita, presumably because of their stronger finances.
  • European region outperforms the rest of the regions in the world.
  • The 15 countries with the highest GDP show very different vaccination results. This would be interesting to examine further with insights into policies and case numbers. India also has a low percentage of its population vaccinated.

Sources

For all comments and inputs for further improvements (I do appreciate), please email me at