Table of Contents

1. Prepare workspace and datasets

1.1. Import packages

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(readxl)
library(skimr)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

1.2. Load datasets

# Per-country vaccination figures; the main table every later join starts from.
worldwide <- read_csv("Worldwide Vaccine Data.csv")

## Rows: 180 Columns: 5

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Country
## dbl (4): Doses administered per 100 people, Total doses administered, % of p...

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Inspect the raw column names and types before renaming them.
glimpse(worldwide)

## Rows: 180
## Columns: 5
## $ Country                             <chr> "Afghanistan", "Albania", "Algeria~
## $ `Doses administered per 100 people` <dbl> 8.2, 59.0, 23.0, 8.9, 111.0, 12.0,~
## $ `Total doses administered`          <dbl> 3133227, 1674093, 9989662, 2820134~
## $ `% of population vaccinated`        <dbl> 2.0, 32.0, 14.0, 5.8, 65.0, 8.0, 7~
## $ `% of population fully vaccinated`  <dbl> NA, 26.0, 9.7, 3.1, 46.0, 4.4, 70.~

# Swap the verbose source headers for short snake_case names.
worldwide <- rename(
  worldwide,
  country = `Country`,
  doses_per_100 = `Doses administered per 100 people`,
  total_doses = `Total doses administered`,
  pct_pop_vaccinated = `% of population vaccinated`,
  pct_pop_fully_vaccinated = `% of population fully vaccinated`
)

# Summarise missing values and the distribution of each renamed column.
skim_without_charts(worldwide)

Data summary
Name	worldwide
Number of rows	180
Number of columns	5
_______________________
Column type frequency:
character	1
numeric	4
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
country	0	1	4	32	0	180	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
doses_per_100	0	1.00	67.98	52.87	0.2	18.5	61.0	112.5	202
total_doses	0	1.00	33566984.45	177673772.22	31332.0	438282.8	3393275.0	13601571.0	2190792000
pct_pop_vaccinated	2	0.99	38.04	27.22	0.1	12.0	37.5	64.0	94
pct_pop_fully_vaccinated	1	0.99	29.95	25.10	0.1	6.4	26.0	51.5	84

# Country reference table: supplies the country codes passed to plotly's
# `locations` argument and the region / sub-region labels used for grouping.

code <- read_csv("country_code.csv")

## Rows: 249 Columns: 11

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): name, alpha-2, alpha-3, iso_3166-2, region, sub-region, intermediat...
## dbl (4): country-code, region-code, sub-region-code, intermediate-region-code

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Inspect the reference table's columns; several names contain hyphens.
glimpse(code)

## Rows: 249
## Columns: 11
## $ name                       <chr> "Afghanistan", "Åland Islands", "Albania", ~
## $ `alpha-2`                  <chr> "AF", "AX", "AL", "DZ", "AS", "AD", "AO", "~
## $ `alpha-3`                  <chr> "AFG", "ALA", "ALB", "DZA", "ASM", "AND", "~
## $ `country-code`             <dbl> 4, 248, 8, 12, 16, 20, 24, 660, 10, 28, 32,~
## $ `iso_3166-2`               <chr> "ISO 3166-2:AF", "ISO 3166-2:AX", "ISO 3166~
## $ region                     <chr> "Asia", "Europe", "Europe", "Africa", "Ocea~
## $ `sub-region`               <chr> "Southern Asia", "Northern Europe", "Southe~
## $ `intermediate-region`      <chr> NA, NA, NA, NA, NA, NA, "Middle Africa", "C~
## $ `region-code`              <dbl> 142, 150, 150, 2, 9, 150, 2, 19, NA, 19, 19~
## $ `sub-region-code`          <dbl> 34, 154, 39, 15, 61, 39, 202, 419, NA, 419,~
## $ `intermediate-region-code` <dbl> NA, NA, NA, NA, NA, NA, 17, 29, NA, 29, 5, ~

# Keep only the identifiers needed downstream, renaming them while selecting.
code <- select(
  code,
  country = name,
  country_code = `alpha-3`,
  region,
  region_code = `region-code`,
  subregion = `sub-region`,
  subregion_code = `sub-region-code`
)

# Check the trimmed reference table for missing codes or regions.
skim_without_charts(code)

Data summary
Name	code
Number of rows	249
Number of columns	6
_______________________
Column type frequency:
character	4
numeric	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
country	0	1	4	44	249
country_code	0	1	3	3	249
region	1	1	4	8	5
subregion	1	1	9	31	17

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
region_code	1	1	65.95	67.35	2	9.00	19	142	150
subregion_code	1	1	179.87	138.33	15	53.75	154	202	419

# GDP in long format (one row per country and year); used to attach a GDP
# figure to each country in the vaccination table.

gdp <- read_csv("gdp_csv.csv")

## Rows: 11507 Columns: 4

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): Country Name, Country Code
## dbl (2): Year, Value

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Inspect the GDP table's columns (name, code, Year, Value).
glimpse(gdp)

## Rows: 11,507
## Columns: 4
## $ `Country Name` <chr> "Arab World", "Arab World", "Arab World", "Arab World",~
## $ `Country Code` <chr> "ARB", "ARB", "ARB", "ARB", "ARB", "ARB", "ARB", "ARB",~
## $ Year           <dbl> 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1~
## $ Value          <dbl> 25760683041, 28434203615, 31385499664, 36426909888, 433~

# 2016 GDP per country, expressed in billions and rounded to 2 decimals.
gdp_2016 <- gdp %>%
  filter(Year == 2016) %>%
  transmute(
    country = `Country Name`,
    country_code = `Country Code`,
    gdp_billion = round(Value / 1e9, 2)
  )

# Summarise the 2016 GDP slice before joining it.
skim_without_charts(gdp_2016)

Data summary
Name	gdp_2016
Number of rows	236
Number of columns	3
_______________________
Column type frequency:
character	2
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
country	0	1	4	52	0	236	0
country_code	0	1	3	3	0	236	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
gdp_billion	0	1	2646.4	8606.93	0.03	10.43	56.31	653.43	75845.11

# GDP per capita in wide format (one column per year, 1990-2019); used to
# attach a per-capita figure to each country in the vaccination table.

gdp_per_capita <- read_csv("gdp_per_capita.csv")

## Rows: 260 Columns: 32

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): Country, Country Code
## dbl (29): 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, ...
## lgl  (1): 2019

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Inspect the year columns; note that `2019` was parsed as logical (all NA shown).
glimpse(gdp_per_capita)

## Rows: 260
## Columns: 32
## $ Country        <chr> "Aruba", "Afghanistan", "Angola", "Albania", "Arab Worl~
## $ `Country Code` <chr> "ABW", "AFG", "AGO", "ALB", "ARB", "ARE", "ARG", "ARM",~
## $ `1990`         <dbl> 24101.1094, NA, 3089.6834, 2549.4730, 6808.2070, 72906.~
## $ `1991`         <dbl> 25870.7559, NA, 3120.3561, 1909.1140, 6872.2732, 71753.~
## $ `1992`         <dbl> 26533.3439, NA, 2908.1608, 1823.3077, 7255.3284, 71567.~
## $ `1993`         <dbl> 27430.7524, NA, 2190.7682, 2057.4497, 7458.6471, 70082.~
## $ `1994`         <dbl> 28656.5202, NA, 2195.5323, 2289.8731, 7645.6829, 72471.~
## $ `1995`         <dbl> 28648.9900, NA, 2496.1995, 2665.7649, 7774.2074, 74994.~
## $ `1996`         <dbl> 28499.0894, NA, 2794.8969, 2980.0663, 8094.1498, 76848.~
## $ `1997`         <dbl> 30215.9492, NA, 2953.3427, 2717.3621, 8397.5157, 80390.~
## $ `1998`         <dbl> 30512.6839, NA, 3027.3418, 3021.0147, 8797.6626, 77421.~
## $ `1999`         <dbl> 30728.0545, NA, 3037.7212, 3471.6526, 8938.4515, 76654.~
## $ `2000`         <dbl> 33120.0542, NA, 3097.3073, 3861.3342, 9415.6326, 82215.~
## $ `2001`         <dbl> 32117.9123, NA, 3191.2663, 4301.3528, 9584.1083, 80843.~
## $ `2002`         <dbl> 30862.2227, 839.4859, 3564.0960, 4661.3716, 9581.7971, ~
## $ `2003`         <dbl> 31387.2830, 888.1534, 3614.6073, 4994.5188, 9974.6419, ~
## $ `2004`         <dbl> 34176.4646, 885.8408, 3978.6972, 5422.7785, 10937.3161,~
## $ `2005`         <dbl> 35207.5772, 979.2740, 4555.1858, 5865.3062, 11646.4861,~
## $ `2006`         <dbl> 36362.219, 1031.643, 5048.876, 6559.783, 12442.188, 799~
## $ `2007`         <dbl> 37865.4935, 1176.1264, 5697.2513, 7276.3030, 13041.9255~
## $ `2008`         <dbl> 38515.2638, 1218.1182, 6221.4234, 8228.3742, 13739.7278~
## $ `2009`         <dbl> 34693.0868, 1454.6630, 6092.7832, 8814.8109, 13640.8468~
## $ `2010`         <dbl> 33732.8475, 1637.3780, 6230.2970, 9628.0258, 14127.7780~
## $ `2011`         <dbl> 35492.6185, 1626.7648, 6346.3951, 10207.7524, 14518.827~
## $ `2012`         <dbl> 35498.9821, 1806.7639, 6772.5283, 10526.2355, 15423.465~
## $ `2013`         <dbl> 37419.8928, 1874.7656, 6980.4230, 10571.0107, 15824.780~
## $ `2014`         <dbl> 38223.372, 1897.526, 7199.245, 11259.226, 16153.245, 66~
## $ `2015`         <dbl> 38249.0549, 1886.6930, 7096.6006, 11662.0305, 16501.792~
## $ `2016`         <dbl> 38390.2717, 1896.9925, 6756.9351, 11868.1790, 16935.383~
## $ `2017`         <dbl> 39454.6298, 1934.6368, 6650.5849, 12930.1400, 17099.889~
## $ `2018`         <dbl> NA, 1955.0062, 6452.3552, 13364.1554, 17570.1376, 75075~
## $ `2019`         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~

# Pull out the 2016 per-capita column together with the country identifiers.
capita_2016 <- select(
  gdp_per_capita,
  country = `Country`,
  country_code = `Country Code`,
  gdp_per_capita_2016 = `2016`
)

# Summarise the 2016 per-capita slice; some rows have no 2016 value.
skim_without_charts(capita_2016)

Data summary
Name	capita_2016
Number of rows	260
Number of columns	3
_______________________
Column type frequency:
character	2
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
country	0	1	4	52	0	260	0
country_code	0	1	3	3	0	260	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
gdp_per_capita_2016	22	0.92	19998.61	20605.85	743.9	4969.3	13643.22	27594.41	123573.6

1.3. Join datasets

# Trial join on country name to see how many countries fail to get a code.
check_country_code <- worldwide %>%
  left_join(code, by = "country") %>%
  select(country, country_code)

skim_without_charts(check_country_code)

Data summary
Name	check_country_code
Number of rows	180
Number of columns	2
_______________________
Column type frequency:
character	2
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
country	0	1.00	4	32	0	180	0
country_code	14	0.92	3	3	0	166	0

Fourteen countries did not receive a country code because their names are spelled differently in the two datasets.

# Countries that found no partner row in `code`.
missing_cc <- check_country_code[is.na(check_country_code$country_code), ]

missing_cc

## # A tibble: 14 x 2
##    country                country_code
##    <chr>                  <chr>       
##  1 Bosnia and Herzegovina <NA>        
##  2 Brunei                 <NA>        
##  3 Cape Verde             <NA>        
##  4 Dominican Rep.         <NA>        
##  5 Guinea-Bissau          <NA>        
##  6 Ivory Coast            <NA>        
##  7 Macau                  <NA>        
##  8 Mainland China         <NA>        
##  9 North Macedonia        <NA>        
## 10 Republic of the Congo  <NA>        
## 11 São Tomé and Príncipe  <NA>        
## 12 U.A.E.                 <NA>        
## 13 U.K.                   <NA>        
## 14 West Bank & Gaza       <NA>

# Harmonise "country" values in `worldwide` with the spellings used in `code`.
#
# A named lookup (old name = new name) replaces the positional
# `worldwide[match(...), 1] <- ...` assignments, so the fix no longer depends
# on `country` being the first column, and a key that is absent from
# `worldwide` is reported by name.
#
# NOTE(review): "Republic of the Congo" is mapped to the *Democratic* Republic
# entry. These are normally two different countries (ISO COG vs COD); confirm
# the intended target spelling in `code` before relying on the Congo figures.
country_fixes <- c(
  "Bosnia and Herzegovina" = "Bosnia And Herzegovina",
  "Brunei" = "Brunei Darussalam",
  "Cape Verde" = "Cabo Verde",
  "Dominican Rep." = "Dominican Republic",
  "Guinea-Bissau" = "Guinea Bissau",
  "Ivory Coast" = "Côte D'Ivoire",
  "Macau" = "Macao",
  "Mainland China" = "China",
  "North Macedonia" = "Macedonia",
  "Republic of the Congo" = "Congo (Democratic Republic Of The)",
  "São Tomé and Príncipe" = "Sao Tome and Principe",
  "U.A.E." = "United Arab Emirates",
  "U.K." = "United Kingdom",
  "West Bank & Gaza" = "Palestine, State of",
  "South Korea" = "Korea, Republic of"
)

# Fail early, naming the offending keys, if a spelling to fix is not present.
unknown_countries <- setdiff(names(country_fixes), worldwide$country)
if (length(unknown_countries) > 0) {
  stop(
    "Countries not found in `worldwide`: ",
    paste(unknown_countries, collapse = ", "),
    call. = FALSE
  )
}

# Indexing the lookup by name yields NA for countries without a fix;
# coalesce() then falls back to the existing spelling.
worldwide <- worldwide %>%
  mutate(country = coalesce(unname(country_fixes[country]), country))

world <- left_join(worldwide, code, by = "country")

# Every country must now carry a code, otherwise the map would silently drop it.
stopifnot(!anyNA(world$country_code))

skim_without_charts(world)

Data summary
Name	world
Number of rows	180
Number of columns	10
_______________________
Column type frequency:
character	4
numeric	6
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
country	1	4	34	180
country_code	1	3	3	180
region	1	4	8	5
subregion	1	9	31	17

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
doses_per_100	0	1.00	67.98	52.87	0.2	18.5	61.0	112.5	202
total_doses	0	1.00	33566984.45	177673772.22	31332.0	438282.8	3393275.0	13601571.0	2190792000
pct_pop_vaccinated	2	0.99	38.04	27.22	0.1	12.0	37.5	64.0	94
pct_pop_fully_vaccinated	1	0.99	29.95	25.10	0.1	6.4	26.0	51.5	84
region_code	0	1.00	73.39	68.67	2.0	2.0	19.0	142.0	150
subregion_code	0	1.00	173.78	131.36	15.0	39.0	154.0	202.0	419

# Drop the GDP table's own country name so the join adds only `gdp_billion`.
gdp_2016 <- select(gdp_2016, country_code, gdp_billion)
world <- world %>%
  left_join(gdp_2016, by = "country_code")

skim_without_charts(world)

Data summary
Name	world
Number of rows	180
Number of columns	11
_______________________
Column type frequency:
character	4
numeric	7
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
country	1	4	34	180
country_code	1	3	3	180
region	1	4	8	5
subregion	1	9	31	17

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
doses_per_100	0	1.00	67.98	52.87	0.20	18.50	61.0	112.5	2.020000e+02
total_doses	0	1.00	33566984.45	177673772.22	31332.00	438282.75	3393275.0	13601571.0	2.190792e+09
pct_pop_vaccinated	2	0.99	38.04	27.22	0.10	12.00	37.5	64.0	9.400000e+01
pct_pop_fully_vaccinated	1	0.99	29.95	25.10	0.10	6.40	26.0	51.5	8.400000e+01
region_code	0	1.00	73.39	68.67	2.00	2.00	19.0	142.0	1.500000e+02
subregion_code	0	1.00	173.78	131.36	15.00	39.00	154.0	202.0	4.190000e+02
gdp_billion	11	0.94	438.62	1755.83	0.18	10.68	38.3	238.5	1.862447e+04

# Countries for which the GDP join produced no value.
missing_gdp <- world[is.na(world$gdp_billion), ]
missing_gdp

## # A tibble: 11 x 11
##    country          doses_per_100 total_doses pct_pop_vaccinat~ pct_pop_fully_v~
##    <chr>                    <dbl>       <dbl>             <dbl>            <dbl>
##  1 Aruba                    146        155285              76               70  
##  2 Cuba                     168      19073986              76               42  
##  3 Curaçao                  118        186219              62               56  
##  4 Djibouti                   6.9       67229               4.2              2.7
##  5 French Polynesia         101        282180              54               47  
##  6 Libya                     22       1501622              20                2.5
##  7 New Caledonia             72        206784              44               28  
##  8 South Sudan                0.9      100621               0.7              0.3
##  9 Syria                      3.1      533949               1.7              1.5
## 10 Taiwan                    58      13856466              50                8.2
## 11 Venezuela                 39      11094206              24               15  
## # ... with 6 more variables: country_code <chr>, region <chr>,
## #   region_code <dbl>, subregion <chr>, subregion_code <dbl>, gdp_billion <dbl>

# Fill in the GDP values that were missing after the join, using figures
# looked up manually via Google (same scale as `gdp_billion`).
#
# The values are keyed by country name and written to the `gdp_billion` column
# by name rather than by the hard-coded column position 11, so the fill keeps
# working if columns are added or reordered upstream.
gdp_fill <- c(
  "Aruba" = 2.96,
  "Cuba" = 91.37,
  "Curaçao" = 3.12,
  "Djibouti" = 2.60,
  "French Polynesia" = 5.49,
  "Libya" = 26.2,
  "New Caledonia" = 2.68,
  "South Sudan" = 3.50,
  "Syria" = 12.37,
  "Taiwan" = 543.08,
  "Venezuela" = 279.25
)

# Fail early, naming the offending keys, if a country to patch is not present.
unknown_gdp_countries <- setdiff(names(gdp_fill), world$country)
if (length(unknown_gdp_countries) > 0) {
  stop(
    "Countries not found in `world`: ",
    paste(unknown_gdp_countries, collapse = ", "),
    call. = FALSE
  )
}

# Listed countries take the manual value; all others keep their joined value.
world <- world %>%
  mutate(gdp_billion = coalesce(unname(gdp_fill[country]), gdp_billion))

capita_2016 <- capita_2016 %>% select(country_code, gdp_per_capita_2016)
world <- left_join(world, capita_2016, by = "country_code")
skim_without_charts(world)

Data summary
Name	world
Number of rows	180
Number of columns	12
_______________________
Column type frequency:
character	4
numeric	8
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
country	1	4	34	180
country_code	1	3	3	180
region	1	4	8	5
subregion	1	9	31	17

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
doses_per_100	0	1.00	67.98	52.87	0.20	18.50	61.00	112.50	2.020000e+02
total_doses	0	1.00	33566984.45	177673772.22	31332.00	438282.75	3393275.00	13601571.00	2.190792e+09
pct_pop_vaccinated	2	0.99	38.04	27.22	0.10	12.00	37.50	64.00	9.400000e+01
pct_pop_fully_vaccinated	1	0.99	29.95	25.10	0.10	6.40	26.00	51.50	8.400000e+01
region_code	0	1.00	73.39	68.67	2.00	2.00	19.00	142.00	1.500000e+02
subregion_code	0	1.00	173.78	131.36	15.00	39.00	154.00	202.00	4.190000e+02
gdp_billion	0	1.00	417.22	1703.59	0.18	9.34	35.09	225.69	1.862447e+04
gdp_per_capita_2016	9	0.95	20350.56	21954.26	780.91	4693.49	12693.56	29290.47	1.235736e+05

# Countries for which the per-capita join produced no value.
missing_capita <- world[is.na(world$gdp_per_capita_2016), ]
missing_capita

## # A tibble: 9 x 12
##   country          doses_per_100 total_doses pct_pop_vaccinated pct_pop_fully_v~
##   <chr>                    <dbl>       <dbl>              <dbl>            <dbl>
## 1 Cuba                     168      19073986               76               42  
## 2 Djibouti                   6.9       67229                4.2              2.7
## 3 French Polynesia         101        282180               54               47  
## 4 New Caledonia             72        206784               44               28  
## 5 Somalia                    2.8      430762                1.6              1.2
## 6 South Sudan                0.9      100621                0.7              0.3
## 7 Syria                      3.1      533949                1.7              1.5
## 8 Taiwan                    58      13856466               50                8.2
## 9 Venezuela                 39      11094206               24               15  
## # ... with 7 more variables: country_code <chr>, region <chr>,
## #   region_code <dbl>, subregion <chr>, subregion_code <dbl>,
## #   gdp_billion <dbl>, gdp_per_capita_2016 <dbl>

# Fill in the GDP-per-capita values that were missing after the join, using
# figures looked up manually via Google.
#
# The values are keyed by country name and written to `gdp_per_capita_2016`
# by name rather than by the hard-coded column position 12, so the fill keeps
# working if columns are added or reordered upstream.
capita_fill <- c(
  "Cuba" = 8060,
  "Djibouti" = 2602,
  "Somalia" = 187,
  "French Polynesia" = 22000,
  "New Caledonia" = 32831,
  "South Sudan" = 298,
  "Syria" = 709,
  "Taiwan" = 48128,
  "Venezuela" = 9092
)

# Fail early, naming the offending keys, if a country to patch is not present.
unknown_capita_countries <- setdiff(names(capita_fill), world$country)
if (length(unknown_capita_countries) > 0) {
  stop(
    "Countries not found in `world`: ",
    paste(unknown_capita_countries, collapse = ", "),
    call. = FALSE
  )
}

# Listed countries take the manual value; all others keep their joined value.
world <- world %>%
  mutate(
    gdp_per_capita_2016 = coalesce(
      unname(capita_fill[country]),
      gdp_per_capita_2016
    )
  )

# Final review of the dataset
skim_without_charts(world)

Data summary
Name	world
Number of rows	180
Number of columns	12
_______________________
Column type frequency:
character	4
numeric	8
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
country	1	4	34	180
country_code	1	3	3	180
region	1	4	8	5
subregion	1	9	31	17

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100
doses_per_100	0	1.00	67.98	52.87	0.20	18.50	61.00	112.50	2.020000e+02
total_doses	0	1.00	33566984.45	177673772.22	31332.00	438282.75	3393275.00	13601571.00	2.190792e+09
pct_pop_vaccinated	2	0.99	38.04	27.22	0.10	12.00	37.50	64.00	9.400000e+01
pct_pop_fully_vaccinated	1	0.99	29.95	25.10	0.10	6.40	26.00	51.50	8.400000e+01
region_code	0	1.00	73.39	68.67	2.00	2.00	19.00	142.00	1.500000e+02
subregion_code	0	1.00	173.78	131.36	15.00	39.00	154.00	202.00	4.190000e+02
gdp_billion	0	1.00	417.22	1703.59	0.18	9.34	35.09	225.69	1.862447e+04
gdp_per_capita_2016	0	1.00	20021.40	21744.59	187.00	4190.69	12323.67	28768.61	1.235736e+05

2. Exploratory Data Analysis

2.1. Worldwide Map of Covid-19 Vaccinations (Doses Per 100 People)

# Choropleth: one polygon per country code, shaded by doses per 100 people.
plot_geo(world) %>%
  add_trace(
    locations = ~country_code,
    z = ~doses_per_100,
    hoverinfo = "text",
    text = ~paste(
      "Country: ", country, "<br>",
      "Doses per 100: ", doses_per_100
    )
  ) %>%
  layout(
    geo = list(scope = "world"),
    title = "Worldwide Map of Covid-19 Vaccinations<br>Doses Per 100 People"
  ) %>%
  colorbar(title = "Doses per 100 people", outlinewidth = 0)

2.2. High GDP Per Capita vs. Doses Per 100 People

# Scatter of doses per 100 against GDP per capita (log x-axis), coloured by
# region so that legend entries can be toggled.
plot_ly(
  world,
  x = ~gdp_per_capita_2016,
  y = ~doses_per_100,
  color = ~region
) %>%
  add_markers(
    hoverinfo = "text",
    text = ~paste(
      "Country: ", country, "<br>",
      "GDP per Capita: ", gdp_per_capita_2016, "<br>",
      "Doses per 100: ", doses_per_100
    )
  ) %>%
  layout(
    xaxis = list(
      title = "GDP per Capita 2016, Log Scale",
      type = "log",
      showgrid = FALSE
    ),
    yaxis = list(title = "Doses per 100 People"),
    title = "GDP per Capita vs. Doses per 100<br>Click Region To Filter"
  )

2.3. Top 15 Highest GDP and Percent of Population Fully Vaccinated

# Dot plot of the 15 largest economies (by `gdp_billion`) against the share of
# their population that is fully vaccinated.
#
# `slice_max()` replaces the superseded `top_n()`; ties are kept by default,
# as they were with `top_n()`. The explicit layout gives the plot a title and
# readable axis labels, so the y axis no longer shows the raw
# `fct_reorder(country, gdp_billion)` expression.
world %>%
  slice_max(gdp_billion, n = 15) %>%
  plot_ly(
    x = ~pct_pop_fully_vaccinated,
    # Order countries on the axis by GDP rather than alphabetically.
    y = ~fct_reorder(country, gdp_billion)
  ) %>%
  add_markers() %>%
  layout(
    xaxis = list(title = "Percent of Population Fully Vaccinated"),
    yaxis = list(title = "Country (ordered by GDP)"),
    title = "Top 15 Highest GDP Countries<br>Percent of Population Fully Vaccinated"
  )

2.4. Percent of Population Vaccinated by Region

# Distribution of the vaccinated share of the population within each region.
plot_ly(world, x = ~region, y = ~pct_pop_vaccinated) %>%
  add_boxplot() %>%
  layout(
    xaxis = list(title = "Region"),
    yaxis = list(title = "Percent of Population Vaccinated"),
    title = "Percent of Population Vaccinated By Regions"
  )

## Warning: Ignoring 2 observations

# Distribution of the fully vaccinated share of the population within each region.
plot_ly(world, x = ~region, y = ~pct_pop_fully_vaccinated) %>%
  add_boxplot() %>%
  layout(
    xaxis = list(title = "Region"),
    yaxis = list(title = "Percent of Population Fully Vaccinated"),
    title = "Percent of Population Fully Vaccinated By Regions"
  )

## Warning: Ignoring 1 observations

3. Key Observations

The vaccination roll-out (doses per 100 people) is further along in countries with higher GDP per capita than in those with lower GDP per capita, presumably because of their stronger financial resources.
The European region outperforms the other regions of the world.
The 15 countries with the highest GDP show very different vaccination results. This would be interesting to examine further alongside data on policies and case numbers. India, for example, has a low percentage of its population vaccinated.

Sources

Worldwide Vaccine Dataset by Anandhu H: Download from Kaggle
Country Mapping - ISO, Continent, Region by Andrada Olteanu: Download from Kaggle
Country Regional and World GDP by Bojan Tunguz: Download from Kaggle
GDP per capita all countries by Nitisha: Download from Kaggle

Comments and suggestions for improvement are much appreciated; please email me at dannydo1910@gmail.com

EDA Worldwide Covid-19 Vaccination

Danh Do

28-Sep-21

Table of Contents

1. Prepare workspace and datasets

1.1. Import packages

1.2. Load datasets

1.3. Join datasets

2. Exploratory Data Analysis

2.1. Worldwide Map of Covid-19 Vaccinations (Doses Per 100 People)

2.2. High GDP Per Capita vs. Doses Per 100 People

2.3. Top 15 Highest GDP and Percent of Population Fully Vaccinated

2.4. Percent of Population Vaccinated by Region

3. Key Observations

Sources