knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(gt)
library(ggplot2)
here::set_here()
## File .here already exists in /cloud/project

Data

https://drive.google.com/file/d/1tUl4HDFEF0UBBhDqu5kkIb8tHHluLIjq/view?usp=sharing

load the data

# option 1

ds <- read.csv("/cloud/project/IHME-GBD_2019_DATA-bf4c8eb1-1.csv")

# option 2

setwd("/cloud/project")
ds <- read.csv("IHME-GBD_2019_DATA-bf4c8eb1-1.csv")

# option 3
here::set_here()
## File .here already exists in /cloud/project
ds <- read.csv("IHME-GBD_2019_DATA-bf4c8eb1-1.csv")
dim(ds)
## [1] 100320     10
colnames(ds)
##  [1] "measure"  "location" "sex"      "age"      "cause"    "metric"  
##  [7] "year"     "val"      "upper"    "lower"
lapply(ds, function(x) head(unique(x)))
## $measure
## [1] "Deaths"    "Incidence"
## 
## $location
## [1] "Japan"          "United Kingdom" "India"          "South Africa"  
## [5] "Jordan"         "China"         
## 
## $sex
## [1] "Male"   "Female" "Both"  
## 
## $age
## [1] "All ages"
## 
## $cause
## [1] "Esophageal cancer"                   "Stomach cancer"                     
## [3] "Liver cancer"                        "Larynx cancer"                      
## [5] "Tracheal, bronchus, and lung cancer" "Breast cancer"                      
## 
## $metric
## [1] "Number"
## 
## $year
## [1] 1990 1991 1992 1993 1994 1995
## 
## $val
## [1]  6861.007  1510.983  8371.990 33409.444 19904.498 53313.942
## 
## $upper
## [1]  7016.740  1577.354  8571.852 33999.129 20607.839 54577.255
## 
## $lower
## [1]  6666.347  1387.008  8052.260 32301.334 18549.961 50875.215
lapply(ds, function(x)length(unique(x)))
## $measure
## [1] 2
## 
## $location
## [1] 20
## 
## $sex
## [1] 3
## 
## $age
## [1] 1
## 
## $cause
## [1] 29
## 
## $metric
## [1] 1
## 
## $year
## [1] 30
## 
## $val
## [1] 92874
## 
## $upper
## [1] 93371
## 
## $lower
## [1] 93421
dim(ds)
## [1] 100320     10

select

remove the age and metric columns from ds

ds<- ds %>% 
  select(-c(age, metric, upper, lower)) %>% 
  filter(cause!="Other neoplasms")
dim(ds)
## [1] 96720     6

filter to remove rows

Remove other neoplasms

ds<- ds %>% 
  filter(cause!="Other neoplasms")

filter to select rows

What is the incidence of bladder cancer in Jordan in 2019?

ds %>% 
  filter (measure=="Incidence", cause=="Bladder cancer", location=="Jordan", year=="2019", sex=="Both")
##     measure location  sex          cause year      val
## 1 Incidence   Jordan Both Bladder cancer 2019 532.2109

summarise

total cases of cancers in Jordan and Saudi Arabia in 2019

ds %>% 
  filter(year==2019) %>%   # filter year 2019 rows 
  filter(location %in% c("Jordan", "Saudi Arabia")) %>%    # filter countries
  filter (measure=="Incidence") %>%  # filter incidence
  filter(sex=="Both") %>% 
  summarise(`Total Cases`=round(sum(val), 0))
##   Total Cases
## 1       43352

group_by

summarise after you group_by

ds %>% 
  filter(year==2019, 
         location %in% c("Jordan", "Saudi Arabia"),
         measure=="Incidence",
         sex=="Both") %>%
  group_by(location) %>% 
  summarise(`Total Cases`=round(sum(val), 0))
## # A tibble: 2 × 2
##   location     `Total Cases`
##   <chr>                <dbl>
## 1 Jordan               11140
## 2 Saudi Arabia         32212

spread

make a table where the columns are the name of countries and all years listed with values representing the total number of Breast cancers diagnosed per year

ds %>% 
  filter(measure=="Incidence", cause=="Breast cancer", sex=="Female") %>% 
  select(location, year, val) %>% 
  spread(year, val) 
##                      location        1990        1991        1992        1993
## 1                     Algeria   1525.7519   1622.1262   1716.5353   1817.2102
## 2                   Australia   7931.5731   8367.1045   8909.5126   9210.5920
## 3                  Bangladesh   4951.7096   4935.0098   5392.4688   5687.6257
## 4                       China  81073.7356  85032.1136  87913.3039  92164.7656
## 5                       Egypt   2509.4541   2615.2064   2796.9892   3106.5191
## 6                      France  30446.5873  31506.1084  32797.4982  34629.5924
## 7                     Germany  46415.0222  48214.2326  49311.5868  52113.4287
## 8                       India  35840.3931  36526.9722  37748.2696  39518.2588
## 9                   Indonesia  16966.5541  17759.2837  18623.6539  19624.0006
## 10 Iran (Islamic Republic of)   2835.4077   3011.0468   3158.6196   3398.4233
## 11                      Italy  37781.8650  39297.3382  39902.9546  41511.2310
## 12                      Japan  31482.6549  33179.1843  34952.0560  36543.5685
## 13                     Jordan    301.3238    320.2912    326.9867    358.5764
## 14                   Pakistan  12284.3640  12993.7900  13778.5104  14417.2154
## 15          Republic of Korea   3444.0837   3740.3756   4041.3059   4403.1919
## 16         Russian Federation  36948.9201  37905.9669  40441.5046  46609.7232
## 17               Saudi Arabia    464.2322    529.6569    583.0284    633.3079
## 18               South Africa   3439.0717   3559.7136   3998.8298   3863.9696
## 19             United Kingdom  41845.9578  41976.5433  42410.2696  43859.2480
## 20   United States of America 189272.6436 193251.8467 196188.1655 205126.7657
##           1994        1995        1996        1997        1998        1999
## 1    1937.3892   2074.0496   2164.2197   2240.6064   2296.1959   2412.7308
## 2    9939.2126  10174.2807  10618.1775  11076.4779  11347.2250  11453.3102
## 3    5999.1374   6478.8497   6669.5602   6789.6298   6906.3096   7139.8991
## 4   96460.6982 101024.8600 107517.5722 113441.9560 120996.4554 128523.7040
## 5    3360.9177   3516.6427   3867.5903   4217.7359   4465.4927   4859.5757
## 6   35059.3068  36038.7312  37067.0666  38030.2037  40250.7229  41716.4558
## 7   53688.3572  55338.6404  56561.4758  56985.9113  57588.1526  58936.5961
## 8   41963.5789  43322.0345  46333.0281  50236.3255  52351.9529  53297.7811
## 9   20578.4390  21665.0407  22903.5602  24040.4505  25004.0261  26007.9706
## 10   3519.0765   3719.7062   3984.0928   4095.6831   4245.3411   4298.2523
## 11  42414.0214  43208.3563  44016.1676  44449.7620  46792.8226  46398.2797
## 12  37436.7994  39657.8153  40725.4782  42443.3247  45013.5123  46941.1357
## 13    376.9194    402.6131    436.2268    460.6137    492.8142    526.1204
## 14  15275.2778  16477.2841  17414.0011  18287.0057  19020.8253  19966.8079
## 15   4750.7302   5137.5221   5503.8358   5950.2163   6298.2460   6770.4769
## 16  49620.6439  47495.7097  45297.1867  43678.7117  43733.8591  47773.2250
## 17    700.1457    778.5358    866.3584    937.6745    991.4498   1054.5538
## 18   4202.9111   4024.0056   4190.5893   4916.2957   4968.6275   5006.9236
## 19  43597.0543  44635.4759  44521.7649  44686.5064  45469.2493  46155.5154
## 20 209672.6098 213553.5085 213163.4238 213756.5006 213177.6346 216427.0230
##           2000        2001        2002        2003        2004        2005
## 1    2531.3428   2667.1999   2821.0811   2973.5767   3174.4731   3416.1100
## 2   11913.4890  12029.7793  12552.1292  12555.9071  12757.9476  12625.1646
## 3    7599.6024   7792.9287   8015.7157   8173.2153   8344.8136   8812.0658
## 4  137035.0337 145905.5729 152847.7232 161175.5138 173193.3535 182560.2471
## 5    5206.4916   5847.4520   6210.9450   6559.9258   6956.4319   7294.0433
## 6   42117.7951  43696.2671  45764.1051  47574.7850  47542.2120  48220.2917
## 7   59391.1874  59644.4215  61213.9886  62671.4963  62960.5560  63767.3790
## 8   54772.5923  57729.4975  60343.1985  60927.8700  61542.4486  64860.5970
## 9   27145.1508  28252.7448  29198.5814  30236.5808  31388.8237  32683.2075
## 10   4740.9212   5305.9721   5512.1336   5686.4406   5920.8557   6369.8050
## 11  47475.8511  47942.7124  49069.5352  50717.1787  49259.4802  50035.5404
## 12  47795.4911  49128.3912  50534.3314  52974.1834  56438.1780  58893.8176
## 13    554.4103    583.4947    618.5677    660.3077    734.3254    810.8314
## 14  21283.7317  22506.8004  23476.1777  24321.2757  25202.5441  26208.4153
## 15   7313.3683   7813.4004   8555.9178   9520.0279  10191.2191  11079.6911
## 16  49744.1334  50954.3383  52950.9909  55534.1817  55911.8595  59900.2157
## 17   1143.5237   1227.7448   1343.6066   1430.0148   1513.3469   1661.9166
## 18   5361.8659   5352.7827   5500.6518   5696.2230   5831.8317   5945.6513
## 19  46534.5276  46206.2639  46979.3003  47881.2286  47031.3578  47565.1675
## 20 218583.2438 221919.6460 222378.9592 222564.0212 218546.9538 219934.7608
##          2006        2007        2008        2009       2010       2011
## 1    3618.736   3791.1218   3967.1429   4160.6597   4326.757   4528.539
## 2   12795.560  13275.5186  13409.4129  13844.6987  13732.233  13998.813
## 3    9136.932   9412.8158   9761.9913  10281.3571  10860.287  11247.933
## 4  191570.888 204084.4119 216872.8337 230054.8009 245765.252 256989.735
## 5    7545.288   7439.7957   7672.9140   8147.7491   8467.110   8389.922
## 6   48811.544  48752.5615  50370.2653  51269.8299  50765.571  50329.567
## 7   64121.282  65405.9705  67332.8709  68536.2946  68606.609  68783.316
## 8   68773.333  72917.1719  76040.3487  79296.0217  86192.460  91882.707
## 9   33923.566  34666.0564  36283.0162  37797.6121  39180.540  40941.770
## 10   6767.571   6911.4069   7226.6393   7591.1210   8034.972   8692.387
## 11  49708.070  50646.5998  51338.0612  52328.2719  51398.029  52294.552
## 12  60594.900  63069.2377  65902.5142  67434.8730  70561.400  74104.378
## 13    857.285    871.2951    893.1305    939.1301   1008.613   1096.760
## 14  27232.396  28266.0750  29606.8604  30943.0222  32463.028  33654.219
## 15  11416.622  12469.0294  13412.6705  14477.4668  15157.625  15656.080
## 16  56988.267  56475.0356  59674.5103  60262.3418  62995.023  60733.837
## 17   1833.918   2018.3432   2198.5960   2361.3236   2582.784   2793.639
## 18   6276.696   6388.1212   6715.2157   7019.9277   7313.335   7465.562
## 19  47824.922  48223.9053  49537.4494  48717.1070  49239.210  48906.473
## 20 219661.871 219363.0629 223048.0130 228176.7619 227444.260 231247.344
##          2012       2013       2014       2015       2016       2017       2018
## 1    4725.194   4914.861   5125.532   5335.265   5555.360   5843.860   6260.288
## 2   13791.200  14081.845  14517.411  14786.652  14693.163  14853.315  15323.172
## 3   11944.347  12419.702  12933.748  13703.596  14575.023  15516.318  16630.650
## 4  264211.087 273332.767 290076.049 298740.848 306806.793 330273.611 352363.533
## 5    8597.051   7949.721   8215.022   9013.433   9475.859   9817.397  10237.408
## 6   50037.092  49932.824  50044.737  51356.118  49541.295  49756.517  50243.866
## 7   67797.101  69563.863  67793.307  70029.804  70169.512  69803.544  69782.038
## 8   94587.568 102170.492 108552.589 119536.005 126015.995 130325.031 137308.308
## 9   42119.066  43479.180  44536.445  45057.387  46249.500  47707.109  49272.230
## 10   9349.762  10062.038  10860.105  11799.648  12578.292  13213.987  13992.891
## 11  52588.097  52237.199  51640.939  53952.828  52480.780  52373.982  52992.357
## 12  73417.688  74971.217  76506.136  75876.856  77467.624  75942.669  74941.416
## 13   1210.334   1321.047   1433.157   1524.855   1651.700   1775.375   1912.032
## 14  35246.121  37744.661  39821.695  41546.185  43667.270  45820.467  48088.579
## 15  16247.434  17142.538  17787.524  18223.313  18710.984  19158.015  19786.151
## 16  61206.046  62761.987  66399.283  67834.212  68056.073  66082.009  66725.593
## 17   3032.042   3303.634   3606.029   3893.231   4238.023   4580.742   4961.754
## 18   7684.523   7960.239   8396.893   8616.572   8640.123   8646.788   8461.427
## 19  49001.153  49329.994  49317.717  50372.669  51454.265  51644.638  52510.071
## 20 231694.418 232473.409 234690.137 237462.439 242567.086 242679.614 245906.180
##          2019
## 1    6621.358
## 2   15545.474
## 3   17661.279
## 4  368374.805
## 5   10599.894
## 6   50715.028
## 7   69661.660
## 8  144085.924
## 9   50628.321
## 10  14742.877
## 11  53095.405
## 12  74260.208
## 13   2053.257
## 14  50293.290
## 15  20251.092
## 16  67692.718
## 17   5330.214
## 18   8539.865
## 19  52730.451
## 20 251530.892

Exercise 1

make a table where the rows are the diseases and columns are the sexes with values representing the incidence of each disease in 2019

Get the GDP and population sizes of countries from gapminder package

Question: Can we combine the dataframes (ds and gapminder) using country names?

library(gapminder)
# gapminder
pop<- gapminder
unique(ds$location)[!unique(ds$location) %in% gapminder$country]
## [1] "Republic of Korea"          "United States of America"  
## [3] "Russian Federation"         "Iran (Islamic Republic of)"
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:gt':
## 
##     html
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
a<- which(unique(ds$location) %nin% pop$country)
unique(ds$location)[a]
## [1] "Republic of Korea"          "United States of America"  
## [3] "Russian Federation"         "Iran (Islamic Republic of)"
a
## [1]  9 12 14 16
# [1] "Russian Federation"         "Republic of Korea"          "United States of America"  
# [4] "Iran (Islamic Republic of)"


cat("\n gapminder countries \n")
## 
##  gapminder countries
unique(gapminder$country)
##   [1] Afghanistan              Albania                  Algeria                 
##   [4] Angola                   Argentina                Australia               
##   [7] Austria                  Bahrain                  Bangladesh              
##  [10] Belgium                  Benin                    Bolivia                 
##  [13] Bosnia and Herzegovina   Botswana                 Brazil                  
##  [16] Bulgaria                 Burkina Faso             Burundi                 
##  [19] Cambodia                 Cameroon                 Canada                  
##  [22] Central African Republic Chad                     Chile                   
##  [25] China                    Colombia                 Comoros                 
##  [28] Congo, Dem. Rep.         Congo, Rep.              Costa Rica              
##  [31] Cote d'Ivoire            Croatia                  Cuba                    
##  [34] Czech Republic           Denmark                  Djibouti                
##  [37] Dominican Republic       Ecuador                  Egypt                   
##  [40] El Salvador              Equatorial Guinea        Eritrea                 
##  [43] Ethiopia                 Finland                  France                  
##  [46] Gabon                    Gambia                   Germany                 
##  [49] Ghana                    Greece                   Guatemala               
##  [52] Guinea                   Guinea-Bissau            Haiti                   
##  [55] Honduras                 Hong Kong, China         Hungary                 
##  [58] Iceland                  India                    Indonesia               
##  [61] Iran                     Iraq                     Ireland                 
##  [64] Israel                   Italy                    Jamaica                 
##  [67] Japan                    Jordan                   Kenya                   
##  [70] Korea, Dem. Rep.         Korea, Rep.              Kuwait                  
##  [73] Lebanon                  Lesotho                  Liberia                 
##  [76] Libya                    Madagascar               Malawi                  
##  [79] Malaysia                 Mali                     Mauritania              
##  [82] Mauritius                Mexico                   Mongolia                
##  [85] Montenegro               Morocco                  Mozambique              
##  [88] Myanmar                  Namibia                  Nepal                   
##  [91] Netherlands              New Zealand              Nicaragua               
##  [94] Niger                    Nigeria                  Norway                  
##  [97] Oman                     Pakistan                 Panama                  
## [100] Paraguay                 Peru                     Philippines             
## [103] Poland                   Portugal                 Puerto Rico             
## [106] Reunion                  Romania                  Rwanda                  
## [109] Sao Tome and Principe    Saudi Arabia             Senegal                 
## [112] Serbia                   Sierra Leone             Singapore               
## [115] Slovak Republic          Slovenia                 Somalia                 
## [118] South Africa             Spain                    Sri Lanka               
## [121] Sudan                    Swaziland                Sweden                  
## [124] Switzerland              Syria                    Taiwan                  
## [127] Tanzania                 Thailand                 Togo                    
## [130] Trinidad and Tobago      Tunisia                  Turkey                  
## [133] Uganda                   United Kingdom           United States           
## [136] Uruguay                  Venezuela                Vietnam                 
## [139] West Bank and Gaza       Yemen, Rep.              Zambia                  
## [142] Zimbabwe                
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
pop<- pop %>% 
  mutate(country=recode(country,
 "United States"= "United States of America" ,
 "Korea, Rep."= "Republic of Korea",
 "Iran"="Iran (Islamic Republic of)"))

pop<- pop %>% filter(year==2007) %>% 
  select(-year)

join

Join the ds and population dataframes by country name

left_ds_pop<- left_join(ds, pop, by=c("location"="country"))
right_ds_pop<- right_join(ds, pop, by=c("location"="country"))
full_ds_pop<- full_join(ds, pop, by=c("location"="country"))
inner_ds_pop<- inner_join(ds, pop, by=c("location"="country"))
inner_ds_pop->ds

#mutate

Calculate the MIR (mortality incidence ratio) for all per country in the year 2019

mir<- ds %>% filter(sex=="Both", year==2019) %>% 
  select(location, measure, gdpPercap, val, continent) %>% 
  group_by(location, measure, gdpPercap, continent) %>% 
  summarise(val=sum(val)) %>%   # colnames here will be those listed in group_by
  ungroup() %>%   # remember to ungroup
  spread(measure, val)%>% 
  mutate(MIR=Deaths/Incidence)
## `summarise()` has grouped output by 'location', 'measure', 'gdpPercap'. You can
## override using the `.groups` argument.
mir
## # A tibble: 19 × 6
##    location                   gdpPercap continent   Deaths Incidence   MIR
##    <chr>                          <dbl> <fct>        <dbl>     <dbl> <dbl>
##  1 Algeria                        6223. Africa      23609.    41763. 0.565
##  2 Australia                     34435. Oceania     52063.   130867. 0.398
##  3 Bangladesh                     1391. Asia       105425.   133983. 0.787
##  4 China                          4959. Asia      2696412.  4470225. 0.603
##  5 Egypt                          5581. Africa      57694.    90225. 0.639
##  6 France                        30470. Europe     193279.   390356. 0.495
##  7 Germany                       32170. Europe     269540.   566876. 0.475
##  8 India                          2452. Asia       926274.  1188929. 0.779
##  9 Indonesia                      3541. Asia       227290.   305810. 0.743
## 10 Iran (Islamic Republic of)    11606. Asia        66367.   117015. 0.567
## 11 Italy                         28570. Europe     188146.   423320. 0.444
## 12 Japan                         31656. Asia       436141.   868569. 0.502
## 13 Jordan                         4519. Asia         5605.    11140. 0.503
## 14 Pakistan                       2606. Asia       179113.   243350. 0.736
## 15 Republic of Korea             23348. Asia       102992.   216199. 0.476
## 16 Saudi Arabia                  21655. Asia        13025.    32212. 0.404
## 17 South Africa                   9270. Africa      54245.    69073. 0.785
## 18 United Kingdom                33203. Europe     192686.   391596. 0.492
## 19 United States of America      42952. Americas   758674.  1877695. 0.404
library(ggrepel)
library(ggplot2)

mir %>% ggplot(aes(x=gdpPercap, y=MIR))+
  geom_point(aes(col=continent), size=3, alpha=0.8)+
  theme_classic()+
  geom_smooth(alpha=0.1,method='lm', formula= y~x, linetype="dashed", se=F)+
  geom_text_repel(aes(label=location))+
  
  labs(x="GPD per Capita (US$)", y="Mortality-incidence ratio (MIR)", col="Continent", title="MIR ~ GPD", subtitle="Data obtained from GBD study results tool", caption="prepared on 24 Sep 2022")+
  theme(axis.text.x = element_text(size=10, angle=45, hjust=0.9, vjust=0.9), legend.text = element_text(size=12), legend.position = c(0.5,0.2))+
  ylim(0, 0.8)+
  guides(col=guide_legend(ncol=3))+
  scale_color_brewer(palette = "Set1")+
  scale_y_continuous(labels = scales::percent)
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

scales percent: https://thomasadventure.blog/posts/ggplot2-percentage-scale/