knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(gt)
library(ggplot2)
here::set_here()
## File .here already exists in /cloud/project
# option 1
ds <- read.csv("/cloud/project/IHME-GBD_2019_DATA-bf4c8eb1-1.csv")
# option 2
setwd("/cloud/project")
ds <- read.csv("IHME-GBD_2019_DATA-bf4c8eb1-1.csv")
# option 3
here::set_here()
## File .here already exists in /cloud/project
ds <- read.csv("IHME-GBD_2019_DATA-bf4c8eb1-1.csv")
dim(ds)
## [1] 100320 10
colnames(ds)
## [1] "measure" "location" "sex" "age" "cause" "metric"
## [7] "year" "val" "upper" "lower"
lapply(ds, function(x) head(unique(x)))
## $measure
## [1] "Deaths" "Incidence"
##
## $location
## [1] "Japan" "United Kingdom" "India" "South Africa"
## [5] "Jordan" "China"
##
## $sex
## [1] "Male" "Female" "Both"
##
## $age
## [1] "All ages"
##
## $cause
## [1] "Esophageal cancer" "Stomach cancer"
## [3] "Liver cancer" "Larynx cancer"
## [5] "Tracheal, bronchus, and lung cancer" "Breast cancer"
##
## $metric
## [1] "Number"
##
## $year
## [1] 1990 1991 1992 1993 1994 1995
##
## $val
## [1] 6861.007 1510.983 8371.990 33409.444 19904.498 53313.942
##
## $upper
## [1] 7016.740 1577.354 8571.852 33999.129 20607.839 54577.255
##
## $lower
## [1] 6666.347 1387.008 8052.260 32301.334 18549.961 50875.215
lapply(ds, function(x)length(unique(x)))
## $measure
## [1] 2
##
## $location
## [1] 20
##
## $sex
## [1] 3
##
## $age
## [1] 1
##
## $cause
## [1] 29
##
## $metric
## [1] 1
##
## $year
## [1] 30
##
## $val
## [1] 92874
##
## $upper
## [1] 93371
##
## $lower
## [1] 93421
dim(ds)
## [1] 100320 10
remove the age and metric columns from ds
ds<- ds %>%
select(-c(age, metric, upper, lower)) %>%
filter(cause!="Other neoplasms")
dim(ds)
## [1] 96720 6
Remove other neoplasms
ds<- ds %>%
filter(cause!="Other neoplasms")
What is the incidence of bladder cancer in Jordan in 2019?
ds %>%
filter (measure=="Incidence", cause=="Bladder cancer", location=="Jordan", year=="2019", sex=="Both")
## measure location sex cause year val
## 1 Incidence Jordan Both Bladder cancer 2019 532.2109
total cases of cancers in Jordan and Saudi Arabia in 2019
ds %>%
filter(year==2019) %>% # filter year 2019 rows
filter(location %in% c("Jordan", "Saudi Arabia")) %>% # filter countries
filter (measure=="Incidence") %>% # filter incidence
filter(sex=="Both") %>%
summarise(`Total Cases`=round(sum(val), 0))
## Total Cases
## 1 43352
summarise after you group_by
ds %>%
filter(year==2019,
location %in% c("Jordan", "Saudi Arabia"),
measure=="Incidence",
sex=="Both") %>%
group_by(location) %>%
summarise(`Total Cases`=round(sum(val), 0))
## # A tibble: 2 × 2
## location `Total Cases`
## <chr> <dbl>
## 1 Jordan 11140
## 2 Saudi Arabia 32212
make a table where the columns are the name of countries and all years listed with values representing the total number of Breast cancers diagnosed per year
ds %>%
filter(measure=="Incidence", cause=="Breast cancer", sex=="Female") %>%
select(location, year, val) %>%
spread(year, val)
## location 1990 1991 1992 1993
## 1 Algeria 1525.7519 1622.1262 1716.5353 1817.2102
## 2 Australia 7931.5731 8367.1045 8909.5126 9210.5920
## 3 Bangladesh 4951.7096 4935.0098 5392.4688 5687.6257
## 4 China 81073.7356 85032.1136 87913.3039 92164.7656
## 5 Egypt 2509.4541 2615.2064 2796.9892 3106.5191
## 6 France 30446.5873 31506.1084 32797.4982 34629.5924
## 7 Germany 46415.0222 48214.2326 49311.5868 52113.4287
## 8 India 35840.3931 36526.9722 37748.2696 39518.2588
## 9 Indonesia 16966.5541 17759.2837 18623.6539 19624.0006
## 10 Iran (Islamic Republic of) 2835.4077 3011.0468 3158.6196 3398.4233
## 11 Italy 37781.8650 39297.3382 39902.9546 41511.2310
## 12 Japan 31482.6549 33179.1843 34952.0560 36543.5685
## 13 Jordan 301.3238 320.2912 326.9867 358.5764
## 14 Pakistan 12284.3640 12993.7900 13778.5104 14417.2154
## 15 Republic of Korea 3444.0837 3740.3756 4041.3059 4403.1919
## 16 Russian Federation 36948.9201 37905.9669 40441.5046 46609.7232
## 17 Saudi Arabia 464.2322 529.6569 583.0284 633.3079
## 18 South Africa 3439.0717 3559.7136 3998.8298 3863.9696
## 19 United Kingdom 41845.9578 41976.5433 42410.2696 43859.2480
## 20 United States of America 189272.6436 193251.8467 196188.1655 205126.7657
## 1994 1995 1996 1997 1998 1999
## 1 1937.3892 2074.0496 2164.2197 2240.6064 2296.1959 2412.7308
## 2 9939.2126 10174.2807 10618.1775 11076.4779 11347.2250 11453.3102
## 3 5999.1374 6478.8497 6669.5602 6789.6298 6906.3096 7139.8991
## 4 96460.6982 101024.8600 107517.5722 113441.9560 120996.4554 128523.7040
## 5 3360.9177 3516.6427 3867.5903 4217.7359 4465.4927 4859.5757
## 6 35059.3068 36038.7312 37067.0666 38030.2037 40250.7229 41716.4558
## 7 53688.3572 55338.6404 56561.4758 56985.9113 57588.1526 58936.5961
## 8 41963.5789 43322.0345 46333.0281 50236.3255 52351.9529 53297.7811
## 9 20578.4390 21665.0407 22903.5602 24040.4505 25004.0261 26007.9706
## 10 3519.0765 3719.7062 3984.0928 4095.6831 4245.3411 4298.2523
## 11 42414.0214 43208.3563 44016.1676 44449.7620 46792.8226 46398.2797
## 12 37436.7994 39657.8153 40725.4782 42443.3247 45013.5123 46941.1357
## 13 376.9194 402.6131 436.2268 460.6137 492.8142 526.1204
## 14 15275.2778 16477.2841 17414.0011 18287.0057 19020.8253 19966.8079
## 15 4750.7302 5137.5221 5503.8358 5950.2163 6298.2460 6770.4769
## 16 49620.6439 47495.7097 45297.1867 43678.7117 43733.8591 47773.2250
## 17 700.1457 778.5358 866.3584 937.6745 991.4498 1054.5538
## 18 4202.9111 4024.0056 4190.5893 4916.2957 4968.6275 5006.9236
## 19 43597.0543 44635.4759 44521.7649 44686.5064 45469.2493 46155.5154
## 20 209672.6098 213553.5085 213163.4238 213756.5006 213177.6346 216427.0230
## 2000 2001 2002 2003 2004 2005
## 1 2531.3428 2667.1999 2821.0811 2973.5767 3174.4731 3416.1100
## 2 11913.4890 12029.7793 12552.1292 12555.9071 12757.9476 12625.1646
## 3 7599.6024 7792.9287 8015.7157 8173.2153 8344.8136 8812.0658
## 4 137035.0337 145905.5729 152847.7232 161175.5138 173193.3535 182560.2471
## 5 5206.4916 5847.4520 6210.9450 6559.9258 6956.4319 7294.0433
## 6 42117.7951 43696.2671 45764.1051 47574.7850 47542.2120 48220.2917
## 7 59391.1874 59644.4215 61213.9886 62671.4963 62960.5560 63767.3790
## 8 54772.5923 57729.4975 60343.1985 60927.8700 61542.4486 64860.5970
## 9 27145.1508 28252.7448 29198.5814 30236.5808 31388.8237 32683.2075
## 10 4740.9212 5305.9721 5512.1336 5686.4406 5920.8557 6369.8050
## 11 47475.8511 47942.7124 49069.5352 50717.1787 49259.4802 50035.5404
## 12 47795.4911 49128.3912 50534.3314 52974.1834 56438.1780 58893.8176
## 13 554.4103 583.4947 618.5677 660.3077 734.3254 810.8314
## 14 21283.7317 22506.8004 23476.1777 24321.2757 25202.5441 26208.4153
## 15 7313.3683 7813.4004 8555.9178 9520.0279 10191.2191 11079.6911
## 16 49744.1334 50954.3383 52950.9909 55534.1817 55911.8595 59900.2157
## 17 1143.5237 1227.7448 1343.6066 1430.0148 1513.3469 1661.9166
## 18 5361.8659 5352.7827 5500.6518 5696.2230 5831.8317 5945.6513
## 19 46534.5276 46206.2639 46979.3003 47881.2286 47031.3578 47565.1675
## 20 218583.2438 221919.6460 222378.9592 222564.0212 218546.9538 219934.7608
## 2006 2007 2008 2009 2010 2011
## 1 3618.736 3791.1218 3967.1429 4160.6597 4326.757 4528.539
## 2 12795.560 13275.5186 13409.4129 13844.6987 13732.233 13998.813
## 3 9136.932 9412.8158 9761.9913 10281.3571 10860.287 11247.933
## 4 191570.888 204084.4119 216872.8337 230054.8009 245765.252 256989.735
## 5 7545.288 7439.7957 7672.9140 8147.7491 8467.110 8389.922
## 6 48811.544 48752.5615 50370.2653 51269.8299 50765.571 50329.567
## 7 64121.282 65405.9705 67332.8709 68536.2946 68606.609 68783.316
## 8 68773.333 72917.1719 76040.3487 79296.0217 86192.460 91882.707
## 9 33923.566 34666.0564 36283.0162 37797.6121 39180.540 40941.770
## 10 6767.571 6911.4069 7226.6393 7591.1210 8034.972 8692.387
## 11 49708.070 50646.5998 51338.0612 52328.2719 51398.029 52294.552
## 12 60594.900 63069.2377 65902.5142 67434.8730 70561.400 74104.378
## 13 857.285 871.2951 893.1305 939.1301 1008.613 1096.760
## 14 27232.396 28266.0750 29606.8604 30943.0222 32463.028 33654.219
## 15 11416.622 12469.0294 13412.6705 14477.4668 15157.625 15656.080
## 16 56988.267 56475.0356 59674.5103 60262.3418 62995.023 60733.837
## 17 1833.918 2018.3432 2198.5960 2361.3236 2582.784 2793.639
## 18 6276.696 6388.1212 6715.2157 7019.9277 7313.335 7465.562
## 19 47824.922 48223.9053 49537.4494 48717.1070 49239.210 48906.473
## 20 219661.871 219363.0629 223048.0130 228176.7619 227444.260 231247.344
## 2012 2013 2014 2015 2016 2017 2018
## 1 4725.194 4914.861 5125.532 5335.265 5555.360 5843.860 6260.288
## 2 13791.200 14081.845 14517.411 14786.652 14693.163 14853.315 15323.172
## 3 11944.347 12419.702 12933.748 13703.596 14575.023 15516.318 16630.650
## 4 264211.087 273332.767 290076.049 298740.848 306806.793 330273.611 352363.533
## 5 8597.051 7949.721 8215.022 9013.433 9475.859 9817.397 10237.408
## 6 50037.092 49932.824 50044.737 51356.118 49541.295 49756.517 50243.866
## 7 67797.101 69563.863 67793.307 70029.804 70169.512 69803.544 69782.038
## 8 94587.568 102170.492 108552.589 119536.005 126015.995 130325.031 137308.308
## 9 42119.066 43479.180 44536.445 45057.387 46249.500 47707.109 49272.230
## 10 9349.762 10062.038 10860.105 11799.648 12578.292 13213.987 13992.891
## 11 52588.097 52237.199 51640.939 53952.828 52480.780 52373.982 52992.357
## 12 73417.688 74971.217 76506.136 75876.856 77467.624 75942.669 74941.416
## 13 1210.334 1321.047 1433.157 1524.855 1651.700 1775.375 1912.032
## 14 35246.121 37744.661 39821.695 41546.185 43667.270 45820.467 48088.579
## 15 16247.434 17142.538 17787.524 18223.313 18710.984 19158.015 19786.151
## 16 61206.046 62761.987 66399.283 67834.212 68056.073 66082.009 66725.593
## 17 3032.042 3303.634 3606.029 3893.231 4238.023 4580.742 4961.754
## 18 7684.523 7960.239 8396.893 8616.572 8640.123 8646.788 8461.427
## 19 49001.153 49329.994 49317.717 50372.669 51454.265 51644.638 52510.071
## 20 231694.418 232473.409 234690.137 237462.439 242567.086 242679.614 245906.180
## 2019
## 1 6621.358
## 2 15545.474
## 3 17661.279
## 4 368374.805
## 5 10599.894
## 6 50715.028
## 7 69661.660
## 8 144085.924
## 9 50628.321
## 10 14742.877
## 11 53095.405
## 12 74260.208
## 13 2053.257
## 14 50293.290
## 15 20251.092
## 16 67692.718
## 17 5330.214
## 18 8539.865
## 19 52730.451
## 20 251530.892
make a table where the rows are the diseases and columns are the sexes with values representing the incidence of each disease in 2019
Question: Can we combine the dataframes (ds and gapminder) using country names?
library(gapminder)
# gapminder
pop<- gapminder
unique(ds$location)[!unique(ds$location) %in% gapminder$country]
## [1] "Republic of Korea" "United States of America"
## [3] "Russian Federation" "Iran (Islamic Republic of)"
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:gt':
##
## html
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
a<- which(unique(ds$location) %nin% pop$country)
unique(ds$location)[a]
## [1] "Republic of Korea" "United States of America"
## [3] "Russian Federation" "Iran (Islamic Republic of)"
a
## [1] 9 12 14 16
# [1] "Russian Federation" "Republic of Korea" "United States of America"
# [4] "Iran (Islamic Republic of)"
cat("\n gapminder countries \n")
##
## gapminder countries
unique(gapminder$country)
## [1] Afghanistan Albania Algeria
## [4] Angola Argentina Australia
## [7] Austria Bahrain Bangladesh
## [10] Belgium Benin Bolivia
## [13] Bosnia and Herzegovina Botswana Brazil
## [16] Bulgaria Burkina Faso Burundi
## [19] Cambodia Cameroon Canada
## [22] Central African Republic Chad Chile
## [25] China Colombia Comoros
## [28] Congo, Dem. Rep. Congo, Rep. Costa Rica
## [31] Cote d'Ivoire Croatia Cuba
## [34] Czech Republic Denmark Djibouti
## [37] Dominican Republic Ecuador Egypt
## [40] El Salvador Equatorial Guinea Eritrea
## [43] Ethiopia Finland France
## [46] Gabon Gambia Germany
## [49] Ghana Greece Guatemala
## [52] Guinea Guinea-Bissau Haiti
## [55] Honduras Hong Kong, China Hungary
## [58] Iceland India Indonesia
## [61] Iran Iraq Ireland
## [64] Israel Italy Jamaica
## [67] Japan Jordan Kenya
## [70] Korea, Dem. Rep. Korea, Rep. Kuwait
## [73] Lebanon Lesotho Liberia
## [76] Libya Madagascar Malawi
## [79] Malaysia Mali Mauritania
## [82] Mauritius Mexico Mongolia
## [85] Montenegro Morocco Mozambique
## [88] Myanmar Namibia Nepal
## [91] Netherlands New Zealand Nicaragua
## [94] Niger Nigeria Norway
## [97] Oman Pakistan Panama
## [100] Paraguay Peru Philippines
## [103] Poland Portugal Puerto Rico
## [106] Reunion Romania Rwanda
## [109] Sao Tome and Principe Saudi Arabia Senegal
## [112] Serbia Sierra Leone Singapore
## [115] Slovak Republic Slovenia Somalia
## [118] South Africa Spain Sri Lanka
## [121] Sudan Swaziland Sweden
## [124] Switzerland Syria Taiwan
## [127] Tanzania Thailand Togo
## [130] Trinidad and Tobago Tunisia Turkey
## [133] Uganda United Kingdom United States
## [136] Uruguay Venezuela Vietnam
## [139] West Bank and Gaza Yemen, Rep. Zambia
## [142] Zimbabwe
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
pop<- pop %>%
mutate(country=recode(country,
"United States"= "United States of America" ,
"Korea, Rep."= "Republic of Korea",
"Iran"="Iran (Islamic Republic of)"))
pop<- pop %>% filter(year==2007) %>%
select(-year)
Join the ds and population dataframes by country name
left_ds_pop<- left_join(ds, pop, by=c("location"="country"))
right_ds_pop<- right_join(ds, pop, by=c("location"="country"))
full_ds_pop<- full_join(ds, pop, by=c("location"="country"))
inner_ds_pop<- inner_join(ds, pop, by=c("location"="country"))
inner_ds_pop->ds
#mutate
Calculate the MIR (mortality incidence ratio) for all per country in the year 2019
mir<- ds %>% filter(sex=="Both", year==2019) %>%
select(location, measure, gdpPercap, val, continent) %>%
group_by(location, measure, gdpPercap, continent) %>%
summarise(val=sum(val)) %>% # colnames here will be those listed in group_by
ungroup() %>% # remember to ungroup
spread(measure, val)%>%
mutate(MIR=Deaths/Incidence)
## `summarise()` has grouped output by 'location', 'measure', 'gdpPercap'. You can
## override using the `.groups` argument.
mir
## # A tibble: 19 × 6
## location gdpPercap continent Deaths Incidence MIR
## <chr> <dbl> <fct> <dbl> <dbl> <dbl>
## 1 Algeria 6223. Africa 23609. 41763. 0.565
## 2 Australia 34435. Oceania 52063. 130867. 0.398
## 3 Bangladesh 1391. Asia 105425. 133983. 0.787
## 4 China 4959. Asia 2696412. 4470225. 0.603
## 5 Egypt 5581. Africa 57694. 90225. 0.639
## 6 France 30470. Europe 193279. 390356. 0.495
## 7 Germany 32170. Europe 269540. 566876. 0.475
## 8 India 2452. Asia 926274. 1188929. 0.779
## 9 Indonesia 3541. Asia 227290. 305810. 0.743
## 10 Iran (Islamic Republic of) 11606. Asia 66367. 117015. 0.567
## 11 Italy 28570. Europe 188146. 423320. 0.444
## 12 Japan 31656. Asia 436141. 868569. 0.502
## 13 Jordan 4519. Asia 5605. 11140. 0.503
## 14 Pakistan 2606. Asia 179113. 243350. 0.736
## 15 Republic of Korea 23348. Asia 102992. 216199. 0.476
## 16 Saudi Arabia 21655. Asia 13025. 32212. 0.404
## 17 South Africa 9270. Africa 54245. 69073. 0.785
## 18 United Kingdom 33203. Europe 192686. 391596. 0.492
## 19 United States of America 42952. Americas 758674. 1877695. 0.404
library(ggrepel)
library(ggplot2)
mir %>% ggplot(aes(x=gdpPercap, y=MIR))+
geom_point(aes(col=continent), size=3, alpha=0.8)+
theme_classic()+
geom_smooth(alpha=0.1,method='lm', formula= y~x, linetype="dashed", se=F)+
geom_text_repel(aes(label=location))+
labs(x="GPD per Capita (US$)", y="Mortality-incidence ratio (MIR)", col="Continent", title="MIR ~ GPD", subtitle="Data obtained from GBD study results tool", caption="prepared on 24 Sep 2022")+
theme(axis.text.x = element_text(size=10, angle=45, hjust=0.9, vjust=0.9), legend.text = element_text(size=12), legend.position = c(0.5,0.2))+
ylim(0, 0.8)+
guides(col=guide_legend(ncol=3))+
scale_color_brewer(palette = "Set1")+
scale_y_continuous(labels = scales::percent)
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
scales percent: https://thomasadventure.blog/posts/ggplot2-percentage-scale/